diff --git a/BUILD.md b/BUILD.md deleted file mode 100644 index 981a4be90..000000000 --- a/BUILD.md +++ /dev/null @@ -1,74 +0,0 @@ -### Building from Source - -To build the project, you need [CMake](https://cmake.org) to configure the project. - -```shell -./configure -``` - -When several python virtual environments are installed, it may be useful to -point to the correct intended version of python (see `./configure --help` for -options) - -The configure script creates a virtual environment. You can source the right -environment variables using - -``` -source ./scripts/python_env.sh -``` - -Now you can build the package as follows: - -```shell -cmake -H. -Bbuild \ - -DPYTHON_EXECUTABLE:FILEPATH=$VIRTUAL_ENV/bin/python \ - -DPYTHON_INCLUDE_DIR=$VIRTUAL_ENV/include/python3.7m \ - -DPYTHON_LIBRARY=$VIRTUAL_ENV/lib -make -C build -j3 -``` -(Note that if your `virtualenv` uses a different version of Python, -you will need to modify the `-DPYTHON_INCLUDE_DIR` value accordingly.) - -This build directory does not have to be identical to the `build` directory -created by `./configure` as it is here. - -CMake `POST_BUILD` hooks on shared libraries targets will handle installing the -dev tree into your virtualenv. - -### Running Unit Tests - -You can run all the unit tests not marked as slow using the following command. - -```shell -pip install -e . -pytest -rfs -m '"no slow"' coremltools/test -``` - -Shortcut targets to rebuild and run all the tests exist as well. -This takes time, so the recommended workflow is to run only relevant tests until -you're confident in a change. - -```shell -make -j3 -C build pytest_no_slow -make -j3 -C build pytest -``` - -See [pytest documentation](https://docs.pytest.org/en/latest/) to learn more -about how to run a single unit test. - -### Building wheels -If you would like a wheel to install outside of the virtualenv (or in it), -use `make -C build dist` and find the resulting wheels in `build/dist/*.whl`. - -If you want to build a wheel for distribution or testing, there is a script -that automates all of the steps necessary for building a wheel, -`scripts/make_wheel.sh`, that can be used instead (but make sure to install the -wheel before running unit tests, if you plan to run the tests). - -### Building Documentation - -The API docs for this package can be build using the following: -``` -./scripts/make_docs.sh --wheel-path=[PATH_TO_WHEEL] -``` -The API docs are saved at `docs/_build/html`. diff --git a/BUILDING.md b/BUILDING.md new file mode 100644 index 000000000..0e85affff --- /dev/null +++ b/BUILDING.md @@ -0,0 +1,40 @@ +### Building + +To build coremltools from source, you need +[CMake](https://cmake.org) and +[Miniconda](https://docs.conda.io/en/latest/miniconda.html) to configure the +project. + +Our makefile & scripts require the **zsh** shell (default shell for macOS +10.15+) installed in `/usr/bin`. + +The following targets will handle the development environment for you. If you +need to add packages, edit the reqs/pip files and the auto-environment will +install them automatically. + + +* `build` | Build coremltools in *debug* mode (include symbols). +* `docs` | Build documentation. +* `clean` | Clean build dir. +* `clean_envs` | Delete all envs created by the scripts. +* `lint` | Linter. +* `proto` | Build coremltools and rebuild MLModel protobuf sources. +* `release` | Set up the package for release, but don’t upload to pypi. Include all wheels from build/dist in the built package. +* `style` | Style checking. +* `test` | Run all tests. 
Pass TEST_PACKAGES="..." to set which packages to test. +* `test_fast` | Run all fast tests. +* `test_slow` | Run all non-fast tests. +* `wheel` | Build wheels in *release* mode. + +By default, we use python 3.7 but you can pass `python=2.7` (or 3.6, 3.8 +etc.) as an argument to change the env / build / wheel python version. + +*Using an unmanaged developer environment* + +Use `make env` to create an auto-set-up development environment with the +correct package dependencies. This env will not be changed by scripts after +creation. However, provided scripts & makefiles do not currently support custom +development environments; rather, they will always auto-activate the managed +environment. Environments are generated and stored at +`envs/coremltools-py` + diff --git a/CMakeLists.txt b/CMakeLists.txt index 5618feb37..b76d8c681 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,16 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") ") endif() +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Globally ignore "no symbols" warnings during compilation +SET(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>") +if(APPLE) + SET(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>") +endif() + find_program(HAS_CCACHE ccache) if(HAS_CCACHE) set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) @@ -42,28 +52,10 @@ include_directories( ${PYTHON_INCLUDE_DIRS} ) -set(CMAKE_CXX_FLAGS " \ -${CMAKE_CXX_FLAGS} \ ---std=c++14 \ -") - if(APPLE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fobjc-arc ") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fobjc-arc") endif() -set(CMAKE_EXE_LINKER_FLAGS " \ -${CMAKE_EXE_LINKER_FLAGS} \ ---std=c++14 \ -") -set(CMAKE_MODULE_LINKER_FLAGS " \ -${CMAKE_MODULE_LINKER_FLAGS} \ ---std=c++14 \ -") -set(CMAKE_SHARED_LINKER_FLAGS " \ -${CMAKE_SHARED_LINKER_FLAGS} \ ---std=c++14 \ -") - add_library(caffeconverter SHARED caffeconverter/CaffeConverterLib.cpp @@ -117,18 +109,21 @@ if (APPLE) set_target_properties(caffeconverter PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") endif() -file(COPY ${CMAKE_SOURCE_DIR}/README.rst DESTINATION ${CMAKE_BINARY_DIR}) +file(COPY ${CMAKE_SOURCE_DIR}/README.md DESTINATION ${CMAKE_BINARY_DIR}) file(COPY ${CMAKE_SOURCE_DIR}/coremltools/__init__.py DESTINATION ${CMAKE_BINARY_DIR}/coremltools) file(COPY ${CMAKE_SOURCE_DIR}/coremltools/__main__.py DESTINATION ${CMAKE_BINARY_DIR}/coremltools) -set(copy_dirs _deps _scripts converters graph_visualization models proto) +file(COPY ${CMAKE_SOURCE_DIR}/coremltools/version.py + DESTINATION ${CMAKE_BINARY_DIR}/coremltools) + +set(copy_dirs _deps _scripts converters models proto) foreach(cdir IN ITEMS ${copy_dirs}) file(COPY ${CMAKE_SOURCE_DIR}/coremltools/${cdir} DESTINATION ${CMAKE_BINARY_DIR}/coremltools) endforeach() -if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") +if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") set(_additional_caffeconverter_command COMMAND strip -x ${PROJECT_SOURCE_DIR}/coremltools/libcaffeconverter.so) endif() @@ -174,14 +169,13 @@ if (APPLE AND CORE_VIDEO AND CORE_ML AND FOUNDATION) ${CORE_VIDEO} ${CORE_ML} ${FOUNDATION} - ${PYTHON_LIBRARIES} ) if(APPLE) set(osx_export_file ${CMAKE_SOURCE_DIR}/coremlpython/exported_symbols_osx.ver) set_property(TARGET coremlpython APPEND PROPERTY LINK_DEPENDS "${osx_export_file}") set_property(TARGET coremlpython APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-exported_symbols_list,${osx_export_file} ") - + # Allow Python to be found at runtime instead of compile/link time # This is apparently the default on Linux 
set_property(TARGET coremlpython APPEND_STRING PROPERTY LINK_FLAGS "-undefined dynamic_lookup") @@ -190,10 +184,10 @@ else() set(linux_export_file coremlpython/exported_symbols_linux.ver) set_property(TARGET coremlpython APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script=${linux_export_file} ") endif() - + set_property(TARGET coremlpython APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-dead_strip") - if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") set(_additional_libcoremlpython_command COMMAND strip -x ${PROJECT_SOURCE_DIR}/coremltools/libcoremlpython.so ) @@ -212,7 +206,7 @@ endif() set(PYTHON_TAG "cp${PYTHON_VERSION_MAJOR}${PYTHON_VERSION_MINOR}") if(APPLE) - set(PLAT_NAME "macosx_10_15_intel;macosx_10_14_intel;macosx_10_13_intel;macosx_10_12_intel") + set(PLAT_NAME "macosx_10_16_intel;macosx_10_15_intel;macosx_10_14_intel;macosx_10_13_intel;macosx_10_12_intel") elseif("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") set(PLAT_NAME "manylinux1_x86_64") else() @@ -245,7 +239,7 @@ add_custom_target(pip_install_dev ) add_custom_target(pytest - COMMAND pytest -r fs ${PROJECT_SOURCE_DIR}/coremltools/test/ + COMMAND pytest -r fs ${PROJECT_SOURCE_DIR}/coremltools/test/ --timeout=600 DEPENDS pip_install_dev USES_TERMINAL ) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8f11550a8..be6e2f5eb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ Contribution Guidelines **Core ML Open Source Community** -The Core ML open source community welcomes all contributions and ideas to grow the product. This can occur within this repo as well as [onnx-coreml](https://github.com/onnx/onnx-coreml) or [tf-coreml](https://github.com/tf-coreml/tf-coreml). +The Core ML open source community welcomes all contributions and ideas to grow the product. These can be provided in a couple of ways: diff --git a/LICENSE.txt b/LICENSE.txt index bbcdc9ef8..78a5fe85d 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2017, Apple Inc. All rights reserved. +Copyright (c) 2020, Apple Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..4bf448352 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include README.md \ No newline at end of file diff --git a/README.md b/README.md index 36a12ec89..fa2ab0d8d 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,26 @@ Core ML Community Tools ======================= +Core ML is an Apple framework to integrate machine learning models into your +app. Core ML provides a unified representation for all models. Your app uses +Core ML APIs and user data to make predictions, and to fine-tune models, all on +the user’s device. Core ML optimizes on-device performance by leveraging the +CPU, GPU, and Neural Engine while minimizing its memory footprint and power +consumption. Running a model strictly on the user’s device removes any need for +a network connection, which helps keep the user’s data private and your app +responsive. + Core ML community tools contains all supporting tools for Core ML model conversion, editing and validation. This includes deep learning frameworks like -TensorFlow, Keras, Caffe as well as classical machine learning frameworks like -LIBSVB, scikit-learn, and XGBoost. +TensorFlow, PyTorch, Keras, Caffe as well as classical machine learning +frameworks like LIBSVM, scikit-learn, and XGBoost. 
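+
+For example, converting a small trained `tf.keras` model takes only a few
+lines (an illustrative sketch: it assumes TensorFlow 2 and coremltools are
+installed, and the tiny `Dense` network below is a stand-in for a real
+trained model):
+
+```python
+import coremltools as ct
+import tensorflow as tf
+
+# A tiny stand-in for a real trained network.
+model = tf.keras.Sequential([tf.keras.layers.Dense(8, input_shape=(4,))])
+
+# The source framework is auto-detected; the result is an MLModel.
+mlmodel = ct.convert(model)
+mlmodel.save("small_model.mlmodel")
+```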
+ +With coremltools, you can do the following: + +- Convert trained models from frameworks like TensorFlow and PyTorch to the + Core ML format. +- Read, write, and optimize Core ML models. +- Verify conversion/creation (on macOS) by making predictions using Core ML. To get the latest version of coremltools: @@ -18,90 +34,8 @@ pip install --upgrade coremltools For the latest changes please see the [release notes](https://github.com/apple/coremltools/releases/). -# Table of Contents - -- [Neural network conversion](#Neural-network-conversion) -- [Core ML specification](#Core-ML-specification) -- [coremltools user guide and examples](#user-guide-and-examples) -- [Installation from Source](#Installation) - -## Neural Network Conversion - -[Link](examples/NeuralNetworkGuide.md) to the detailed NN conversion guide. - -There are several `converters` available to translate neural networks trained -in various frameworks into the Core ML model format. Following formats can be -converted to the Core ML `.mlmodel` format through the coremltools python -package (this repo): - -- Caffe V1 (`.prototxt`, `.caffemodel` format) -- Keras API (2.2+) (`.h5` format) -- TensorFlow 1 (1.13+) (`.pb` frozen graph def format) -- TensorFlow 2 (`.h5` and `SavedModel` formats) - -In addition, there are two more neural network converters build on top of `coremltools`: -- [onnx-coreml](https://github.com/onnx/onnx-coreml): to convert `.onnx` model format. Several frameworks such as PyTorch, MXNet, CaffeV2 etc -provide native export to the ONNX format. -- [tfcoreml](https://github.com/tf-coreml/tf-coreml): to convert TensorFlow models. For producing Core ML models targeting iOS 13 or later, -tfcoreml defers to the TensorFlow converter implemented inside coremltools. -For iOS 12 or earlier, the code path is different and lives entirely in the [tfcoreml](https://github.com/tf-coreml/tf-coreml) package. - -To get an overview on how to use the converters and features such as -post-training quantization using coremltools, please see the [neural network -guide](examples/NeuralNetworkGuide.md). - -## Core ML Specification - -- Core ML specification is fully described in a set of protobuf files. -They are all located in the folder `mlmodel/format/` -- For an overview of the Core ML framework API, see [here](https://developer.apple.com/documentation/coreml). -- To find the list of model types supported by Core ML, see [this](https://github.com/apple/coremltools/blob/1fcac9eb087e20bcc91b41bc938112fa91b4e5a8/mlmodel/format/Model.proto#L229) -portion of the `model.proto` file. -- To find the list of neural network layer types supported see [this](https://github.com/apple/coremltools/blob/1fcac9eb087e20bcc91b41bc938112fa91b4e5a8/mlmodel/format/NeuralNetwork.proto#L472) -portion of the `NeuralNetwork.proto` file. -- Auto-generated documentation for all the protobuf files can be found at this [link](https://apple.github.io/coremltools/coremlspecification/) - -## User Guide and Examples - -- [API documentation](https://apple.github.io/coremltools) -- [Updatable models](examples/updatable_models) -- [Neural network inference examples](examples/neural_network_inference) -- [Neural network guide](examples/NeuralNetworkGuide.md) -- [Miscellaneous How-to code snippets](examples/APIExamples.md) - -## Installation - -We recommend using virtualenv to use, install, or build coremltools. Be -sure to install virtualenv using your system pip. 
- -```shell -pip install virtualenv -``` - -The method for installing `coremltools` follows the -[standard python package installation steps](https://packaging.python.org/installing/). -To create a Python virtual environment called `pythonenv` follow these steps: - -```shell -# Create a folder for your virtualenv -mkdir mlvirtualenv -cd mlvirtualenv - -# Create a Python virtual environment for your Core ML project -virtualenv pythonenv -``` - -To activate your new virtual environment and install `coremltools` in this -environment, follow these steps: - -```shell -# Active your virtual environment -source pythonenv/bin/activate - - -# Install coremltools in the new virtual environment, pythonenv -(pythonenv) pip install -U coremltools -``` +# Documentation -The package [documentation](https://apple.github.io/coremltools) contains -more details on how to use coremltools. +* [User Guides and Examples](https://coremltools.readme.io/) +* [Core ML Specification](https://mlmodel.readme.io/) +* [API Reference](https://coremltools.readme.io/reference/convertersconvert) diff --git a/README.rst b/README.rst deleted file mode 100644 index 9a87c6cad..000000000 --- a/README.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. -*- mode: rst -*- - -coremltools -=========== - -`Core ML `_ -is an Apple framework that allows developers to easily integrate -machine learning (ML) models into apps. Core ML is available on iOS, iPadOS, -watchOS, macOS, and tvOS. Core ML introduces a public file format (.mlmodel) -for a broad set of ML methods including deep neural networks (convolutional -and recurrent), tree ensembles (boosted trees, random forest, decision trees), -and generalized linear models. Core ML models can be directly integrated into -apps within Xcode. - -:code:`coremltools` is a python package for creating, examining, and testing models in -the .mlmodel format. In particular, it can be used to: - -- Convert trained models from popular machine learning tools into Core ML format - (.mlmodel). -- Write models to Core ML format with a simple API. -- Making predictions using the Core ML framework (on select platforms) to - verify conversion. - -More Information ----------------- - -- `Core ML framework documentation `_ -- `Core ML model specification `_ -- `Machine learning at Apple `_ - -License -------- -Copyright (c) 2018, Apple Inc. All rights reserved. - -Use of this source code is governed by the -`3-Clause BSD License `_ -that can be found in the LICENSE.txt file. diff --git a/build_requirements.pip b/build_requirements.pip deleted file mode 100644 index 9dddfa67d..000000000 --- a/build_requirements.pip +++ /dev/null @@ -1,4 +0,0 @@ -numpy==1.14.5 -protobuf -pytest -six diff --git a/caffeconverter/CaffeConverterPython.cpp b/caffeconverter/CaffeConverterPython.cpp index 162691bf2..5e8b80e74 100644 --- a/caffeconverter/CaffeConverterPython.cpp +++ b/caffeconverter/CaffeConverterPython.cpp @@ -4,6 +4,7 @@ #pragma clang diagnostic ignored "-Wexit-time-destructors" #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wrange-loop-analysis" +#pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #include #pragma clang diagnostic pop diff --git a/cmake/coreml-utils.cmake b/cmake/coreml-utils.cmake new file mode 100644 index 000000000..a87a3bfeb --- /dev/null +++ b/cmake/coreml-utils.cmake @@ -0,0 +1,108 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + + +# +# Add custom commands and targets to build a proto file. +# +# Parameters +# - proto_fn The base name of the proto file to build. +# - target_suffix A string to append to all target names. +# +# Environment +# - Consults the variable/option OVERWRITE_PB_SOURCE to determine whether +# to regenerate in-source. +# +# Side Effects +# - If OVERWRITE_PB_SOURCE, targets created to generate in-source are appended +# to the list proto_depends in PARENT_SCOPE. +# +function(coreml_add_build_proto proto_fn target_suffix) + add_custom_command( + OUTPUT + ${CMAKE_CURRENT_BINARY_DIR}/format/${proto_fn}.pb.cc + ${CMAKE_CURRENT_BINARY_DIR}/format/${proto_fn}.pb.h + COMMENT "Generating c++ sources from ${proto_fn}.proto into ${CMAKE_CURRENT_BINARY_DIR}/format/" + COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc + --cpp_out=${CMAKE_CURRENT_BINARY_DIR}/format/ + -I${CMAKE_CURRENT_SOURCE_DIR}/format + ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto + DEPENDS protoc + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + add_custom_command( + OUTPUT + ${CMAKE_CURRENT_BINARY_DIR}/format/${proto_fn}_enum.h + COMMENT "Generating c++ enums from ${proto_fn}.proto into ${CMAKE_CURRENT_BINARY_DIR}/format/" + COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc + --plugin=protoc-gen-enum=mlmodel${target_suffix}/enumgen + --enum_out=${CMAKE_CURRENT_BINARY_DIR}/format/ + -I${CMAKE_CURRENT_SOURCE_DIR}/format/ + ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto + DEPENDS enumgen protoc + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + add_custom_command( + OUTPUT + ${CMAKE_BINARY_DIR}/coremltools${target_suffix}/proto/${proto_fn}_pb2.py + COMMENT "Generating Python sources from ${proto_fn}.proto into ${CMAKE_BINARY_DIR}/coremltools${target_suffix}/proto/" + COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc + --python_out=${CMAKE_BINARY_DIR}/coremltools${target_suffix}/proto + -I${CMAKE_CURRENT_SOURCE_DIR}/format/ + ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto + COMMAND python + -m lib2to3 + -wn + --no-diff + -f import + ${CMAKE_BINARY_DIR}/coremltools${target_suffix}/proto/${proto_fn}_pb2.py + DEPENDS protoc + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + # For the CoreML framework we read the source file locations for these, and + # so it can be useful to update the source tree in addition. So we repeat + # all of the above with different outputs. 
+ if(OVERWRITE_PB_SOURCE) + add_custom_target(tgt_${proto_fn}_source ALL + COMMENT "Generating c++ sources from ${proto_fn}.proto into ${CMAKE_CURRENT_SOURCE_DIR}/build/format/" + COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc + --cpp_out=${CMAKE_CURRENT_SOURCE_DIR}/build/format/ + -I${CMAKE_CURRENT_SOURCE_DIR}/format + ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto + DEPENDS protoc + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + add_custom_target(tgt_${proto_fn}_enums ALL + COMMENT "Generating c++ enums from ${proto_fn}.proto into ${CMAKE_CURRENT_SOURCE_DIR}/build/format/" + COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc + --plugin=protoc-gen-enum=mlmodel${target_suffix}/enumgen + --enum_out=${CMAKE_CURRENT_SOURCE_DIR}/build/format/ + -I${CMAKE_CURRENT_SOURCE_DIR}/format/ + ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto + DEPENDS enumgen protoc + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + add_custom_target(tgt_${proto_fn}_python ALL + COMMENT "Generating Python sources from ${proto_fn}.proto into ${CMAKE_SOURCE_DIR}/coremltools${target_suffix}/proto/" + COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc + --python_out=${CMAKE_SOURCE_DIR}/coremltools${target_suffix}/proto + -I${CMAKE_CURRENT_SOURCE_DIR}/format/ + ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto + COMMAND python + -m lib2to3 + -wn + --no-diff + -f import + ${CMAKE_SOURCE_DIR}/coremltools${target_suffix}/proto/${proto_fn}_pb2.py + DEPENDS protoc + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + # Record dependencies for 'protosrc' target. + list(APPEND proto_depends tgt_${proto_fn}_source) + list(APPEND proto_depends tgt_${proto_fn}_enums) + list(APPEND proto_depends tgt_${proto_fn}_python) + set(proto_depends ${proto_depends} PARENT_SCOPE) + endif() +endfunction() \ No newline at end of file diff --git a/configure b/configure deleted file mode 100755 index 257fb1cd1..000000000 --- a/configure +++ /dev/null @@ -1,124 +0,0 @@ -#!/bin/bash - -# Exit immediately on failure of a subcommand -set -e - -##============================================================================= -## Main configuration processing -COREMLTOOLS_HOME=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) -BUILD_DIR="${COREMLTOOLS_HOME}/build" -# TODO(Keith-Wyss): Allow configuring BUILD_DIR. This doesn't have to be the -# same as CMAKE's build directory, but scripts/python_env.sh relies on it. -COREMLTOOLS_ENV=coremltools-dev - -# command flag options -cleanup_option=0 -include_test_deps=1 -include_docs_deps=0 -PYTHON=$(which python) - -function print_help { - echo "Configures the build with the specified toolchain. " - echo - echo "If configure has already been run before, running configure " - echo "will simply reconfigure the build with no changes. " - echo - echo "Usage: ./configure " - echo - echo " --cleanup Clean up everything." - echo " --exclude-test-deps Exclude packages needed for testing" - echo " --include-docs-deps Include packages needed for making docs" - echo " --python=* Python to use for configuration." - echo - echo "Example: ./configure" - echo - echo "Cleanup all build directories" - echo "Example: ./configure --cleanup" - echo - exit 1 -} # end of print help - -function unknown_option { - echo "Unrecognized option: $1" - echo "To get help, run ./configure --help" - exit 1 -} # end of unknown option - -function run_cleanup { - echo "cleaning up"; - rm -rf $BUILD_DIR -} - -function run_cleanup_prompt { - #!/bin/bash - echo "This script completely erases all build folders including dependencies!" 
- if [[ $default_yes == 1 ]]; then - yesorno="yes" - else - echo "Are you sure you want to continue? (yes or no)" - read yesorno; - fi - - if [ "$yesorno" == "yes" ]; then - run_cleanup - else - echo "Doing nothing!"; - fi -} - - ############################################################################### -# -# Parse command line configure flags ------------------------------------------ -# -while [ $# -gt 0 ] - do case $1 in - --python=*) PYTHON=${1##--python=} ;; - --cleanup) cleanup_option=1;; - --exclude-test-deps) include_test_deps=0;; - --include-docs-deps) include_docs_deps=1;; - --help) print_help ;; - - *) unknown_option $1 ;; - esac - shift -done - -if [[ $cleanup_option == 1 ]]; then - run_cleanup_prompt - exit 0 -fi - -mkdir -p $BUILD_DIR -cd $BUILD_DIR - -# Setup a new virtual env using the existing python -echo "Creating a new virtual environment in $BUILD_DIR/env" -$PYTHON -m pip install virtualenv -$PYTHON -m virtualenv $BUILD_DIR/$COREMLTOOLS_ENV - -# Activate and install packages in the environment -source $BUILD_DIR/$COREMLTOOLS_ENV/bin/activate -pip install -r $COREMLTOOLS_HOME/build_requirements.pip - -# Install test requirements (upgrades packages if required) -if [[ $include_test_deps == 1 ]]; then - pip install -r $COREMLTOOLS_HOME/test_requirements.pip --upgrade -fi - -# Install doc requirements (upgrades packages if required) -if [[ $include_docs_deps == 1 ]]; then - pip install -r $COREMLTOOLS_HOME/docs_requirements.pip --upgrade -fi - -deactivate - -echo -echo -echo -echo "Python env for coremltools development setup." -echo -echo "Run the following command to to activate it." -echo -echo " source ./scripts/python_env.sh" -echo diff --git a/coremlpython/CoreMLPython.h b/coremlpython/CoreMLPython.h index 58df94486..1ca240080 100644 --- a/coremlpython/CoreMLPython.h +++ b/coremlpython/CoreMLPython.h @@ -1,12 +1,14 @@ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wexit-time-destructors" #pragma clang diagnostic ignored "-Wdocumentation" +#pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #include #pragma clang diagnostic pop #import -#import "NeuralNetwork/NeuralNetworkShapes.hpp" +#import "NeuralNetworkBuffer.hpp" +#import "Validation/NeuralNetwork/NeuralNetworkShapes.hpp" namespace py = pybind11; @@ -41,6 +43,18 @@ namespace CoreML { void print() const; }; - + // TODO: + // Create template class and create instance with respect + // to datatypes + class NeuralNetworkBufferInformation { + private: + std::unique_ptr<NNBuffer::NeuralNetworkBuffer> nnBuffer; + + public: + NeuralNetworkBufferInformation(const std::string& bufferFilePath, NNBuffer::bufferMode mode); + ~NeuralNetworkBufferInformation(); + std::vector<float> getBuffer(const u_int64_t offset); + u_int64_t addBuffer(const std::vector<float>& buffer); + }; } } diff --git a/coremlpython/CoreMLPython.mm b/coremlpython/CoreMLPython.mm index 2344bb914..b334608d0 100644 --- a/coremlpython/CoreMLPython.mm +++ b/coremlpython/CoreMLPython.mm @@ -3,8 +3,10 @@ #import "CoreMLPython.h" #import "CoreMLPythonUtils.h" #import "Globals.hpp" -#import "NeuralNetwork/NeuralNetworkShapes.hpp" +#import "Utils.hpp" +#import +#import +#import "NeuralNetworkBuffer.hpp" #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wmissing-prototypes" @@ -18,10 +20,11 @@ using namespace CoreML::Python; Model::~Model() { - NSError *error = nil; - NSFileManager *fileManager = [NSFileManager defaultManager]; - if (compiledUrl != nil) { - [fileManager removeItemAtPath:[[compiledUrl URLByDeletingLastPathComponent] path] error:&error]; + 
@autoreleasepool { + NSFileManager *fileManager = [NSFileManager defaultManager]; + if (compiledUrl != nil) { + [fileManager removeItemAtURL:compiledUrl error:NULL]; + } } } @@ -128,6 +131,39 @@ shaper->print(); } +/* + * NeuralNetworkBuffer - NeuralNetworkBuffer + */ +NeuralNetworkBufferInformation::NeuralNetworkBufferInformation(const std::string &bufferFilePath, NNBuffer::bufferMode mode) + : nnBuffer(std::make_unique<NNBuffer::NeuralNetworkBuffer>(bufferFilePath, mode)) +{ +} + +/* + * NeuralNetworkBufferInformation - ~NeuralNetworkBufferInformation + */ +NeuralNetworkBufferInformation::~NeuralNetworkBufferInformation() = default; + +/* + * NeuralNetworkBuffer - addBuffer + * Writes given buffer into file + * Returns offset from the beginning of buffer + */ +inline u_int64_t NeuralNetworkBufferInformation::addBuffer(const std::vector<float> &buffer) { + return nnBuffer->addBuffer(buffer); +} + +/* + * NeuralNetworkBufferInformation - getBuffer + * Reads buffer from given offset and of given size and writes to data + */ +inline std::vector<float> NeuralNetworkBufferInformation::getBuffer(const u_int64_t offset) { + // TODO: Explore Pybind11 Opaque to pass vector by reference + std::vector<float> buffer; + nnBuffer->getBuffer(offset, buffer); + return buffer; +} + PYBIND11_PLUGIN(libcoremlpython) { py::module m("libcoremlpython", "CoreML.Framework Python bindings"); @@ -143,6 +179,16 @@ .def("shape", &NeuralNetworkShapeInformation::shape) .def("print", &NeuralNetworkShapeInformation::print); + py::class_<NeuralNetworkBufferInformation> netBuffer(m, "_NeuralNetworkBuffer"); + netBuffer.def(py::init<const std::string&, NNBuffer::bufferMode>()) + .def("add_buffer", &NeuralNetworkBufferInformation::addBuffer) + .def("get_buffer", &NeuralNetworkBufferInformation::getBuffer); + py::enum_<NNBuffer::bufferMode>(netBuffer, "mode") + .value("write", NNBuffer::bufferMode::write) + .value("append", NNBuffer::bufferMode::append) + .value("read", NNBuffer::bufferMode::read) + .export_values(); + return m.ptr(); } diff --git a/coremlpython/CoreMLPythonArray.h b/coremlpython/CoreMLPythonArray.h index 7d575d7c6..5cabfc881 100644 --- a/coremlpython/CoreMLPythonArray.h +++ b/coremlpython/CoreMLPythonArray.h @@ -2,6 +2,7 @@ #pragma clang diagnostic ignored "-Wexit-time-destructors" #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wrange-loop-analysis" +#pragma clang diagnostic ignored "-Wdeprecated-declarations" #ifdef check #define __old_check check diff --git a/coremlpython/CoreMLPythonUtils.h b/coremlpython/CoreMLPythonUtils.h index fc966d703..d5a8f04ca 100644 --- a/coremlpython/CoreMLPythonUtils.h +++ b/coremlpython/CoreMLPythonUtils.h @@ -3,6 +3,7 @@ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wexit-time-destructors" #pragma clang diagnostic ignored "-Wdocumentation" +#pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #include #include diff --git a/coremlpython/CoreMLPythonUtils.mm b/coremlpython/CoreMLPythonUtils.mm index db046aa63..d3d559b1b 100644 --- a/coremlpython/CoreMLPythonUtils.mm +++ b/coremlpython/CoreMLPythonUtils.mm @@ -14,6 +14,7 @@ #else +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include <numpy/arrayobject.h> #define PyAnyInteger_Check(name) (PyLong_Check(name) || (_import_array(), PyArray_IsScalar(name, Integer))) @@ -33,7 +34,10 @@ } } -MLDictionaryFeatureProvider * Utils::dictToFeatures(const py::dict& dict, NSError **error) { +MLDictionaryFeatureProvider * Utils::dictToFeatures(const py::dict& dict, NSError * __autoreleasing *error) { + NSError *localError; + MLDictionaryFeatureProvider * featureProvider; + + @autoreleasepool { + NSMutableDictionary *inputDict = 
[[NSMutableDictionary alloc] init]; @@ -44,8 +48,13 @@ inputDict[nsKey] = nsValue; } - return [[MLDictionaryFeatureProvider alloc] initWithDictionary:inputDict error:error]; + featureProvider = [[MLDictionaryFeatureProvider alloc] initWithDictionary:inputDict error:&localError]; + } + + if (error != NULL) { + *error = localError; } + return featureProvider; } py::dict Utils::featuresToDict(id<MLFeatureProvider> features) { diff --git a/coremltools/__init__.py b/coremltools/__init__.py index 36fa57f58..259f59ef8 100644 --- a/coremltools/__init__.py +++ b/coremltools/__init__.py @@ -22,7 +22,7 @@ For more information: http://developer.apple.com/documentation/coreml """ -__version__ = '3.4' +from .version import __version__ # This is the basic Core ML specification format understood by iOS 11.0 SPECIFICATION_VERSION = 1 @@ -45,6 +45,10 @@ _MINIMUM_NEAREST_NEIGHBORS_SPEC_VERSION = 4 _MINIMUM_LINKED_MODELS_SPEC_VERSION = 4 _MINIMUM_UPDATABLE_SPEC_VERSION = 4 +_SPECIFICATION_VERSION_IOS_13 = 4 + +# New versions for iOS 14.0 +_SPECIFICATION_VERSION_IOS_14 = 5 # expose sub packages as directories from . import converters @@ -53,3 +57,25 @@ from .models import utils from ._scripts.converter import _main + +# expose unified converter in coremltools package level +from .converters import convert +from .converters import ( + ClassifierConfig, + TensorType, + ImageType, + RangeDim, + Shape, + EnumeratedShapes, +) +from .converters.mil._deployment_compatibility import AvailableTarget as target + +# Time profiling for functions in coremltools package, decorated with @profile +import os as _os +import sys as _sys +from .converters._profile_utils import _profiler + +_ENABLE_PROFILING = _os.environ.get("ENABLE_PROFILING", False) + +if _ENABLE_PROFILING: + _sys.setprofile(_profiler) diff --git a/coremltools/__main__.py b/coremltools/__main__.py index 99d00741e..646cc9096 100644 --- a/coremltools/__main__.py +++ b/coremltools/__main__.py @@ -4,4 +4,5 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from _scripts.converter import _main + _main() diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py index 720794efd..653ad3960 100644 --- a/coremltools/_deps/__init__.py +++ b/coremltools/_deps/__init__.py @@ -9,95 +9,156 @@ """ from distutils.version import StrictVersion as _StrictVersion import logging as _logging +import platform as _platform import re as _re +import sys as _sys def __get_version(version): # matching 1.6.1, and 1.6.1rc, 1.6.1.dev - version_regex = '^\d+\.\d+\.\d+' + version_regex = r"^\d+\.\d+\.\d+" version = _re.search(version_regex, str(version)).group(0) return _StrictVersion(version) + # --------------------------------------------------------------------------------------- -HAS_SKLEARN = True -SKLEARN_VERSION = None -SKLEARN_MIN_VERSION = '0.17' + +_IS_MACOS = _sys.platform == "darwin" +_MACOS_VERSION = () + +if _IS_MACOS: + ver_str = _platform.mac_ver()[0] + _MACOS_VERSION = tuple([int(v) for v in ver_str.split(".")]) + +MSG_ONLY_MACOS = "Only supported on macOS" + +# --------------------------------------------------------------------------------------- +_HAS_SKLEARN = True +_SKLEARN_VERSION = None +_SKLEARN_MIN_VERSION = "0.17" +_SKLEARN_MAX_VERSION = "0.19.2" + + def __get_sklearn_version(version): # matching 0.15b, 0.16bf, etc - version_regex = '^\d+\.\d+' + version_regex = r"^\d+\.\d+" version = _re.search(version_regex, str(version)).group(0) return _StrictVersion(version) + try: import sklearn - SKLEARN_VERSION = 
__get_sklearn_version(sklearn.__version__) - if SKLEARN_VERSION < _StrictVersion(SKLEARN_MIN_VERSION): - HAS_SKLEARN = False - _logging.warn(('scikit-learn version %s is not supported. Minimum required version: %s. ' - 'Disabling scikit-learn conversion API.') - % (sklearn.__version__, SKLEARN_MIN_VERSION) ) + + _SKLEARN_VERSION = __get_sklearn_version(sklearn.__version__) + if _SKLEARN_VERSION < _StrictVersion( + _SKLEARN_MIN_VERSION + ) or _SKLEARN_VERSION > _StrictVersion(_SKLEARN_MAX_VERSION): + _HAS_SKLEARN = False + _logging.warning( + ( + "scikit-learn version %s is not supported. Minimum required version: %s. " + "Maximum supported version: %s. " + "Disabling scikit-learn conversion API." + ) + % (sklearn.__version__, _SKLEARN_MIN_VERSION, _SKLEARN_MAX_VERSION) + ) except: - HAS_SKLEARN = False + _HAS_SKLEARN = False +MSG_SKLEARN_NOT_FOUND = "Sklearn not found." # --------------------------------------------------------------------------------------- -HAS_LIBSVM = True +_HAS_LIBSVM = True try: - import svm + from libsvm import svm except: - HAS_LIBSVM = False + _HAS_LIBSVM = False +MSG_LIBSVM_NOT_FOUND = "Libsvm not found." # --------------------------------------------------------------------------------------- -HAS_XGBOOST = True +_HAS_XGBOOST = True try: import xgboost except: - HAS_XGBOOST = False + _HAS_XGBOOST = False # --------------------------------------------------------------------------------------- -HAS_TF = True -HAS_TF_1_14 = True -HAS_TF_2 = False -TF_MIN_VERSION = '1.0.0' -TF_MAX_VERSION = '1.14.0' +_HAS_TF = True +_HAS_TF_1 = False +_HAS_TF_2 = False +_TF_1_MIN_VERSION = "1.0.0" +_TF_1_MAX_VERSION = "1.15.0" +_TF_2_MIN_VERSION = "2.1.0" +_TF_2_MAX_VERSION = "2.2.0" try: import tensorflow + tf_ver = __get_version(tensorflow.__version__) # TensorFlow - if tf_ver < _StrictVersion(TF_MIN_VERSION): - _logging.warn(('TensorFlow version %s is not supported. Minimum required version: %s .' - 'TensorFlow conversion will be disabled.') - % (tensorflow.__version__, TF_MIN_VERSION)) - if tf_ver > _StrictVersion(TF_MAX_VERSION): - _logging.warn('TensorFlow version %s detected. Last version known to be fully compatible is %s .' - % (tensorflow.__version__, TF_MAX_VERSION)) - - if tf_ver < _StrictVersion('1.14.0'): - HAS_TF_1_14 = False - - if tf_ver >= _StrictVersion('2.0.0'): - HAS_TF_2 = True + if tf_ver < _StrictVersion("2.0.0"): + _HAS_TF_1 = True + + if tf_ver >= _StrictVersion("2.0.0"): + _HAS_TF_2 = True + + if _HAS_TF_1: + if tf_ver < _StrictVersion(_TF_1_MIN_VERSION): + _logging.warning( + ( + "TensorFlow version %s is not supported. Minimum required version: %s ." + "TensorFlow conversion will be disabled." + ) + % (tensorflow.__version__, _TF_1_MIN_VERSION) + ) + elif tf_ver > _StrictVersion(_TF_1_MAX_VERSION): + _logging.warning( + "TensorFlow version %s detected. Last version known to be fully compatible is %s ." + % (tensorflow.__version__, _TF_1_MAX_VERSION) + ) + elif _HAS_TF_2: + if tf_ver < _StrictVersion(_TF_2_MIN_VERSION): + _logging.warning( + ( + "TensorFlow version %s is not supported. Minimum required version: %s ." + "TensorFlow conversion will be disabled." + ) + % (tensorflow.__version__, _TF_2_MIN_VERSION) + ) + elif tf_ver > _StrictVersion(_TF_2_MAX_VERSION): + _logging.warning( + "TensorFlow version %s detected. Last version known to be fully compatible is %s ." 
+ % (tensorflow.__version__, _TF_2_MAX_VERSION) + ) + except: - HAS_TF = False - HAS_TF_1_14 = False - HAS_TF_2 = False + _HAS_TF = False + _HAS_TF_1 = False + _HAS_TF_2 = False + +MSG_TF1_NOT_FOUND = "TensorFlow 1.x not found." +MSG_TF2_NOT_FOUND = "TensorFlow 2.x not found." # --------------------------------------------------------------------------------------- -HAS_KERAS_TF = True -HAS_KERAS2_TF = True -KERAS_MIN_VERSION = '1.2.2' -KERAS_MAX_VERSION = '2.2.4' +_HAS_KERAS_TF = True +_HAS_KERAS2_TF = True +_KERAS_MIN_VERSION = "1.2.2" +_KERAS_MAX_VERSION = "2.2.4" +MSG_KERAS1_NOT_FOUND = "Keras 1 not found." +MSG_KERAS2_NOT_FOUND = "Keras 2 not found." try: # Prevent keras from printing things that are not errors to standard error. import six import sys + if six.PY2: import StringIO + temp = StringIO.StringIO() else: import io + temp = io.StringIO() stderr = sys.stderr try: @@ -115,38 +176,73 @@ def __get_sklearn_version(version): k_ver = __get_version(keras.__version__) # keras 1 version too old - if k_ver < _StrictVersion(KERAS_MIN_VERSION): - HAS_KERAS_TF = False - HAS_KERAS2_TF = False - _logging.warn(('Keras version %s is not supported. Minimum required version: %s .' - 'Keras conversion will be disabled.') - % (keras.__version__, KERAS_MIN_VERSION)) + if k_ver < _StrictVersion(_KERAS_MIN_VERSION): + _HAS_KERAS_TF = False + _HAS_KERAS2_TF = False + _logging.warning( + ( + "Keras version %s is not supported. Minimum required version: %s ." + "Keras conversion will be disabled." + ) + % (keras.__version__, _KERAS_MIN_VERSION) + ) # keras version too new - if k_ver > _StrictVersion(KERAS_MAX_VERSION): - HAS_KERAS_TF = False - _logging.warn(('Keras version %s detected. Last version known to be fully compatible of Keras is %s .') - % (keras.__version__, KERAS_MAX_VERSION)) + if k_ver > _StrictVersion(_KERAS_MAX_VERSION): + _HAS_KERAS_TF = False + _logging.warning( + ( + "Keras version %s detected. Last version of Keras known to be fully compatible is %s." + ) + % (keras.__version__, _KERAS_MAX_VERSION) + ) # Using Keras 2 rather than 1 - if k_ver >= _StrictVersion('2.0.0'): - HAS_KERAS_TF = False - HAS_KERAS2_TF = True + if k_ver >= _StrictVersion("2.0.0"): + _HAS_KERAS_TF = False + _HAS_KERAS2_TF = True + # Using Keras 1 rather than 2 else: - HAS_KERAS_TF = True - HAS_KERAS2_TF = False - if keras.backend.backend() != 'tensorflow': - HAS_KERAS_TF = False - HAS_KERAS2_TF = False - _logging.warn(('Unsupported Keras backend (only TensorFlow is currently supported). ' - 'Keras conversion will be disabled.')) + _HAS_KERAS_TF = True + _HAS_KERAS2_TF = False + if keras.backend.backend() != "tensorflow": + _HAS_KERAS_TF = False + _HAS_KERAS2_TF = False + _logging.warning( + ( + "Unsupported Keras backend (only TensorFlow is currently supported). " + "Keras conversion will be disabled." + ) + ) except: - HAS_KERAS_TF = False - HAS_KERAS2_TF = False + _HAS_KERAS_TF = False + _HAS_KERAS2_TF = False # --------------------------------------------------------------------------------------- -HAS_CAFFE2 = True +_HAS_CAFFE2 = True try: import caffe2 except: - HAS_CAFFE2 = False + _HAS_CAFFE2 = False + +# --------------------------------------------------------------------------------------- +_HAS_TORCH = True +try: + import torch +except: + _HAS_TORCH = False +MSG_TORCH_NOT_FOUND = "PyTorch not found." 
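The `_HAS_*` flags and `MSG_*` constants above all follow the same gating pattern: probe the optional import once at module load, then let downstream code branch or skip on the flag. A typical consumer looks like this (an illustrative sketch, not a file in this diff; the test name is hypothetical):

```python
import pytest

from coremltools._deps import _HAS_TORCH, MSG_TORCH_NOT_FOUND


# Skip cleanly when the optional dependency is missing.
@pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND)
def test_torch_tensor_roundtrip():
    import torch  # safe here: guarded by the marker above

    assert torch.tensor([1.0]).item() == 1.0
```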
+ +# --------------------------------------------------------------------------------------- +_HAS_ONNX = True +try: + import onnx +except: + _HAS_ONNX = False + +# --------------------------------------------------------------------------------------- +_HAS_GRAPHVIZ = True +try: + import graphviz +except: + _HAS_GRAPHVIZ = False +MSG_ONNX_NOT_FOUND = "ONNX not found." diff --git a/coremltools/_scripts/converter.py b/coremltools/_scripts/converter.py index 30ea0355b..201e0c107 100644 --- a/coremltools/_scripts/converter.py +++ b/coremltools/_scripts/converter.py @@ -15,100 +15,211 @@ import sys as _sys + def _convert(args): - if args.srcModelFormat == 'auto': - if args.srcModelPath.endswith('.caffemodel') or args.caffeProtoTxtPath != '': - args.srcModelFormat = 'caffe' - elif args.srcModelPath.endswith('.h5') or args.kerasJsonPath != '': - args.srcModelFormat = 'keras' + if args.srcModelFormat == "auto": + if args.srcModelPath.endswith(".caffemodel") or args.caffeProtoTxtPath != "": + args.srcModelFormat = "caffe" + elif args.srcModelPath.endswith(".h5") or args.kerasJsonPath != "": + args.srcModelFormat = "keras" else: - print("error: coremlconverter: Unable to auto-detect model format. " - "Please specify the model format using the 'srcModelFormat' argument.") + print( + "error: coremlconverter: Unable to auto-detect model format. " + "Please specify the model format using the 'srcModelFormat' argument." + ) _sys.exit(1) - if args.srcModelFormat == 'caffe': + if args.srcModelFormat == "caffe": if args.caffeProtoTxtPath: if args.meanImageProtoPath: - model = (args.srcModelPath, args.caffeProtoTxtPath, args.meanImageProtoPath) + model = ( + args.srcModelPath, + args.caffeProtoTxtPath, + args.meanImageProtoPath, + ) else: model = (args.srcModelPath, args.caffeProtoTxtPath) else: model = args.srcModelPath try: - model = converters.caffe.convert(model, - image_input_names = set(args.imageInputNames), - is_bgr = args.isBGR, - red_bias = args.redBias, - blue_bias = args.blueBias, - green_bias = args.greenBias, - gray_bias = args.grayBias, - image_scale = args.scale, - class_labels = args.classInputPath, - predicted_feature_name = args.predictedFeatureName) + model = converters.caffe.convert( + model, + image_input_names=set(args.imageInputNames), + is_bgr=args.isBGR, + red_bias=args.redBias, + blue_bias=args.blueBias, + green_bias=args.greenBias, + gray_bias=args.grayBias, + image_scale=args.scale, + class_labels=args.classInputPath, + predicted_feature_name=args.predictedFeatureName, + ) model.save(args.dstModelPath) except Exception as e: - print('error: coremlconverter: %s.' % str(e)) - return 1 # error + print("error: coremlconverter: %s." % str(e)) + return 1 # error return 0 - elif args.srcModelFormat == 'keras': + elif args.srcModelFormat == "keras": try: if not args.inputNames: - raise TypeError("Neural network 'inputNames' are required for converting Keras models.") + raise TypeError( + "Neural network 'inputNames' are required for converting Keras models." + ) if not args.outputNames: - raise TypeError("Neural network 'outputNames' are required for converting Keras models.") + raise TypeError( + "Neural network 'outputNames' are required for converting Keras models." 
+ ) if args.kerasJsonPath: model = (args.kerasJsonPath, args.srcModelPath) else: model = args.srcModelPath - - model = converters.keras.convert(model, - args.inputNames, - args.outputNames, - image_input_names = set(args.imageInputNames) if args.imageInputNames else None, - is_bgr = args.isBGR, - red_bias = args.redBias, - blue_bias = args.blueBias, - green_bias = args.greenBias, - gray_bias = args.grayBias, - image_scale = args.scale, - class_labels = args.classInputPath if args.classInputPath else None, - predicted_feature_name = args.predictedFeatureName, - respect_trainable = args.respectTrainable) + + model = converters.keras.convert( + model, + args.inputNames, + args.outputNames, + image_input_names=set(args.imageInputNames) + if args.imageInputNames + else None, + is_bgr=args.isBGR, + red_bias=args.redBias, + blue_bias=args.blueBias, + green_bias=args.greenBias, + gray_bias=args.grayBias, + image_scale=args.scale, + class_labels=args.classInputPath if args.classInputPath else None, + predicted_feature_name=args.predictedFeatureName, + respect_trainable=args.respectTrainable, + ) model.save(args.dstModelPath) except Exception as e: - print('error: coremlconverter: %s.' % str(e)) - return 1 # error + print("error: coremlconverter: %s." % str(e)) + return 1 # error return 0 else: - print('error: coremlconverter: Invalid srcModelFormat specified.') + print("error: coremlconverter: Invalid srcModelFormat specified.") return 1 + def _main(): import argparse - parser = argparse.ArgumentParser(description='Convert other model file formats to MLKit format (.mlmodel).') - parser.add_argument('--srcModelFormat', type=unicode, choices=['auto', 'caffe', 'keras'], default='auto', help='Format of model at srcModelPath (default is to auto-detect).') - parser.add_argument('--srcModelPath', type=unicode, required=True, help='Path to the model file of the external tool (e.g caffe weights proto binary, keras h5 binary') - parser.add_argument('--dstModelPath', type=unicode, required=True, help='Path to save the model in format .mlmodel') - parser.add_argument('--caffeProtoTxtPath', type=unicode, default='', help='Path to the .prototxt file if network differs from the source file (optional)') - parser.add_argument('--meanImageProtoPath', type=unicode, default='', help='Path to the .binaryproto file containing the mean image if required by the network (optional). This requires a prototxt file to be specified.') - parser.add_argument('--kerasJsonPath', type=unicode, default=None, help='Path to the .json file for keras if the network differs from the weights file (optional)') - parser.add_argument('--inputNames', type=unicode, nargs='*', help='Names of the feature (input) columns, in order (required for keras models).') - parser.add_argument('--outputNames', type=unicode, nargs='*', help='Names of the target (output) columns, in order (required for keras models).') - parser.add_argument('--imageInputNames', type=unicode, default=[], action='append', help='Label the named input as an image. 
Can be specified more than once for multiple image inputs.') - parser.add_argument('--isBGR', action='store_true', default=False, help='True if the image data in BGR order (RGB default)') - parser.add_argument('--redBias', type=float, default=0.0, help='Bias value to be added to the red channel (optional, default 0.0)') - parser.add_argument('--blueBias', type=float, default=0.0, help='Bias value to be added to the blue channel (optional, default 0.0)') - parser.add_argument('--greenBias', type=float, default=0.0, help='Bias value to be added to the green channel (optional, default 0.0)') - parser.add_argument('--grayBias', type=float, default=0.0, help='Bias value to be added to the gray channel for Grayscale images (optional, default 0.0)') - parser.add_argument('--scale', type=float, default=1.0, help='Value by which the image data must be scaled (optional, default 1.0)') - parser.add_argument('--classInputPath', type=unicode, default='', help='Path to class labels (ordered new line separated) for treating the neural network as a classifier') - parser.add_argument('--predictedFeatureName', type=unicode, default='class_output', help='Name of the output feature that captures the class name (for classifiers models).') - parser.add_argument('--respectTrainable', action='store_true', default=False, - help="Honor Keras' 'trainable' flag.") + parser = argparse.ArgumentParser( + description="Convert other model file formats to MLKit format (.mlmodel)." + ) + parser.add_argument( + "--srcModelFormat", + type=unicode, + choices=["auto", "caffe", "keras"], + default="auto", + help="Format of model at srcModelPath (default is to auto-detect).", + ) + parser.add_argument( + "--srcModelPath", + type=unicode, + required=True, + help="Path to the model file of the external tool (e.g caffe weights proto binary, keras h5 binary", + ) + parser.add_argument( + "--dstModelPath", + type=unicode, + required=True, + help="Path to save the model in format .mlmodel", + ) + parser.add_argument( + "--caffeProtoTxtPath", + type=unicode, + default="", + help="Path to the .prototxt file if network differs from the source file (optional)", + ) + parser.add_argument( + "--meanImageProtoPath", + type=unicode, + default="", + help="Path to the .binaryproto file containing the mean image if required by the network (optional). This requires a prototxt file to be specified.", + ) + parser.add_argument( + "--kerasJsonPath", + type=unicode, + default=None, + help="Path to the .json file for keras if the network differs from the weights file (optional)", + ) + parser.add_argument( + "--inputNames", + type=unicode, + nargs="*", + help="Names of the feature (input) columns, in order (required for keras models).", + ) + parser.add_argument( + "--outputNames", + type=unicode, + nargs="*", + help="Names of the target (output) columns, in order (required for keras models).", + ) + parser.add_argument( + "--imageInputNames", + type=unicode, + default=[], + action="append", + help="Label the named input as an image. 
Can be specified more than once for multiple image inputs.", + ) + parser.add_argument( + "--isBGR", + action="store_true", + default=False, + help="True if the image data in BGR order (RGB default)", + ) + parser.add_argument( + "--redBias", + type=float, + default=0.0, + help="Bias value to be added to the red channel (optional, default 0.0)", + ) + parser.add_argument( + "--blueBias", + type=float, + default=0.0, + help="Bias value to be added to the blue channel (optional, default 0.0)", + ) + parser.add_argument( + "--greenBias", + type=float, + default=0.0, + help="Bias value to be added to the green channel (optional, default 0.0)", + ) + parser.add_argument( + "--grayBias", + type=float, + default=0.0, + help="Bias value to be added to the gray channel for Grayscale images (optional, default 0.0)", + ) + parser.add_argument( + "--scale", + type=float, + default=1.0, + help="Value by which the image data must be scaled (optional, default 1.0)", + ) + parser.add_argument( + "--classInputPath", + type=unicode, + default="", + help="Path to class labels (ordered new line separated) for treating the neural network as a classifier", + ) + parser.add_argument( + "--predictedFeatureName", + type=unicode, + default="class_output", + help="Name of the output feature that captures the class name (for classifiers models).", + ) + parser.add_argument( + "--respectTrainable", + action="store_true", + default=False, + help="Honor Keras' 'trainable' flag.", + ) args = parser.parse_args() ret = _convert(args) - _sys.exit(int(ret)) # cast to int or else the exit code is always 1 + _sys.exit(int(ret)) # cast to int or else the exit code is always 1 diff --git a/coremltools/converters/__init__.py b/coremltools/converters/__init__.py index a690bce4c..3cc47b283 100644 --- a/coremltools/converters/__init__.py +++ b/coremltools/converters/__init__.py @@ -9,5 +9,13 @@ from . import xgboost from . import keras from . import caffe -from . import tensorflow -from . import nnssa \ No newline at end of file +from . 
import onnx +from ._converters_entry import convert +from .mil import ( + ClassifierConfig, + TensorType, + ImageType, + RangeDim, + Shape, + EnumeratedShapes, +) diff --git a/coremltools/converters/_converters_entry.py b/coremltools/converters/_converters_entry.py new file mode 100644 index 000000000..6f40b56e5 --- /dev/null +++ b/coremltools/converters/_converters_entry.py @@ -0,0 +1,339 @@ +from __future__ import absolute_import as _ + +import gc +import coremltools +from six import string_types as _string_types +import collections + +from coremltools.converters.mil.input_types import InputType, ClassifierConfig +from coremltools.converters.mil.converter import _convert +from coremltools.converters.mil.mil import Program +from coremltools._deps import _HAS_TORCH, _HAS_TF_1, _HAS_TF_2 +from coremltools.converters._profile_utils import _profile +from coremltools import __version__ as ct_version +from coremltools.models import _METADATA_VERSION, _METADATA_SOURCE +from coremltools.converters.mil._deployment_compatibility import ( + AvailableTarget, + check_deployment_compatibility, +) + +if _HAS_TF_1: + import tensorflow as tf + from coremltools.converters.mil.frontend.tensorflow.load import TF1Loader +if _HAS_TF_2: + import tensorflow as tf + from coremltools.converters.mil.frontend.tensorflow2.load import TF2Loader + +if _HAS_TORCH: + import torch + from coremltools.converters.mil.frontend.torch.load import ( + _torchscript_from_model as pytorch_load, + ) + + +@_profile +def convert( + model, + source="auto", + inputs=None, + outputs=None, + classifier_config=None, + minimum_deployment_target=None, + **kwargs +): + """ + Convert TensorFlow or PyTorch models to the Core ML model format. Whether a + parameter is required may differ between frameworks (see below). Note that + this function is aliased as `ct.convert` in the tutorials. + + Parameters + ---------- + model: + TensorFlow 1, TensorFlow 2 or PyTorch model in one of the following + formats: + + For TensorFlow versions 1.x: + - Frozen `tf.Graph `_ + - Frozen graph (`.pb`) file path + - `tf.keras.Model `_ + - `HDF5 `_ file path (`.h5`) + - `SavedModel `_ directory path + For TensorFlow versions 2.x: + - `tf.keras.Model `_ + - `HDF5 file path `_ (`.h5`) + - `SavedModel `_ directory path + - A `concrete function `_ + For PyTorch: + - A `TorchScript `_ object + - Path to a `.pt` file + + source: str (optional) + One of `auto`, `tensorflow`, or `pytorch`. `auto` determines the + framework automatically for most cases. Raises a ValueError if it fails + to determine the source framework. + + inputs: list of `TensorType` or `ImageType` + - Inputs are required for PyTorch models, but optional for TensorFlow. + - For PyTorch models, the inputs may be nested list or tuple, but for + TensorFlow models it must be a flat list. + - For TensorFlow, if inputs is `None`, the inputs are `Placeholder` + nodes in the model (if model is frozen graph) or function inputs (if + model is tf function). + - For TensorFlow, if inputs is not `None`, inputs may contain only a + subset of all Placeholders in the TF model. + + outputs: list[str] (optional) + + TensorFlow 1 and 2: + - `outputs` are optional. + + - If specified, `outputs` is a list of strings representing node + names. + + - If `outputs` are not specified, the converter infers outputs as all + terminal identity nodes. + + PyTorch: + - `outputs` must not be specified. + + classifier_config: ClassifierConfig class (optional) + The configuration if the mlmodel is intended to be a classifier. 
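+        An illustrative use, assuming a trained Keras classifier and a
+        newline-separated class-labels file (both names here are
+        placeholders):
+
+        >>> config = ct.ClassifierConfig("labels.txt")
+        >>> mlmodel = ct.convert(keras_model, classifier_config=config)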
+ + minimum_deployment_target: coremltools.target enumeration (optional) + - one of the members of enum "coremltools.target." + - When not specified or None, the converter aims for the lowest possible deployment target + + Returns + ------- + model: MLModel + A Core ML MLModel object + + Examples + -------- + TensorFlow 1, 2 (`model` is a frozen graph): + + >>> with tf.Graph().as_default() as graph: + >>> x = tf.placeholder(tf.float32, shape=(1, 2, 3), name="input") + >>> y = tf.nn.relu(x, name="output") + + # Automatically infer inputs and outputs + >>> mlmodel = ct.convert(graph) + >>> test_input = np.random.rand(1, 2, 3) - 0.5 + >>> results = mlmodel.predict({"input": test_input}) + >>> print(results['output']) + + TensorFlow 2 (`model` is tf.Keras model path): + + >>> x = tf.keras.Input(shape=(32,), name='input') + >>> y = tf.keras.layers.Dense(16, activation='softmax')(x) + >>> keras_model = tf.keras.Model(x, y) + + >>> keras_model.save(h5_path) + >>> mlmodel = ct.convert(h5_path) + + >>> test_input = np.random.rand(2, 32) + >>> results = mlmodel.predict({'input': test_input}) + >>> print(results['Identity']) + + PyTorch: + + >>> model = torchvision.models.mobilenet_v2() + >>> model.eval() + >>> example_input = torch.rand(1, 3, 256, 256) + >>> traced_model = torch.jit.trace(model, example_input) + + >>> input = ct.TensorType(name='input_name', shape=(1, 3, 256, 256)) + >>> mlmodel = ct.convert(traced_model, inputs=[input]) + >>> results = mlmodel.predict({"input": example_input.numpy()}) + >>> print(results['1651']) # 1651 is the node name given by PyTorch's JIT + + See `here `_ for + more advanced options + """ + if minimum_deployment_target is not None and not isinstance( + minimum_deployment_target, AvailableTarget + ): + msg = ( + "Unrecognized value of argument 'minimum_deployment_target': {}. " + "It needs to be a member of 'coremltools.target' enumeration. " + "For example, coremltools.target.iOS13" + ) + raise TypeError(msg.format(minimum_deployment_target)) + + source = source.lower() + if source not in {"auto", "tensorflow", "pytorch"}: + msg = ( + 'Unrecognized value of argument "source": {}. ' + 'It must be one of ["auto", "tensorflow", "pytorch"].' 
+ ) + raise ValueError(msg.format(source)) + + def raise_if_duplicated(input_list): + # Detect duplicated inputs + input_names = [t.name for t in input_list if t.name is not None] + dups = [ + item + for item, count in collections.Counter(input_names).items() + if count > 1 + ] + if len(dups) > 0: + raise ValueError("Duplicated inputs: {}".format(dups)) + + if inputs is not None: + if not isinstance(inputs, list): + msg = '"inputs" must be of type list' + raise ValueError(msg) + + if classifier_config is not None: + if not isinstance(classifier_config, ClassifierConfig): + msg = '"classifier_config" must be of type ClassifierConfig' + raise ValueError(msg) + + if source == "tensorflow" and _HAS_TF_2: + source = "tensorflow2" + + if source == "auto" and _HAS_TF_1: + try: + loader = TF1Loader(model, outputs=outputs) + loader._graph_def_from_model(outputs=outputs) + source = "tensorflow" + except: + pass + + if source == "auto" and _HAS_TF_2: + try: + loader = TF2Loader(model, outputs=outputs) + loader._graph_def_from_model(outputs=outputs) + source = "tensorflow2" + except: + pass + + if source == "auto" and _HAS_TORCH: + try: + pytorch_load(model) + source = "pytorch" + except: + pass + + if source == "auto" and isinstance(model, Program): + source = "mil" + + convert_to = kwargs.get("convert_to", "nn_proto") + kwargs.pop("convert_to", None) + + if source == "auto": + msg = ( + "Unable to determine the type of the model, i.e. the source framework. " + 'Please provide the value of argument "source", from one of ' + '["tensorflow", "pytorch"]. Note that model conversion requires the ' + "source package that generates the model. Please make sure you have " + "the appropriate version of the source package installed. E.g., if you're " + "converting a model originally trained with TensorFlow 1.14, make sure " + "you have `tensorflow==1.14` installed."
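To make the contract above concrete, here is a usage sketch (not part of the diff) that exercises the documented parameters and passes `source` explicitly, so the try/except auto-detection probes above are skipped. The input shape, the `image` name, and `labels.txt` are placeholders; passing a label-file path to `ClassifierConfig` follows the pattern used in the docstring.

```python
# Illustrative sketch: convert a traced PyTorch model as a classifier
# with an explicit source and a minimum deployment target.
import coremltools as ct
import torch
import torchvision

model = torchvision.models.mobilenet_v2().eval()
traced = torch.jit.trace(model, torch.rand(1, 3, 224, 224))

mlmodel = ct.convert(
    traced,
    source="pytorch",  # explicit, so auto-detection is bypassed
    inputs=[ct.TensorType(name="image", shape=(1, 3, 224, 224))],
    classifier_config=ct.ClassifierConfig("labels.txt"),  # placeholder label file
    minimum_deployment_target=ct.target.iOS13,
)
```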
+ ) + raise ValueError(msg) + + elif source in {"tensorflow", "tensorflow2"}: + + if source == "tensorflow" and not _HAS_TF_1: + raise ValueError( + 'Converter was called with source="tensorflow", but missing tensorflow package' + ) + + if inputs is not None: + raise_if_duplicated(inputs) + + if inputs is not None and not all( + [isinstance(_input, InputType) for _input in inputs] + ): + raise ValueError("Input should be a list of TensorType or ImageType") + + proto_spec = _convert( + model, + convert_from=source, + convert_to=convert_to, + inputs=inputs, + outputs=outputs, + classifier_config=classifier_config, + **kwargs + ) + + elif source == "pytorch": + if "example_inputs" in kwargs: + msg = 'Unexpected argument "example_inputs" found' + raise ValueError(msg) + + def _flatten_list(_inputs): + ret = [] + for _input in _inputs: + if isinstance(_input, (list, tuple)): + ret.extend(_flatten_list(_input)) + elif isinstance(_input, InputType): + ret.append(_input) + else: + raise ValueError( + "Unknown type {} for flattening into InputType.".format( + type(_input) + ) + ) + return ret + + flat_inputs = _flatten_list(inputs) + raise_if_duplicated(flat_inputs) + if inputs is not None and not all( + [isinstance(_input, InputType) for _input in flat_inputs] + ): + raise ValueError( + "Input should be a list/tuple (or nested lists/tuples) of TensorType or ImageType" + ) + if outputs is not None: + raise ValueError("outputs must not be specified for PyTorch") + + proto_spec = _convert( + model, + convert_from="torch", + convert_to=convert_to, + inputs=inputs, + outputs=outputs, + classifier_config=classifier_config, + **kwargs + ) + + elif source == "mil": + if not isinstance(model, Program): + msg = "Converter was asked to convert MIL input, but input is not a MIL program!" + raise ValueError(msg) + + proto_spec = _convert( + model, + convert_from="mil", + convert_to=convert_to, + example_inputs=inputs, + classifier_config=classifier_config, + **kwargs + ) + + model = coremltools.models.MLModel(proto_spec, useCPUOnly=True) + + if minimum_deployment_target is not None: + check_deployment_compatibility( + spec=proto_spec, + representation=convert_to, + deployment_target=minimum_deployment_target, + ) + + del proto_spec + gc.collect() + + # recording metadata: coremltools version, source framework and version + if source in {"tensorflow", "tensorflow2"} and (_HAS_TF_1 or _HAS_TF_2): + src_pkg_version = "tensorflow=={0}".format(tf.__version__) + elif source == "pytorch" and _HAS_TORCH: + src_pkg_version = "torch=={0}".format(torch.__version__) + else: + src_pkg_version = "unknown" + + model.user_defined_metadata[_METADATA_VERSION] = ct_version + model.user_defined_metadata[_METADATA_SOURCE] = src_pkg_version + + return model diff --git a/coremltools/converters/_profile_utils.py b/coremltools/converters/_profile_utils.py new file mode 100644 index 000000000..2c2596995 --- /dev/null +++ b/coremltools/converters/_profile_utils.py @@ -0,0 +1,76 @@ +from __future__ import print_function as _ +import os +import time + +_FUNCTION_PROFILE_REGISTRY = {} # str -> list (function name to time stack) +_ENABLE_PROFILING = os.environ.get("ENABLE_PROFILING", False) + + +def _profile(_f=None): + def func_wrapper(func): + f_name = func.__module__ + "." 
+ func.__name__ + if f_name in _FUNCTION_PROFILE_REGISTRY: + raise ValueError( + "Function {} is already registered for profiling.".format(f_name) + ) + + _FUNCTION_PROFILE_REGISTRY[f_name] = [] + return func + + if _f is None: + return func_wrapper + return func_wrapper(_f) + + +_INITIAL_CALL = True + + +def _pr_color(skk, color="94m", end="\n"): + print("\033[{} {}\033[00m".format(color, skk), end=end) + + +def _profiler(frame, event, arg, indent=[0]): + if frame.f_globals.get("__name__", None) is None: + return + + package_name = __name__.split(".")[0] + + function_name = frame.f_globals["__name__"] + "." + frame.f_code.co_name + + profile_function = ( + package_name in str(frame) and function_name in _FUNCTION_PROFILE_REGISTRY + ) + + if event == "call" and profile_function: + global _INITIAL_CALL + if _INITIAL_CALL: + _INITIAL_CALL = False + print("\n" * 2) + + indent[0] += 3 + _pr_color( + "{} call {} {}".format( + "=" * indent[0] + ">", + function_name.split(".")[-1], + " (" + ".".join(function_name.split(".")[2:-1]) + ")", + ) + ) + start_time = time.clock() + _FUNCTION_PROFILE_REGISTRY[function_name].append(start_time) + + elif event == "return" and profile_function: + duration = time.clock() - _FUNCTION_PROFILE_REGISTRY[function_name][-1] + duration = round(duration) + _pr_color( + "{} exit {} {} ".format( + "<" + "=" * indent[0], + function_name.split(".")[-1], + " (" + ".".join(function_name.split(".")[2:-1]) + ")", + ), + end="", + ) + _pr_color(": Time spent {} seconds ".format(duration,), color="91m") + indent[0] -= 3 + _FUNCTION_PROFILE_REGISTRY[function_name].pop() + + return _profiler diff --git a/coremltools/converters/caffe/_caffe_converter.py b/coremltools/converters/caffe/_caffe_converter.py index 860674111..1ed7e3adb 100644 --- a/coremltools/converters/caffe/_caffe_converter.py +++ b/coremltools/converters/caffe/_caffe_converter.py @@ -5,13 +5,26 @@ import os import six as _six -from ...models import _MLMODEL_FULL_PRECISION, _MLMODEL_HALF_PRECISION, _VALID_MLMODEL_PRECISION_TYPES - - -def convert(model, image_input_names=[], is_bgr=False, - red_bias=0.0, blue_bias=0.0, green_bias=0.0, gray_bias=0.0, - image_scale=1.0, class_labels=None, predicted_feature_name=None, - model_precision=_MLMODEL_FULL_PRECISION): +from ...models import ( + _MLMODEL_FULL_PRECISION, + _MLMODEL_HALF_PRECISION, + _VALID_MLMODEL_PRECISION_TYPES, +) + + +def convert( + model, + image_input_names=[], + is_bgr=False, + red_bias=0.0, + blue_bias=0.0, + green_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + class_labels=None, + predicted_feature_name=None, + model_precision=_MLMODEL_FULL_PRECISION, +): """ Convert a Caffe model to Core ML format. @@ -31,7 +44,7 @@ def convert(model, image_input_names=[], is_bgr=False, - A tuple of two paths to .caffemodel and .prototxt and a dict with image input names as keys and paths to mean image binaryprotos as values. The keys should be same as the input names provided via the argument 'image_input_name'. - + image_input_names: [str] | str The name(s) of the input blob(s) in the Caffe model that can be treated as images by Core ML. All other inputs are treated as MultiArrays (N-D @@ -59,24 +72,24 @@ def convert(model, image_input_names=[], is_bgr=False, Bias value to be added to the the blue channel of the input image. Defaults to 0.0. Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. 
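A note on the profiling module that just ended above: `_profile` only registers a function in `_FUNCTION_PROFILE_REGISTRY`; the timing output comes from installing `_profiler` as a global trace hook, which this diff does not show. Also, `_profiler` filters on the `coremltools` package name, so only decorated functions inside the package (such as `convert`, decorated earlier) are reported. A minimal sketch of the intended wiring, assuming a traced model `traced` and an `inputs` list are already prepared:

```python
# Sketch only: enable the call/exit timing printed by _profiler while a
# conversion runs. _profiler returns itself, so it stays installed.
import sys
import coremltools as ct
from coremltools.converters._profile_utils import _profiler

sys.setprofile(_profiler)                    # install the trace hook
mlmodel = ct.convert(traced, inputs=inputs)  # @_profile-decorated stages print timings
sys.setprofile(None)                         # detach the hook afterwards
```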
+ To specify different values for each image input provide a dictionary with input names as keys. green_bias: float | dict() Bias value to be added to the green channel of the input image. Defaults to 0.0. Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. + To specify different values for each image input provide a dictionary with input names as keys. gray_bias: float | dict() Bias value to be added to the input image (in grayscale). Defaults to 0.0. Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. + To specify different values for each image input provide a dictionary with input names as keys. image_scale: float | dict() - Value by which the input images will be scaled before bias is added and + Value by which the input images will be scaled before bias is added and Core ML model makes a prediction. Defaults to 1.0. Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. + To specify different values for each image input provide a dictionary with input names as keys. class_labels: str Filepath where classes are parsed as a list of newline separated @@ -123,21 +136,21 @@ def convert(model, image_input_names=[], is_bgr=False, >>> coreml_model = coremltools.converters.caffe.convert(('my_caffe_model.caffemodel', ... 'my_deploy.prototxt', 'mean_image.binaryproto'), image_input_names = 'image_input') - + # Multiple mean images for preprocessing >>> coreml_model = coremltools.converters.caffe.convert(('my_caffe_model.caffemodel', ... 'my_deploy.prototxt', {'image1': 'mean_image1.binaryproto', 'image2': 'mean_image2.binaryproto'}), ... image_input_names = ['image1', 'image2']) - + # Multiple image inputs and bias/scale values >>> coreml_model = coremltools.converters.caffe.convert(('my_caffe_model.caffemodel', 'my_deploy.prototxt'), ... red_bias = {'image1': -100, 'image2': -110}, ... green_bias = {'image1': -90, 'image2': -125}, ... blue_bias = {'image1': -105, 'image2': -120}, ... image_input_names = ['image1', 'image2']) - - - + + + Input and output names used in the interface of the converted Core ML model are inferred from the .prototxt file, which contains a description of the network architecture. Input names are read from the input layer definition in the .prototxt. By default, they are of type MultiArray. @@ -145,11 +158,11 @@ def convert(model, image_input_names=[], is_bgr=False, All the blobs that are "dangling", i.e. which do not feed as input to any other layer are taken as outputs. The .prototxt file can be modified to specify custom input and output names. - + The converted Core ML model is of type classifier when the argument "class_labels" is specified. Advanced usage with custom classifiers, and images: - + .. sourcecode:: python # Mark some inputs as Images @@ -159,37 +172,46 @@ def convert(model, image_input_names=[], is_bgr=False, # Export as a classifier with classes from a file >>> coreml_model = coremltools.converters.caffe.convert(('my_caffe_model.caffemodel', 'my_caffe_model.prototxt'), ... image_input_names = 'my_image_input', class_labels = 'labels.txt') - - - Sometimes the converter might return a message about not able to infer input data dimensions. - This happens when the input size information is absent from the deploy.prototxt file. 
This can be easily provided by editing + + + Sometimes the converter might return a message about not being able to infer input data dimensions. + This happens when the input size information is absent from the deploy.prototxt file. This can be easily provided by editing the .prototxt in a text editor. Simply add a snippet at the beginning, similar to the following, for each of the inputs to the model: - + .. code-block:: bash - + input: "my_image_input" input_dim: 1 input_dim: 3 input_dim: 227 input_dim: 227 - - Here we have specified an input with dimensions (1,3,227,227), using Caffe's convention, in the order (batch, channel, height, width). - Input name string ("my_image_input") must also match the name of the input (or "bottom", as inputs are known in Caffe) of the first layer in the .prototxt. + + Here we have specified an input with dimensions (1,3,227,227), using Caffe's convention, in the order (batch, channel, height, width). + The input name string ("my_image_input") must also match the name of the input (or "bottom", as inputs are known in Caffe) of the first layer in the .prototxt. """ from ...models import MLModel from ...models.utils import _convert_neural_network_weights_to_fp16 if model_precision not in _VALID_MLMODEL_PRECISION_TYPES: - raise RuntimeError('Model precision {} is not valid'.format(model_precision)) + raise RuntimeError("Model precision {} is not valid".format(model_precision)) import tempfile model_path = tempfile.mktemp() - _export(model_path, model, image_input_names, is_bgr, red_bias, - blue_bias, - green_bias, gray_bias, image_scale, class_labels, - predicted_feature_name) + _export( + model_path, + model, + image_input_names, + is_bgr, + red_bias, + blue_bias, + green_bias, + gray_bias, + image_scale, + class_labels, + predicted_feature_name, + ) model = MLModel(model_path) try: os.remove(model_path) @@ -202,15 +224,24 @@ return model -def _export(filename, model, image_input_names=[], is_bgr=False, - red_bias=0.0, blue_bias=0.0, green_bias=0.0, gray_bias=0.0, - image_scale=1.0, - class_labels=None, predicted_feature_name=None): +def _export( + filename, + model, + image_input_names=[], + is_bgr=False, + red_bias=0.0, + blue_bias=0.0, + green_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + class_labels=None, + predicted_feature_name=None, +): from ... import libcaffeconverter if isinstance(model, _six.string_types): src_model_path = model - prototxt_path = u'' + prototxt_path = u"" binaryproto_path = dict() elif isinstance(model, tuple): if len(model) == 3: @@ -222,13 +253,15 @@ if isinstance(image_input_names, _six.string_types): image_input_names = [image_input_names] if predicted_feature_name is None: - predicted_feature_name = u'classLabel' + predicted_feature_name = u"classLabel" if class_labels is None: - class_labels = u'' + class_labels = u"" if binaryproto_path: if not image_input_names: - raise RuntimeError('\'image_input_names\' must be provided when a mean image binaryproto path is specified. ') + raise RuntimeError( + "'image_input_names' must be provided when a mean image binaryproto path is specified. " + ) if isinstance(binaryproto_path, _six.string_types): binaryproto_paths = dict() @@ -236,25 +269,35 @@ elif isinstance(binaryproto_path, dict): binaryproto_paths = binaryproto_path else: - raise RuntimeError('Mean image binaryproto path must be a string or a dictionary of inputs names and paths. ') - - if not isinstance(is_bgr, dict): is_bgr = dict.fromkeys(image_input_names, is_bgr) - if not isinstance(red_bias, dict): red_bias = dict.fromkeys(image_input_names, red_bias) - if not isinstance(blue_bias, dict): blue_bias = dict.fromkeys(image_input_names, blue_bias) - if not isinstance(green_bias, dict): green_bias = dict.fromkeys(image_input_names, green_bias) - if not isinstance(gray_bias, dict): gray_bias = dict.fromkeys(image_input_names, gray_bias) - if not isinstance(image_scale, dict): image_scale = dict.fromkeys(image_input_names, image_scale) - - libcaffeconverter._convert_to_file(src_model_path, - filename, - binaryproto_paths, - set(image_input_names), - is_bgr, - red_bias, - blue_bias, - green_bias, - gray_bias, - image_scale, - prototxt_path, - class_labels, - predicted_feature_name) + raise RuntimeError( + "Mean image binaryproto path must be a string or a dictionary of input names and paths. " + ) + + if not isinstance(is_bgr, dict): + is_bgr = dict.fromkeys(image_input_names, is_bgr) + if not isinstance(red_bias, dict): + red_bias = dict.fromkeys(image_input_names, red_bias) + if not isinstance(blue_bias, dict): + blue_bias = dict.fromkeys(image_input_names, blue_bias) + if not isinstance(green_bias, dict): + green_bias = dict.fromkeys(image_input_names, green_bias) + if not isinstance(gray_bias, dict): + gray_bias = dict.fromkeys(image_input_names, gray_bias) + if not isinstance(image_scale, dict): + image_scale = dict.fromkeys(image_input_names, image_scale) + + libcaffeconverter._convert_to_file( + src_model_path, + filename, + binaryproto_paths, + set(image_input_names), + is_bgr, + red_bias, + blue_bias, + green_bias, + gray_bias, + image_scale, + prototxt_path, + class_labels, + predicted_feature_name, + ) diff --git a/coremltools/converters/keras/__init__.py b/coremltools/converters/keras/__init__.py index cfc3c0b4d..2ae650f55 100644 --- a/coremltools/converters/keras/__init__.py +++ b/coremltools/converters/keras/__init__.py @@ -3,20 +3,17 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from ..._deps import HAS_KERAS_TF as _HAS_KERAS_TF -from ..._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF +from ..._deps import _HAS_KERAS_TF +from ..._deps import _HAS_KERAS2_TF if _HAS_KERAS_TF or _HAS_KERAS2_TF: import keras as _keras import logging as _logging - from ._keras_converter import convert, convertToSpec - if _keras.backend.backend() != 'tensorflow': + from ._keras_converter import convert + + if _keras.backend.backend() != "tensorflow": _HAS_KERAS_TF = False _HAS_KERAS2_TF = False - _logging.warn('Currently, only Keras models with TensorFlow backend can be converted to CoreML.') - -# if _HAS_KERAS_TF: -# from ._keras_converter import convert -# -# if _HAS_KERAS2_TF: -# from ._keras2_converter import convert + _logging.warning( + "Currently, only Keras models with TensorFlow backend can be converted to Core ML."
+ ) diff --git a/coremltools/converters/keras/_keras2_converter.py b/coremltools/converters/keras/_keras2_converter.py index 22b3c6908..2135b5da8 100644 --- a/coremltools/converters/keras/_keras2_converter.py +++ b/coremltools/converters/keras/_keras2_converter.py @@ -15,14 +15,14 @@ from ...models import MLModel as _MLModel from ...models.utils import save_spec as _save_spec -from ..._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF +from ..._deps import _HAS_KERAS2_TF if _HAS_KERAS2_TF: import keras as _keras from . import _layers2 from . import _topology2 - _KERAS_LAYER_REGISTRY = { + _KERAS_LAYER_REGISTRY = { _keras.layers.core.Dense: _layers2.convert_dense, _keras.layers.core.Activation: _layers2.convert_activation, _keras.layers.advanced_activations.LeakyReLU: _layers2.convert_activation, @@ -30,7 +30,6 @@ _keras.layers.advanced_activations.ELU: _layers2.convert_activation, _keras.layers.advanced_activations.ThresholdedReLU: _layers2.convert_activation, _keras.layers.advanced_activations.Softmax: _layers2.convert_activation, - _keras.layers.convolutional.Conv2D: _layers2.convert_convolution, _keras.layers.convolutional.Conv2DTranspose: _layers2.convert_convolution, _keras.layers.convolutional.SeparableConv2D: _layers2.convert_separable_convolution, @@ -41,7 +40,6 @@ _keras.layers.convolutional.ZeroPadding2D: _layers2.convert_padding, _keras.layers.convolutional.Cropping2D: _layers2.convert_cropping, _keras.layers.convolutional.UpSampling2D: _layers2.convert_upsample, - _keras.layers.convolutional.Conv1D: _layers2.convert_convolution1d, _keras.layers.pooling.AveragePooling1D: _layers2.convert_pooling, _keras.layers.pooling.MaxPooling1D: _layers2.convert_pooling, @@ -50,47 +48,46 @@ _keras.layers.convolutional.ZeroPadding1D: _layers2.convert_padding, _keras.layers.convolutional.Cropping1D: _layers2.convert_cropping, _keras.layers.convolutional.UpSampling1D: _layers2.convert_upsample, - _keras.layers.recurrent.LSTM: _layers2.convert_lstm, _keras.layers.recurrent.SimpleRNN: _layers2.convert_simple_rnn, _keras.layers.recurrent.GRU: _layers2.convert_gru, _keras.layers.wrappers.Bidirectional: _layers2.convert_bidirectional, - _keras.layers.normalization.BatchNormalization: _layers2.convert_batchnorm, - _keras.layers.Add: _layers2.convert_merge, _keras.layers.Multiply: _layers2.convert_merge, _keras.layers.Average: _layers2.convert_merge, _keras.layers.Maximum: _layers2.convert_merge, _keras.layers.Concatenate: _layers2.convert_merge, _keras.layers.Dot: _layers2.convert_merge, - _keras.layers.core.Flatten: _layers2.convert_flatten, - _keras.layers.core.Permute:_layers2.convert_permute, - _keras.layers.core.Reshape:_layers2.convert_reshape, - _keras.layers.embeddings.Embedding:_layers2.convert_embedding, - _keras.layers.core.RepeatVector:_layers2.convert_repeat_vector, - - _keras.layers.core.Dropout:_layers2.default_skip, - _keras.layers.core.SpatialDropout2D:_layers2.default_skip, - _keras.layers.core.SpatialDropout1D:_layers2.default_skip, - _keras.layers.wrappers.TimeDistributed:_layers2.default_skip, + _keras.layers.core.Permute: _layers2.convert_permute, + _keras.layers.core.Reshape: _layers2.convert_reshape, + _keras.layers.embeddings.Embedding: _layers2.convert_embedding, + _keras.layers.core.RepeatVector: _layers2.convert_repeat_vector, + _keras.layers.core.Dropout: _layers2.default_skip, + _keras.layers.core.SpatialDropout2D: _layers2.default_skip, + _keras.layers.core.SpatialDropout1D: _layers2.default_skip, + _keras.layers.wrappers.TimeDistributed: _layers2.default_skip, } from 
distutils.version import StrictVersion as _StrictVersion + ## 2.2 Version check - if _keras.__version__ >= _StrictVersion('2.2.0'): - _KERAS_LAYER_REGISTRY[_keras.layers.DepthwiseConv2D] = \ - _layers2.convert_convolution - _KERAS_LAYER_REGISTRY[_keras.engine.input_layer.InputLayer] = \ - _layers2.default_skip - if _keras.__version__ >= _StrictVersion('2.2.1'): - _KERAS_LAYER_REGISTRY[_keras.layers.advanced_activations.ReLU] = \ - _layers2.convert_advanced_relu + if _keras.__version__ >= _StrictVersion("2.2.0"): + _KERAS_LAYER_REGISTRY[ + _keras.layers.DepthwiseConv2D + ] = _layers2.convert_convolution + _KERAS_LAYER_REGISTRY[ + _keras.engine.input_layer.InputLayer + ] = _layers2.default_skip + if _keras.__version__ >= _StrictVersion("2.2.1"): + _KERAS_LAYER_REGISTRY[ + _keras.layers.advanced_activations.ReLU + ] = _layers2.convert_advanced_relu else: - _KERAS_LAYER_REGISTRY[_keras.applications.mobilenet.DepthwiseConv2D] =\ - _layers2.convert_convolution - _KERAS_LAYER_REGISTRY[_keras.engine.topology.InputLayer] = \ - _layers2.default_skip + _KERAS_LAYER_REGISTRY[ + _keras.applications.mobilenet.DepthwiseConv2D + ] = _layers2.convert_convolution + _KERAS_LAYER_REGISTRY[_keras.engine.topology.InputLayer] = _layers2.default_skip # end if _HAS_KERAS2_TF @@ -103,13 +100,14 @@ def _is_merge_layer(layer): def _is_activation_layer(layer): - return (isinstance(layer, _keras.layers.core.Activation) or - isinstance(layer, _keras.layers.advanced_activations.LeakyReLU) or - isinstance(layer, _keras.layers.advanced_activations.PReLU) or - isinstance(layer, _keras.layers.advanced_activations.ELU) or - isinstance(layer, - _keras.layers.advanced_activations.ThresholdedReLU) or - isinstance(layer, _keras.layers.advanced_activations.Softmax)) + return ( + isinstance(layer, _keras.layers.core.Activation) + or isinstance(layer, _keras.layers.advanced_activations.LeakyReLU) + or isinstance(layer, _keras.layers.advanced_activations.PReLU) + or isinstance(layer, _keras.layers.advanced_activations.ELU) + or isinstance(layer, _keras.layers.advanced_activations.ThresholdedReLU) + or isinstance(layer, _keras.layers.advanced_activations.Softmax) + ) def _check_unsupported_layers(model, add_custom_layers=False): @@ -118,21 +116,24 @@ def _check_unsupported_layers(model, add_custom_layers=False): if add_custom_layers: return for i, layer in enumerate(model.layers): - if isinstance(layer, _keras.models.Sequential) or isinstance(layer, - _keras.models.Model): + if isinstance(layer, _keras.models.Sequential) or isinstance( + layer, _keras.models.Model + ): _check_unsupported_layers(layer) else: if type(layer) not in _KERAS_LAYER_REGISTRY: raise ValueError("Keras layer '%s' not supported. " % str(type(layer))) if isinstance(layer, _keras.layers.wrappers.TimeDistributed): if type(layer.layer) not in _KERAS_LAYER_REGISTRY: - raise ValueError( - "Keras layer '%s' not supported. " % str(type(layer.layer))) + raise ValueError( + "Keras layer '%s' not supported. " % str(type(layer.layer)) + ) if isinstance(layer, _keras.layers.wrappers.Bidirectional): - if not isinstance(layer.layer, _keras.layers.recurrent.LSTM): + if not isinstance(layer.layer, _keras.layers.recurrent.LSTM): raise ValueError( "Keras bi-directional wrapper conversion supports " - "only LSTM layer at this time. ") + "only LSTM layer at this time. 
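The `_StrictVersion` gates above are how the registry tracks Keras API moves (for example, `DepthwiseConv2D` graduating out of `keras.applications.mobilenet` in 2.2). A simplified sketch of the same pattern; the registry name and the mapped value are placeholders, not the converter's actual tables:

```python
# Sketch: version-gated registry additions, mirroring the 2.2 checks above.
# distutils version objects compare correctly against plain version strings.
from distutils.version import StrictVersion

import keras

_REGISTRY = {}
if keras.__version__ >= StrictVersion("2.2.0"):
    # Only reference DepthwiseConv2D when the installed Keras provides it.
    _REGISTRY[keras.layers.DepthwiseConv2D] = "convert_convolution"  # placeholder
```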
" + ) def _get_layer_converter_fn(layer, add_custom_layers=False): @@ -143,7 +144,7 @@ def _get_layer_converter_fn(layer, add_custom_layers=False): convert_func = _KERAS_LAYER_REGISTRY[layer_type] if convert_func is _layers2.convert_activation: act_name = _layers2._get_activation_name_from_keras_layer(layer) - if act_name == 'CUSTOM': + if act_name == "CUSTOM": return None return convert_func elif add_custom_layers: @@ -175,7 +176,7 @@ def _load_keras_model(model_network_path, model_weight_path, custom_objects=None import json # Load the model network - json_file = open(model_network_path, 'r') + json_file = open(model_network_path, "r") loaded_model_json = json_file.read() json_file.close() @@ -207,50 +208,72 @@ def _convert_training_info(model, builder, output_features): # can just invoke training repeatedly if they'd like to do more. builder.set_epochs(1) import keras + try: - if model.loss == keras.losses.categorical_crossentropy or model.loss == 'categorical_crossentropy': + if ( + model.loss == keras.losses.categorical_crossentropy + or model.loss == "categorical_crossentropy" + ): builder.set_categorical_cross_entropy_loss( - name='loss_layer', input=output_features[0][0] + name="loss_layer", input=output_features[0][0] ) - elif model.loss == keras.losses.mean_squared_error or model.loss == 'mean_squared_error': + elif ( + model.loss == keras.losses.mean_squared_error + or model.loss == "mean_squared_error" + ): builder.set_mean_squared_error_loss( - name='loss_layer', input_feature=output_features[0] + name="loss_layer", input_feature=output_features[0] ) else: - print('Models loss: ' + str(model.loss) + ', vs Keras loss: ' + str(keras.losses.mean_squared_error)) - logging.warning("Loss " + str(model.loss) + " is not yet " - "supported by Core ML. The loss layer will " - "not be carried over. To train this model, " - "you will need to manually add a supported " - "loss layer.") + print( + "Models loss: " + + str(model.loss) + + ", vs Keras loss: " + + str(keras.losses.mean_squared_error) + ) + logging.warning( + "Loss " + str(model.loss) + " is not yet " + "supported by Core ML. The loss layer will " + "not be carried over. To train this model, " + "you will need to manually add a supported " + "loss layer." + ) except AttributeError: - logging.warning("Core ML conversion was asked to respect trainable " - "parameters from the Keras model, but the input " - "model does not include a loss layer.") + logging.warning( + "Core ML conversion was asked to respect trainable " + "parameters from the Keras model, but the input " + "model does not include a loss layer." + ) try: opt = model.optimizer except AttributeError: - logging.warning("Core ML conversion was asked to respect trainable " - "parameters from the Keras model, but could not read " - "the optimizer from Keras.") + logging.warning( + "Core ML conversion was asked to respect trainable " + "parameters from the Keras model, but could not read " + "the optimizer from Keras." + ) return if model.optimizer: # a dict of the parameters we need. cfg = model.optimizer.get_config() - if 'decay' in cfg and cfg['decay'] != 0.0: - logging.warning("Keras optimizer has 'decay' set, which is " - "not supported in Core ML. This parameter " - "of the optimizer will be ignored. Clients " - "can change the learning rate from within an " - "MLUpdateTask callback to achieve the same " - "effect.") + if "decay" in cfg and cfg["decay"] != 0.0: + logging.warning( + "Keras optimizer has 'decay' set, which is " + "not supported in Core ML. 
This parameter " + "of the optimizer will be ignored. Clients " + "can change the learning rate from within an " + "MLUpdateTask callback to achieve the same " + "effect." + ) if isinstance(model.optimizer, keras.optimizers.SGD): - params = SgdParams(lr=cfg['lr'], momentum=cfg['momentum']) - if 'nesterov' in cfg and cfg['nesterov'] == True: - logging.warning("Keras SGD optimizer has 'nesterov' set, " - "but this is not supported by Core ML. " - "The parameter will be ignored.") + params = SgdParams(lr=cfg["lr"], momentum=cfg["momentum"]) + if "nesterov" in cfg and cfg["nesterov"] == True: + logging.warning( + "Keras SGD optimizer has 'nesterov' set, " + "but this is not supported by Core ML. " + "The parameter will be ignored." + ) # Keras does not require a user to specify batch size up front, # as Core ML does. We need to choose something, let's be a bit # wide to minimize the chance of user "surprise" when running. @@ -258,65 +281,76 @@ def _convert_training_info(model, builder, output_features): builder.set_sgd_optimizer(params) elif isinstance(model.optimizer, keras.optimizers.Adam): params = AdamParams( - lr=cfg['lr'], beta1=cfg['beta_1'], beta2=cfg['beta_2'], - eps=cfg['epsilon'] + lr=cfg["lr"], + beta1=cfg["beta_1"], + beta2=cfg["beta_2"], + eps=cfg["epsilon"], ) - if 'amsgrad' in cfg and cfg['amsgrad'] == True: - logging.warning("Keras Adam optimizer has 'amsgrad' set, " - "but this is not supported by Core ML. " - "The parameter will be ignored.") + if "amsgrad" in cfg and cfg["amsgrad"] == True: + logging.warning( + "Keras Adam optimizer has 'amsgrad' set, " + "but this is not supported by Core ML. " + "The parameter will be ignored." + ) # Keras does not require a user to specify batch size up front, # as Core ML does. We need to choose something, let's be a bit # wide to minimize the chance of user "surprise" when running. params.set_batch(16, [1, 16, 32]) builder.set_adam_optimizer(params) else: - logging.warning("Optimizer " + str(model.optimizer) + " is " - "not yet supported by Core ML. The optimizer " - "will not be carried over. To train this " - "model, you will need to manually add a " - "supported optimizer.") + logging.warning( + "Optimizer " + str(model.optimizer) + " is " + "not yet supported by Core ML. The optimizer " + "will not be carried over. To train this " + "model, you will need to manually add a " + "supported optimizer." + ) else: - logging.warning("Core ML conversion was asked to respect " - "trainable parameters from the Keras model, but " - "the input model does not include an optimizer.") - - -def _convert(model, - input_names=None, - output_names=None, - image_input_names=None, - input_name_shape_dict={}, - is_bgr=False, - red_bias=0.0, - green_bias=0.0, - blue_bias=0.0, - gray_bias=0.0, - image_scale=1.0, - class_labels=None, - predicted_feature_name=None, - predicted_probabilities_output='', - add_custom_layers=False, - custom_conversion_functions=None, - custom_objects=None, - input_shapes=None, - output_shapes=None, - respect_trainable=False, - use_float_arraytype=False): - + logging.warning( + "Core ML conversion was asked to respect " + "trainable parameters from the Keras model, but " + "the input model does not include an optimizer." 
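The optimizer carry-over above has to invent one piece of information: Keras never fixes a batch size, while Core ML requires one, hence the `set_batch(16, [1, 16, 32])` calls. A sketch of the SGD mapping in isolation; the import path for `SgdParams` is my assumption, since the diff does not show it:

```python
# Sketch: map a Keras SGD config onto Core ML's updatable-model SGD
# parameters, mirroring the branch above. `cfg` imitates the dict
# returned by a keras Optimizer's get_config().
from coremltools.models.neural_network.update_optimizer_utils import SgdParams

cfg = {"lr": 0.01, "momentum": 0.9, "decay": 0.0, "nesterov": False}
params = SgdParams(lr=cfg["lr"], momentum=cfg["momentum"])
params.set_batch(16, [1, 16, 32])  # Core ML needs an explicit batch size up front
```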
+ ) + + +def _convert( + model, + input_names=None, + output_names=None, + image_input_names=None, + input_name_shape_dict={}, + is_bgr=False, + red_bias=0.0, + green_bias=0.0, + blue_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + class_labels=None, + predicted_feature_name=None, + predicted_probabilities_output="", + add_custom_layers=False, + custom_conversion_functions=None, + custom_objects=None, + input_shapes=None, + output_shapes=None, + respect_trainable=False, + use_float_arraytype=False, +): # Check Keras format - if _keras.backend.image_data_format() == 'channels_first': - print("Keras image data format 'channels_first' detected. Currently " - "only 'channels_last' is supported. " - "Changing to 'channels_last', but your model may not be converted " - "converted properly.") - _keras.backend.set_image_data_format('channels_last') + if _keras.backend.image_data_format() == "channels_first": + print( + "Keras image data format 'channels_first' detected. Currently " + "only 'channels_last' is supported. " + "Changing to 'channels_last', but your model may not be " + "converted properly." + ) + _keras.backend.set_image_data_format("channels_last") # Check custom conversion functions / custom objects add_custom_layers = custom_conversion_functions is not None if isinstance(model, _string_types): - model = _keras.models.load_model(model, custom_objects = custom_objects) + model = _keras.models.load_model(model, custom_objects=custom_objects) elif isinstance(model, tuple): model = _load_keras_model(model[0], model[1]) @@ -339,13 +373,13 @@ if isinstance(input_names, _string_types): input_names = [input_names] else: - input_names = ['input' + str(i+1) for i in range(len(inputs))] + input_names = ["input" + str(i + 1) for i in range(len(inputs))] if output_names is not None: if isinstance(output_names, _string_types): output_names = [output_names] else: - output_names = ['output' + str(i+1) for i in range(len(outputs))] + output_names = ["output" + str(i + 1) for i in range(len(outputs))] if image_input_names is not None and isinstance(image_input_names, _string_types): image_input_names = [image_input_names] @@ -378,7 +412,7 @@ for idx, dim in enumerate(input_dims): if input_names[idx] in input_name_shape_dict: unfiltered_shape = input_name_shape_dict[input_names[idx]] - dim = list(filter(None,unfiltered_shape)) + dim = list(filter(None, unfiltered_shape)) else: unfiltered_shape = unfiltered_shapes[idx] dim = list(input_dims[idx]) @@ -397,7 +431,9 @@ input_dims[idx] = (dim[1],) elif len(dim) == 1: s = graph.get_successors(inputs[idx])[0] - if isinstance(graph.get_keras_layer(s), _keras.layers.embeddings.Embedding): + if isinstance( + graph.get_keras_layer(s), _keras.layers.embeddings.Embedding + ): # Embedding layer's special input (None, D) where D is # actually sequence length input_dims[idx] = (1,) @@ -418,8 +454,10 @@ else: errMsg = "Invalid input shape for '{}'.\n".format(input_names[idx]) errMsg += "Please provide a finite channel value (D) using " - errMsg += "input_name_shape_dict arg with key = '{}' and " \ - "value = [None, None, D]".format(input_names[idx]) + errMsg += ( + "input_name_shape_dict arg with key = '{}' and " + "value = [None, None, D]".format(input_names[idx]) + ) raise ValueError(errMsg) elif len(unfiltered_shape) == 4: @@ -427,24 +465,32 @@ input_dims[idx] = (dim[2], dim[0], dim[1]) else: errMsg = "Invalid input shape for
'{}'.\n".format(input_names[idx]) - errMsg += "Please provide a finite height (H), width (W) & " \ - "channel value (C) " - errMsg += "using input_name_shape_dict arg with key = '{}' " \ - "and value = [None, H, W, C]\n".format(input_names[idx]) - errMsg += "Converted .mlmodel can be modified to have flexible " \ - "input shape using coremltools.models.neural_network.flexible_shape_utils" + errMsg += ( + "Please provide a finite height (H), width (W) & " + "channel value (C) " + ) + errMsg += ( + "using input_name_shape_dict arg with key = '{}' " + "and value = [None, H, W, C]\n".format(input_names[idx]) + ) + errMsg += ( + "Converted .mlmodel can be modified to have flexible " + "input shape using coremltools.models.neural_network.flexible_shape_utils" + ) raise ValueError(errMsg) elif len(unfiltered_shape) == 5: if len(dim) == 4: # keras uses the reverse notation from CoreML input_dims[idx] = (dim[-1], dim[-3], dim[-2]) else: - errMsg = "Invalid input shape for '{}', shape:{}.\n".format(input_names[idx], - str(unfiltered_shape)) + errMsg = "Invalid input shape for '{}', shape:{}.\n".format( + input_names[idx], str(unfiltered_shape) + ) raise ValueError(errMsg) else: - raise ValueError("Input '%s' has input shape of length %d" - % (input_names[idx], len(dim))) + raise ValueError( + "Input '%s' has input shape of length %d" % (input_names[idx], len(dim)) + ) # Retrieve output shapes from model if len(model._outbound_nodes) > 1 and output_shapes is not None: @@ -472,7 +518,7 @@ def _convert(model, output_names = map(str, output_names) is_classifier = class_labels is not None if is_classifier: - mode = 'classifier' + mode = "classifier" else: mode = None @@ -480,8 +526,12 @@ def _convert(model, input_features = list(zip(input_names, input_types)) output_features = list(zip(output_names, output_types)) - builder = _NeuralNetworkBuilder(input_features, output_features, mode = mode, - use_float_arraytype=use_float_arraytype) + builder = _NeuralNetworkBuilder( + input_features, + output_features, + mode=mode, + use_float_arraytype=use_float_arraytype, + ) for iter, layer in enumerate(graph.layer_list): keras_layer = graph.keras_layer_map[layer] @@ -492,11 +542,18 @@ def _convert(model, input_names, output_names = graph.get_layer_blobs(layer) # this may be none if we're using custom layers if converter_func: - converter_func(builder, layer, input_names, output_names, - keras_layer, respect_trainable) + converter_func( + builder, + layer, + input_names, + output_names, + keras_layer, + respect_trainable, + ) else: if _is_activation_layer(keras_layer): import six + if six.PY2: layer_name = keras_layer.activation.func_name else: @@ -519,30 +576,40 @@ def _convert(model, classes_in = class_labels if isinstance(classes_in, _string_types): import os + if not os.path.isfile(classes_in): - raise ValueError("Path to class labels (%s) does not exist." % classes_in) - with open(classes_in, 'r') as f: + raise ValueError( + "Path to class labels (%s) does not exist." 
% classes_in + ) + with open(classes_in, "r") as f: classes = f.read() classes = classes.splitlines() - elif type(classes_in) is list: # list[int or str] + elif type(classes_in) is list: # list[int or str] classes = classes_in else: - raise ValueError('Class labels must be a list of integers / strings, or a file path') + raise ValueError( + "Class labels must be a list of integers / strings, or a file path" + ) if predicted_feature_name is not None: - builder.set_class_labels(classes, predicted_feature_name = predicted_feature_name, - prediction_blob = predicted_probabilities_output) + builder.set_class_labels( + classes, + predicted_feature_name=predicted_feature_name, + prediction_blob=predicted_probabilities_output, + ) else: builder.set_class_labels(classes) # Set pre-processing parameters - builder.set_pre_processing_parameters(image_input_names=image_input_names, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale) + builder.set_pre_processing_parameters( + image_input_names=image_input_names, + is_bgr=is_bgr, + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + image_scale=image_scale, + ) # add in the loss and optimizer, if the network has it and that is # appropriate given the flag. @@ -556,16 +623,21 @@ def _convert(model, # coremltools.models.utils.convert_double_to_float_multiarray_type(spec) has_double_multiarray = False for feature in list(spec.description.input) + list(spec.description.output): - if feature.type.HasField('multiArrayType'): - if feature.type.multiArrayType.dataType == _Model_pb2.ArrayFeatureType.DOUBLE: + if feature.type.HasField("multiArrayType"): + if ( + feature.type.multiArrayType.dataType + == _Model_pb2.ArrayFeatureType.DOUBLE + ): has_double_multiarray = True break if has_double_multiarray: - print("\n\nRecommendation: This model has at least one multiarray input/output of type double.\n" - "For large sized arrays, multiarrays of type float32 are more efficient.\n" - "In future, float input/output multiarrays will be produced by default by the converter.\n" - "Please use, either the flag 'use_float_arraytype' during the call to convert or\n" - "the utility 'coremltools.utils.convert_double_to_float_multiarray_type(spec)', post-conversion.\n\n") + print( + "\n\nRecommendation: This model has at least one multiarray input/output of type double.\n" + "For large sized arrays, multiarrays of type float32 are more efficient.\n" + "In future, float input/output multiarrays will be produced by default by the converter.\n" + "Please use, either the flag 'use_float_arraytype' during the call to convert or\n" + "the utility 'coremltools.utils.convert_double_to_float_multiarray_type(spec)', post-conversion.\n\n" + ) return spec diff --git a/coremltools/converters/keras/_keras_converter.py b/coremltools/converters/keras/_keras_converter.py index 42f867c00..32c87e95b 100644 --- a/coremltools/converters/keras/_keras_converter.py +++ b/coremltools/converters/keras/_keras_converter.py @@ -8,20 +8,25 @@ from ...models.neural_network import NeuralNetworkBuilder as _NeuralNetworkBuilder from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 from collections import OrderedDict as _OrderedDict -from ...models import datatypes +from ...models import datatypes, _METADATA_VERSION, _METADATA_SOURCE from ...models import MLModel as _MLModel -from ...models import _MLMODEL_FULL_PRECISION, _MLMODEL_HALF_PRECISION, _VALID_MLMODEL_PRECISION_TYPES +from 
...models import ( + _MLMODEL_FULL_PRECISION, + _MLMODEL_HALF_PRECISION, + _VALID_MLMODEL_PRECISION_TYPES, +) from ...models.utils import _convert_neural_network_spec_weights_to_fp16 -from ..._deps import HAS_KERAS_TF as _HAS_KERAS_TF -from ..._deps import HAS_KERAS2_TF as _HAS_KERAS2_TF +from ..._deps import _HAS_KERAS_TF +from ..._deps import _HAS_KERAS2_TF +from coremltools import __version__ as ct_version if _HAS_KERAS_TF: import keras as _keras from . import _layers from . import _topology - _KERAS_LAYER_REGISTRY = { + _KERAS_LAYER_REGISTRY = { _keras.layers.core.Dense: _layers.convert_dense, _keras.layers.core.Activation: _layers.convert_activation, _keras.layers.advanced_activations.LeakyReLU: _layers.convert_activation, @@ -30,7 +35,6 @@ _keras.layers.advanced_activations.ParametricSoftplus: _layers.convert_activation, _keras.layers.advanced_activations.ThresholdedReLU: _layers.convert_activation, _keras.activations.softmax: _layers.convert_activation, - _keras.layers.convolutional.Convolution2D: _layers.convert_convolution, _keras.layers.convolutional.Deconvolution2D: _layers.convert_convolution, _keras.layers.convolutional.AtrousConvolution2D: _layers.convert_convolution, @@ -41,7 +45,6 @@ _keras.layers.convolutional.ZeroPadding2D: _layers.convert_padding, _keras.layers.convolutional.Cropping2D: _layers.convert_cropping, _keras.layers.convolutional.UpSampling2D: _layers.convert_upsample, - _keras.layers.convolutional.Convolution1D: _layers.convert_convolution1d, _keras.layers.convolutional.AtrousConvolution1D: _layers.convert_convolution1d, _keras.layers.convolutional.AveragePooling1D: _layers.convert_pooling, @@ -51,57 +54,57 @@ _keras.layers.convolutional.ZeroPadding1D: _layers.convert_padding, _keras.layers.convolutional.Cropping1D: _layers.convert_cropping, _keras.layers.convolutional.UpSampling1D: _layers.convert_upsample, - _keras.layers.recurrent.LSTM: _layers.convert_lstm, _keras.layers.recurrent.SimpleRNN: _layers.convert_simple_rnn, _keras.layers.recurrent.GRU: _layers.convert_gru, _keras.layers.wrappers.Bidirectional: _layers.convert_bidirectional, - _keras.layers.normalization.BatchNormalization: _layers.convert_batchnorm, _keras.engine.topology.Merge: _layers.convert_merge, _keras.layers.core.Flatten: _layers.convert_flatten, - _keras.layers.core.Permute:_layers.convert_permute, - _keras.layers.core.Reshape:_layers.convert_reshape, - _keras.layers.embeddings.Embedding:_layers.convert_embedding, - - _keras.layers.core.RepeatVector:_layers.convert_repeat_vector, - + _keras.layers.core.Permute: _layers.convert_permute, + _keras.layers.core.Reshape: _layers.convert_reshape, + _keras.layers.embeddings.Embedding: _layers.convert_embedding, + _keras.layers.core.RepeatVector: _layers.convert_repeat_vector, ## All the layers that can be skipped (merged with conv) - _keras.engine.topology.InputLayer:_layers.default_skip, - _keras.layers.core.Dropout:_layers.default_skip, - _keras.layers.wrappers.TimeDistributed:_layers.default_skip, - + _keras.engine.topology.InputLayer: _layers.default_skip, + _keras.layers.core.Dropout: _layers.default_skip, + _keras.layers.wrappers.TimeDistributed: _layers.default_skip, } _KERAS_SKIP_LAYERS = [ _keras.layers.core.Dropout, ] + def _check_unsupported_layers(model): for i, layer in enumerate(model.layers): - if isinstance(layer, _keras.models.Sequential) or isinstance(layer, - _keras.models.Model): + if isinstance(layer, _keras.models.Sequential) or isinstance( + layer, _keras.models.Model + ): _check_unsupported_layers(layer) else: if 
type(layer) not in _KERAS_LAYER_REGISTRY: - raise ValueError( - "Keras layer '%s' not supported. " % str(type(layer))) + raise ValueError("Keras layer '%s' not supported. " % str(type(layer))) if isinstance(layer, _keras.engine.topology.Merge): if layer.layers is None: continue for merge_layer in layer.layers: - if isinstance(merge_layer, _keras.models.Sequential) or \ - isinstance(merge_layer, _keras.models.Model): + if isinstance(merge_layer, _keras.models.Sequential) or isinstance( + merge_layer, _keras.models.Model + ): _check_unsupported_layers(merge_layer) if isinstance(layer, _keras.layers.wrappers.TimeDistributed): if type(layer.layer) not in _KERAS_LAYER_REGISTRY: - raise ValueError( - "Keras layer '%s' not supported. " % str(type(layer.layer))) + raise ValueError( + "Keras layer '%s' not supported. " % str(type(layer.layer)) + ) if isinstance(layer, _keras.layers.wrappers.Bidirectional): - if not isinstance(layer.layer, _keras.layers.recurrent.LSTM): + if not isinstance(layer.layer, _keras.layers.recurrent.LSTM): raise ValueError( "Keras bi-directional wrapper conversion supports only " - "LSTM layer at this time. ") + "LSTM layer at this time. " + ) + def _get_layer_converter_fn(layer): """Get the right converter function for Keras @@ -136,7 +139,7 @@ def _load_keras_model(model_network_path, model_weight_path, custom_objects=None import json # Load the model network - json_file = open(model_network_path, 'r') + json_file = open(model_network_path, "r") json_string = json_file.read() json_file.close() loaded_model_json = json.loads(json_string) @@ -150,29 +153,33 @@ def _load_keras_model(model_network_path, model_weight_path, custom_objects=None return loaded_model -def _convert(model, - input_names = None, - output_names = None, - image_input_names = None, - is_bgr = False, - red_bias = 0.0, - green_bias = 0.0, - blue_bias = 0.0, - gray_bias = 0.0, - image_scale = 1.0, - class_labels = None, - predicted_feature_name = None, - predicted_probabilities_output = '', - custom_objects = None, - respect_trainable = False): - - if not(_HAS_KERAS_TF): - raise RuntimeError('keras not found or unsupported version or backend ' - 'found. keras conversion API is disabled.') + +def _convert( + model, + input_names=None, + output_names=None, + image_input_names=None, + is_bgr=False, + red_bias=0.0, + green_bias=0.0, + blue_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + class_labels=None, + predicted_feature_name=None, + predicted_probabilities_output="", + custom_objects=None, + respect_trainable=False, +): + if not (_HAS_KERAS_TF): + raise RuntimeError( + "keras not found or unsupported version or backend " + "found. keras conversion API is disabled." 
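The `_HAS_KERAS_TF` guard in the hunk above is the same optional-dependency pattern used throughout this diff: probe for the backend once at import time, record a module-level flag, and hard-fail in the entry point if the flag is unset. A minimal sketch of the pattern with assumed, simplified names:

```python
# Sketch (assumed names): probe an optional backend at import time,
# then gate the converter entry point on the resulting flag.
try:
    import keras
    _HAS_KERAS_TF = keras.backend.backend() == "tensorflow"
except ImportError:
    _HAS_KERAS_TF = False


def convert_guarded(*args, **kwargs):
    if not _HAS_KERAS_TF:
        raise RuntimeError(
            "keras not found or unsupported version or backend found. "
            "keras conversion API is disabled."
        )
```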
+ ) if isinstance(model, _string_types): - model = _keras.models.load_model(model, custom_objects = custom_objects) + model = _keras.models.load_model(model, custom_objects=custom_objects) elif isinstance(model, tuple): - model = _load_keras_model(model[0], model[1], custom_objects = custom_objects) + model = _load_keras_model(model[0], model[1], custom_objects=custom_objects) # Check valid versions _check_unsupported_layers(model) @@ -199,12 +206,12 @@ if isinstance(input_names, _string_types): input_names = [input_names] else: - input_names = ['input' + str(i+1) for i in range(len(inputs))] + input_names = ["input" + str(i + 1) for i in range(len(inputs))] if output_names is not None: if isinstance(output_names, _string_types): output_names = [output_names] else: - output_names = ['output' + str(i+1) for i in range(len(outputs))] + output_names = ["output" + str(i + 1) for i in range(len(outputs))] if image_input_names is not None and isinstance(image_input_names, _string_types): image_input_names = [image_input_names] @@ -242,19 +249,20 @@ # sequence length input_dims[idx] = (1,) else: - input_dims[idx] = dim # dim is just a number + input_dims[idx] = dim  # dim is just a number elif len(dim) == 2: # [Seq, D] input_dims[idx] = (dim[1],) - elif len(dim) == 3: #H,W,C - if (len(unfiltered_shape) > 3): + elif len(dim) == 3:  # H,W,C + if len(unfiltered_shape) > 3: # keras uses the reverse notation from us input_dims[idx] = (dim[2], dim[0], dim[1]) - else: # keras provided fixed batch and sequence length, so the input + else:  # keras provided fixed batch and sequence length, so the input # was (batch, sequence, channel) input_dims[idx] = (dim[2],) else: - raise ValueError('Input' + input_names[idx] + - 'has input shape of length' + str(len(dim))) + raise ValueError( + "Input '" + input_names[idx] + "' has input shape of length " + str(len(dim)) + ) # Retrieve output shapes from model if type(model.output_shape) is list: @@ -278,7 +286,7 @@ output_names = map(str, output_names) is_classifier = class_labels is not None if is_classifier: - mode = 'classifier' + mode = "classifier" else: mode = None @@ -286,7 +294,7 @@ input_features = list(zip(input_names, input_types)) output_features = list(zip(output_names, output_types)) - builder = _NeuralNetworkBuilder(input_features, output_features, mode = mode) + builder = _NeuralNetworkBuilder(input_features, output_features, mode=mode) for iter, layer in enumerate(graph.layer_list): keras_layer = graph.keras_layer_map[layer] @@ -310,59 +318,70 @@ classes_in = class_labels if isinstance(classes_in, _string_types): import os + if not os.path.isfile(classes_in): - raise ValueError("Path to class labels (%s) does not exist." - % classes_in) - with open(classes_in, 'r') as f: + raise ValueError( + "Path to class labels (%s) does not exist."
% classes_in + ) + with open(classes_in, "r") as f: classes = f.read() classes = classes.splitlines() - elif type(classes_in) is list: # list[int or str] + elif type(classes_in) is list: # list[int or str] classes = classes_in else: - raise ValueError('Class labels must be a list of integers / strings, or a file path') + raise ValueError( + "Class labels must be a list of integers / strings, or a file path" + ) if predicted_feature_name is not None: - builder.set_class_labels(classes, predicted_feature_name = predicted_feature_name, - prediction_blob = predicted_probabilities_output) + builder.set_class_labels( + classes, + predicted_feature_name=predicted_feature_name, + prediction_blob=predicted_probabilities_output, + ) else: builder.set_class_labels(classes) # Set pre-processing paramsters - builder.set_pre_processing_parameters(image_input_names = image_input_names, - is_bgr = is_bgr, - red_bias = red_bias, - green_bias = green_bias, - blue_bias = blue_bias, - gray_bias = gray_bias, - image_scale = image_scale) + builder.set_pre_processing_parameters( + image_input_names=image_input_names, + is_bgr=is_bgr, + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + image_scale=image_scale, + ) # Return the protobuf spec spec = builder.spec return spec -def convertToSpec(model, - input_names = None, - output_names = None, - image_input_names = None, - input_name_shape_dict = {}, - is_bgr = False, - red_bias = 0.0, - green_bias = 0.0, - blue_bias = 0.0, - gray_bias = 0.0, - image_scale = 1.0, - class_labels = None, - predicted_feature_name = None, - model_precision = _MLMODEL_FULL_PRECISION, - predicted_probabilities_output = '', - add_custom_layers = False, - custom_conversion_functions = None, - custom_objects=None, - input_shapes = None, - output_shapes = None, - respect_trainable = False, - use_float_arraytype = False): +def _convert_to_spec( + model, + input_names=None, + output_names=None, + image_input_names=None, + input_name_shape_dict={}, + is_bgr=False, + red_bias=0.0, + green_bias=0.0, + blue_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + class_labels=None, + predicted_feature_name=None, + model_precision=_MLMODEL_FULL_PRECISION, + predicted_probabilities_output="", + add_custom_layers=False, + custom_conversion_functions=None, + custom_objects=None, + input_shapes=None, + output_shapes=None, + respect_trainable=False, + use_float_arraytype=False, +): """ Convert a Keras model to Core ML protobuf specification (.mlmodel). 
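With `convertToSpec` renamed to the private `_convert_to_spec` in the hunk above, external callers now go through `convert`, which, as the end of this file shows, also stamps the coremltools and Keras versions into the model's user-defined metadata. An illustrative call with placeholder paths:

```python
# Illustrative: the public Keras entry point after the rename.
import coremltools

mlmodel = coremltools.converters.keras.convert(
    "my_model.h5",              # placeholder saved-model path
    input_names=["image"],
    image_input_names=["image"],
    class_labels="labels.txt",  # placeholder newline-separated label file
)
print(mlmodel.user_defined_metadata)  # includes coremltools and keras versions
```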
@@ -541,50 +560,56 @@ def convertToSpec(model, """ if model_precision not in _VALID_MLMODEL_PRECISION_TYPES: - raise RuntimeError('Model precision {} is not valid'.format(model_precision)) + raise RuntimeError("Model precision {} is not valid".format(model_precision)) if _HAS_KERAS_TF: - spec = _convert(model=model, - input_names=input_names, - output_names=output_names, - image_input_names=image_input_names, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale, - class_labels=class_labels, - predicted_feature_name=predicted_feature_name, - predicted_probabilities_output=predicted_probabilities_output, - custom_objects=custom_objects, - respect_trainable=respect_trainable) + spec = _convert( + model=model, + input_names=input_names, + output_names=output_names, + image_input_names=image_input_names, + is_bgr=is_bgr, + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + image_scale=image_scale, + class_labels=class_labels, + predicted_feature_name=predicted_feature_name, + predicted_probabilities_output=predicted_probabilities_output, + custom_objects=custom_objects, + respect_trainable=respect_trainable, + ) elif _HAS_KERAS2_TF: from . import _keras2_converter - spec = _keras2_converter._convert(model=model, - input_names=input_names, - output_names=output_names, - image_input_names=image_input_names, - input_name_shape_dict=input_name_shape_dict, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale, - class_labels=class_labels, - predicted_feature_name=predicted_feature_name, - predicted_probabilities_output=predicted_probabilities_output, - add_custom_layers=add_custom_layers, - custom_conversion_functions=custom_conversion_functions, - custom_objects=custom_objects, - input_shapes=input_shapes, - output_shapes=output_shapes, - respect_trainable=respect_trainable, - use_float_arraytype=use_float_arraytype) + + spec = _keras2_converter._convert( + model=model, + input_names=input_names, + output_names=output_names, + image_input_names=image_input_names, + input_name_shape_dict=input_name_shape_dict, + is_bgr=is_bgr, + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + image_scale=image_scale, + class_labels=class_labels, + predicted_feature_name=predicted_feature_name, + predicted_probabilities_output=predicted_probabilities_output, + add_custom_layers=add_custom_layers, + custom_conversion_functions=custom_conversion_functions, + custom_objects=custom_objects, + input_shapes=input_shapes, + output_shapes=output_shapes, + respect_trainable=respect_trainable, + use_float_arraytype=use_float_arraytype, + ) else: raise RuntimeError( - 'Keras not found or unsupported version or backend found. keras conversion API is disabled.') + "Keras not found or unsupported version or backend found. keras conversion API is disabled." 
+ ) if model_precision == _MLMODEL_HALF_PRECISION and model is not None: spec = _convert_neural_network_spec_weights_to_fp16(spec) @@ -592,28 +617,29 @@ def convertToSpec(model, return spec -def convert(model, - input_names = None, - output_names = None, - image_input_names = None, - input_name_shape_dict = {}, - is_bgr = False, - red_bias = 0.0, - green_bias = 0.0, - blue_bias = 0.0, - gray_bias = 0.0, - image_scale = 1.0, - class_labels = None, - predicted_feature_name = None, - model_precision = _MLMODEL_FULL_PRECISION, - predicted_probabilities_output = '', - add_custom_layers = False, - custom_conversion_functions = None, - input_shapes = None, - output_shapes = None, - respect_trainable = False, - use_float_arraytype = False): - +def convert( + model, + input_names=None, + output_names=None, + image_input_names=None, + input_name_shape_dict={}, + is_bgr=False, + red_bias=0.0, + green_bias=0.0, + blue_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + class_labels=None, + predicted_feature_name=None, + model_precision=_MLMODEL_FULL_PRECISION, + predicted_probabilities_output="", + add_custom_layers=False, + custom_conversion_functions=None, + input_shapes=None, + output_shapes=None, + respect_trainable=False, + use_float_arraytype=False, +): """ Convert a Keras model to Core ML protobuf specification (.mlmodel). @@ -780,26 +806,35 @@ def convert(model, ... ['my_input_1', 'my_input_2'], output_names = ['my_output']) """ - spec = convertToSpec(model, - input_names=input_names, - output_names=output_names, - image_input_names=image_input_names, - input_name_shape_dict=input_name_shape_dict, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale, - class_labels=class_labels, - predicted_feature_name=predicted_feature_name, - model_precision=model_precision, - predicted_probabilities_output=predicted_probabilities_output, - add_custom_layers=add_custom_layers, - custom_conversion_functions=custom_conversion_functions, - input_shapes=input_shapes, - output_shapes=output_shapes, - respect_trainable=respect_trainable, - use_float_arraytype=use_float_arraytype) - - return _MLModel(spec) + spec = _convert_to_spec( + model, + input_names=input_names, + output_names=output_names, + image_input_names=image_input_names, + input_name_shape_dict=input_name_shape_dict, + is_bgr=is_bgr, + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + image_scale=image_scale, + class_labels=class_labels, + predicted_feature_name=predicted_feature_name, + model_precision=model_precision, + predicted_probabilities_output=predicted_probabilities_output, + add_custom_layers=add_custom_layers, + custom_conversion_functions=custom_conversion_functions, + input_shapes=input_shapes, + output_shapes=output_shapes, + respect_trainable=respect_trainable, + use_float_arraytype=use_float_arraytype, + ) + + model = _MLModel(spec) + + from keras import __version__ as keras_version + + model.user_defined_metadata[_METADATA_VERSION] = ct_version + model.user_defined_metadata[_METADATA_SOURCE] = "keras=={0}".format(keras_version) + + return model diff --git a/coremltools/converters/keras/_layers.py b/coremltools/converters/keras/_layers.py index be82cd3bc..bb8c8614d 100644 --- a/coremltools/converters/keras/_layers.py +++ b/coremltools/converters/keras/_layers.py @@ -10,105 +10,115 @@ def _get_recurrent_activation_name_from_keras(activation): if activation == keras.activations.sigmoid: - activation_str = 'SIGMOID' + 
activation_str = "SIGMOID" elif activation == keras.activations.hard_sigmoid: - activation_str = 'SIGMOID_HARD' + activation_str = "SIGMOID_HARD" elif activation == keras.activations.tanh: - activation_str = 'TANH' + activation_str = "TANH" elif activation == keras.activations.relu: - activation_str = 'RELU' + activation_str = "RELU" elif activation == keras.activations.linear: - activation_str = 'LINEAR' + activation_str = "LINEAR" else: raise NotImplementedError( - 'activation %s not supported for Recurrent layer.' % activation) + "activation %s not supported for Recurrent layer." % activation + ) return activation_str -def _get_activation_name_from_keras_layer(keras_layer): +def _get_activation_name_from_keras_layer(keras_layer): if isinstance(keras_layer, keras.layers.advanced_activations.LeakyReLU): - non_linearity = 'LEAKYRELU' + non_linearity = "LEAKYRELU" elif isinstance(keras_layer, keras.layers.advanced_activations.PReLU): - non_linearity = 'PRELU' + non_linearity = "PRELU" elif isinstance(keras_layer, keras.layers.advanced_activations.ELU): - non_linearity = 'ELU' + non_linearity = "ELU" elif isinstance(keras_layer, keras.layers.advanced_activations.ParametricSoftplus): - non_linearity = 'PARAMETRICSOFTPLUS' + non_linearity = "PARAMETRICSOFTPLUS" elif isinstance(keras_layer, keras.layers.advanced_activations.ThresholdedReLU): - non_linearity = 'THRESHOLDEDRELU' + non_linearity = "THRESHOLDEDRELU" else: import six + if six.PY2: act_name = keras_layer.activation.func_name else: act_name = keras_layer.activation.__name__ - if act_name == 'softmax': - non_linearity = 'SOFTMAX' - elif act_name == 'sigmoid': - non_linearity = 'SIGMOID' - elif act_name == 'tanh': - non_linearity = 'TANH' - elif act_name == 'relu': - non_linearity = 'RELU' - elif act_name == 'softplus': - non_linearity = 'SOFTPLUS' - elif act_name == 'softsign': - non_linearity = 'SOFTSIGN' - elif act_name == 'hard_sigmoid': - non_linearity = 'SIGMOID_HARD' - elif act_name == 'linear': - non_linearity = 'LINEAR' + if act_name == "softmax": + non_linearity = "SOFTMAX" + elif act_name == "sigmoid": + non_linearity = "SIGMOID" + elif act_name == "tanh": + non_linearity = "TANH" + elif act_name == "relu": + non_linearity = "RELU" + elif act_name == "softplus": + non_linearity = "SOFTPLUS" + elif act_name == "softsign": + non_linearity = "SOFTSIGN" + elif act_name == "hard_sigmoid": + non_linearity = "SIGMOID_HARD" + elif act_name == "linear": + non_linearity = "LINEAR" else: - _utils.raise_error_unsupported_categorical_option('activation', - act_name, 'Dense', ## - keras_layer.name) + _utils.raise_error_unsupported_categorical_option( + "activation", act_name, "Dense", keras_layer.name ## + ) return non_linearity + def _get_elementwise_name_from_keras_layer(keras_layer): """ Get the keras layer name from the activation name. 
""" mode = keras_layer.mode - if mode == 'sum': - return 'ADD' - elif mode == 'mul': - return 'MULTIPLY' - elif mode == 'concat': - if len(keras_layer.input_shape[0]) == 3 and (keras_layer.concat_axis == 1 - or keras_layer.concat_axis == -2): - return 'SEQUENCE_CONCAT' - elif len(keras_layer.input_shape[0]) == 4 and (keras_layer.concat_axis == 3 - or keras_layer.concat_axis == -1): - return 'CONCAT' - elif len(keras_layer.input_shape[0]) == 2 and (keras_layer.concat_axis == 1 - or keras_layer.concat_axis == -1): - return 'CONCAT' + if mode == "sum": + return "ADD" + elif mode == "mul": + return "MULTIPLY" + elif mode == "concat": + if len(keras_layer.input_shape[0]) == 3 and ( + keras_layer.concat_axis == 1 or keras_layer.concat_axis == -2 + ): + return "SEQUENCE_CONCAT" + elif len(keras_layer.input_shape[0]) == 4 and ( + keras_layer.concat_axis == 3 or keras_layer.concat_axis == -1 + ): + return "CONCAT" + elif len(keras_layer.input_shape[0]) == 2 and ( + keras_layer.concat_axis == 1 or keras_layer.concat_axis == -1 + ): + return "CONCAT" else: - option = "input_shape = %s concat_axis = %s" \ - % (str(keras_layer.input_shape[0]), str(keras_layer.concat_axis)) + option = "input_shape = %s concat_axis = %s" % ( + str(keras_layer.input_shape[0]), + str(keras_layer.concat_axis), + ) _utils.raise_error_unsupported_option(option, mode, keras_layer.name) - elif mode == 'cos': - if len(keras_layer.input_shape[0]) == 2: - return 'COS' + elif mode == "cos": + if len(keras_layer.input_shape[0]) == 2: + return "COS" else: option = "input_shape = %s" % (str(keras_layer.input_shape[0])) _utils.raise_error_unsupported_option(option, mode, keras_layer.name) - elif mode == 'dot': - if len(keras_layer.input_shape[0]) == 2: - return 'DOT' + elif mode == "dot": + if len(keras_layer.input_shape[0]) == 2: + return "DOT" else: option = "input_shape = %s" % (str(keras_layer.input_shape[0])) _utils.raise_error_unsupported_option(option, mode, keras_layer.name) - elif mode == 'max': - return 'MAX' - elif mode == 'ave': - return 'AVE' + elif mode == "max": + return "MAX" + elif mode == "ave": + return "AVE" else: - _utils.raise_error_unsupported_categorical_option('mode', mode, 'Merge', - keras_layer.name) + _utils.raise_error_unsupported_categorical_option( + "mode", mode, "Merge", keras_layer.name + ) + def _same_elements_per_channel(x): """ @@ -116,12 +126,13 @@ def _same_elements_per_channel(x): """ eps = 1e-5 dims = x.shape - for c in range(dims[-1]): - xc = x[:,:,c].flatten() + for c in range(dims[-1]): + xc = x[:, :, c].flatten() if not np.all(np.absolute(xc - xc[0]) < eps): return False return True + def convert_dense(builder, layer, input_names, output_names, keras_layer): """Convert a dense layer from keras to coreml. 
@@ -138,17 +149,20 @@ def convert_dense(builder, layer, input_names, output_names, keras_layer): has_bias = keras_layer.bias # Get the weights from keras - W = keras_layer.get_weights ()[0].T - Wb = keras_layer.get_weights ()[1].T if has_bias else None - - builder.add_inner_product(name = layer, - W = W, - b = Wb, - input_channels = keras_layer.input_dim, - output_channels = keras_layer.output_dim, - has_bias = has_bias, - input_name = input_name, - output_name = output_name) + W = keras_layer.get_weights()[0].T + Wb = keras_layer.get_weights()[1].T if has_bias else None + + builder.add_inner_product( + name=layer, + W=W, + b=Wb, + input_channels=keras_layer.input_dim, + output_channels=keras_layer.output_dim, + has_bias=has_bias, + input_name=input_name, + output_name=output_name, + ) + def convert_activation(builder, layer, input_names, output_names, keras_layer): """Convert an activation layer from keras to coreml. @@ -166,49 +180,55 @@ def convert_activation(builder, layer, input_names, output_names, keras_layer): non_linearity = _get_activation_name_from_keras_layer(keras_layer) # Add a non-linearity layer - if non_linearity == 'SOFTMAX': - builder.add_softmax(name = layer, input_name = input_name, - output_name = output_name) + if non_linearity == "SOFTMAX": + builder.add_softmax(name=layer, input_name=input_name, output_name=output_name) return params = None - if non_linearity == 'LEAKYRELU': + if non_linearity == "LEAKYRELU": params = [keras_layer.alpha] - elif non_linearity == 'PRELU': - # In Keras 1.2 PReLU layer's weights are stored as a - # backend tensor, not a numpy array as it claims in documentation. + elif non_linearity == "PRELU": + # In Keras 1.2 PReLU layer's weights are stored as a + # backend tensor, not a numpy array as it claims in documentation. shared_axes = list(keras_layer.shared_axes) - if not (shared_axes == [1,2,3] or shared_axes == [1,2]): - _utils.raise_error_unsupported_scenario("Shared axis not being [1,2,3] " - "or [1,2]", 'parametric_relu', layer) + if not (shared_axes == [1, 2, 3] or shared_axes == [1, 2]): + _utils.raise_error_unsupported_scenario( + "Shared axis not being [1,2,3] " "or [1,2]", "parametric_relu", layer + ) params = keras.backend.eval(keras_layer.weights[0]) - elif non_linearity == 'ELU': + elif non_linearity == "ELU": params = keras_layer.alpha - elif non_linearity == 'PARAMETRICSOFTPLUS': - # In Keras 1.2 Parametric Softplus layer's weights are stored as a + elif non_linearity == "PARAMETRICSOFTPLUS": + # In Keras 1.2 Parametric Softplus layer's weights are stored as a # backend tensor, not a numpy array as it claims in documentation. 
alphas = keras.backend.eval(keras_layer.weights[0]) betas = keras.backend.eval(keras_layer.weights[1]) if len(alphas.shape) == 3: # (H,W,C) - if not (_same_elements_per_channel(alphas) and - _same_elements_per_channel(betas)): - _utils.raise_error_unsupported_scenario("Different parameter values", - 'parametric_softplus', layer) - alphas = alphas[0,0,:] - betas = betas[0,0,:] + if not ( + _same_elements_per_channel(alphas) and _same_elements_per_channel(betas) + ): + _utils.raise_error_unsupported_scenario( + "Different parameter values", "parametric_softplus", layer + ) + alphas = alphas[0, 0, :] + betas = betas[0, 0, :] params = [alphas, betas] - - elif non_linearity == 'THRESHOLDEDRELU': + + elif non_linearity == "THRESHOLDEDRELU": params = keras_layer.theta else: - pass # do nothing to parameters - builder.add_activation(name = layer, - non_linearity = non_linearity, - input_name = input_name, output_name = output_name, - params = params) + pass # do nothing to parameters + builder.add_activation( + name=layer, + non_linearity=non_linearity, + input_name=input_name, + output_name=output_name, + params=params, + ) + def convert_merge(builder, layer, input_names, output_names, keras_layer): """Convert concat layer from keras to coreml. @@ -225,8 +245,10 @@ def convert_merge(builder, layer, input_names, output_names, keras_layer): output_name = output_names[0] mode = _get_elementwise_name_from_keras_layer(keras_layer) - builder.add_elementwise(name = layer, input_names = input_names, - output_name = output_name, mode = mode) + builder.add_elementwise( + name=layer, input_names=input_names, output_name=output_name, mode=mode + ) + def convert_pooling(builder, layer, input_names, output_names, keras_layer): """Convert pooling layer from keras to coreml. 
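The merge-mode dispatch reformatted in `_get_elementwise_name_from_keras_layer` above is easiest to review as a set of (rank, axis) cases. A hedged standalone sketch of just the concat branch:

```python
# Hedged sketch of the concat branch of _get_elementwise_name_from_keras_layer:
# rank-3 inputs concatenated on axis 1/-2 map to SEQUENCE_CONCAT; rank-4 on
# axis 3/-1 and rank-2 on axis 1/-1 map to CONCAT; anything else is an error.
def concat_mode(input_shape, concat_axis):
    rank = len(input_shape)
    if rank == 3 and concat_axis in (1, -2):
        return "SEQUENCE_CONCAT"
    if rank == 4 and concat_axis in (3, -1):
        return "CONCAT"
    if rank == 2 and concat_axis in (1, -1):
        return "CONCAT"
    raise ValueError(
        "unsupported concat: input_shape=%s concat_axis=%s"
        % (input_shape, concat_axis)
    )

print(concat_mode((None, 10, 64), -2))    # SEQUENCE_CONCAT
print(concat_mode((None, 8, 8, 32), -1))  # CONCAT
```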
@@ -243,51 +265,59 @@ def convert_pooling(builder, layer, input_names, output_names, keras_layer): input_name, output_name = (input_names[0], output_names[0]) # Pooling layer type - if isinstance(keras_layer, keras.layers.convolutional.MaxPooling2D) or \ - isinstance(keras_layer, keras.layers.convolutional.MaxPooling1D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling2D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling1D): - layer_type_str = 'MAX' - elif isinstance(keras_layer, keras.layers.convolutional.AveragePooling2D) or \ - isinstance(keras_layer, keras.layers.convolutional.AveragePooling1D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling2D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling1D): - layer_type_str = 'AVERAGE' + if ( + isinstance(keras_layer, keras.layers.convolutional.MaxPooling2D) + or isinstance(keras_layer, keras.layers.convolutional.MaxPooling1D) + or isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling2D) + or isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling1D) + ): + layer_type_str = "MAX" + elif ( + isinstance(keras_layer, keras.layers.convolutional.AveragePooling2D) + or isinstance(keras_layer, keras.layers.convolutional.AveragePooling1D) + or isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling2D) + or isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling1D) + ): + layer_type_str = "AVERAGE" else: raise TypeError("Pooling type %s not supported" % keras_layer) # if it's global, set the global flag - if isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling2D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling2D): + if isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling2D) or isinstance( + keras_layer, keras.layers.pooling.GlobalAveragePooling2D + ): # 2D global pooling global_pooling = True height, width = (0, 0) - stride_height, stride_width = (0,0) - padding_type = 'VALID' - elif isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling1D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling1D): - # 1D global pooling: 1D global pooling seems problematic, + stride_height, stride_width = (0, 0) + padding_type = "VALID" + elif isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling1D) or isinstance( + keras_layer, keras.layers.pooling.GlobalAveragePooling1D + ): + # 1D global pooling: 1D global pooling seems problematic, # use this work-around global_pooling = False _, width, channels = keras_layer.input_shape height = 1 stride_height, stride_width = height, width - padding_type = 'VALID' + padding_type = "VALID" else: global_pooling = False # Set pool sizes and strides - # 1D cases: - if isinstance(keras_layer, keras.layers.convolutional.MaxPooling1D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling1D) or \ - isinstance(keras_layer, keras.layers.convolutional.AveragePooling1D) or \ - isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling1D): + # 1D cases: + if ( + isinstance(keras_layer, keras.layers.convolutional.MaxPooling1D) + or isinstance(keras_layer, keras.layers.pooling.GlobalMaxPooling1D) + or isinstance(keras_layer, keras.layers.convolutional.AveragePooling1D) + or isinstance(keras_layer, keras.layers.pooling.GlobalAveragePooling1D) + ): height, width = 1, keras_layer.pool_length - if keras_layer.stride is not None: + if keras_layer.stride is not None: stride_height, stride_width = 1, keras_layer.stride else: stride_height, 
stride_width = 1, keras_layer.pool_length - # 2D cases: - else: + # 2D cases: + else: height, width = keras_layer.pool_size if keras_layer.strides is None: stride_height, stride_width = height, width @@ -296,28 +326,31 @@ def convert_pooling(builder, layer, input_names, output_names, keras_layer): # Padding border_mode = keras_layer.border_mode - if keras_layer.border_mode == 'valid': - padding_type = 'VALID' - elif keras_layer.border_mode == 'same': - padding_type = 'SAME' + if keras_layer.border_mode == "valid": + padding_type = "VALID" + elif keras_layer.border_mode == "same": + padding_type = "SAME" else: raise TypeError("Border mode %s not supported" % border_mode) - - builder.add_pooling(name = layer, - height = height, - width = width, - stride_height = stride_height, - stride_width = stride_width, - layer_type = layer_type_str, - padding_type = padding_type, - input_name = input_name, - output_name = output_name, - exclude_pad_area = True, - is_global = global_pooling) + + builder.add_pooling( + name=layer, + height=height, + width=width, + stride_height=stride_height, + stride_width=stride_width, + layer_type=layer_type_str, + padding_type=padding_type, + input_name=input_name, + output_name=output_name, + exclude_pad_area=True, + is_global=global_pooling, + ) + def convert_padding(builder, layer, input_names, output_names, keras_layer): """Convert padding layer from keras to coreml. - Keras only supports zero padding at this time. + Keras only supports zero padding at this time. Parameters ---------- keras_layer: layer @@ -328,23 +361,30 @@ def convert_padding(builder, layer, input_names, output_names, keras_layer): """ # Get input and output names input_name, output_name = (input_names[0], output_names[0]) - + if isinstance(keras_layer, keras.layers.convolutional.ZeroPadding1D): left, right = keras_layer.padding top, bottom = (0, 0) - else: # 2D + else: # 2D top, left = keras_layer.padding bottom, right = keras_layer.padding # Now add the layer - builder.add_padding(name = layer, - left = left, right=right, top=top, bottom=bottom, value = 0, - input_name = input_name, output_name=output_name - ) + builder.add_padding( + name=layer, + left=left, + right=right, + top=top, + bottom=bottom, + value=0, + input_name=input_name, + output_name=output_name, + ) + def convert_cropping(builder, layer, input_names, output_names, keras_layer): """Convert padding layer from keras to coreml. - Keras only supports zero padding at this time. + Keras only supports zero padding at this time. 
Parameters ---------- keras_layer: layer @@ -355,55 +395,66 @@ def convert_cropping(builder, layer, input_names, output_names, keras_layer): """ # Get input and output names input_name, output_name = (input_names[0], output_names[0]) - + if isinstance(keras_layer, keras.layers.convolutional.Cropping1D): left, right = keras_layer.cropping top, bottom = (0, 0) - else: # 2D + else: # 2D left, right = keras_layer.cropping[0] top, bottom = keras_layer.cropping[1] - + # Now add the layer - builder.add_crop(name = layer, - left = left, right=right, top=top, bottom=bottom, offset = [0,0], - input_names = [input_name], output_name=output_name - ) + builder.add_crop( + name=layer, + left=left, + right=right, + top=top, + bottom=bottom, + offset=[0, 0], + input_names=[input_name], + output_name=output_name, + ) + def convert_reshape(builder, layer, input_names, output_names, keras_layer): - input_name, output_name = (input_names[0], output_names[0]) - + input_shape = keras_layer.input_shape target_shape = keras_layer.target_shape - + def get_coreml_target_shape(target_shape): - if len(target_shape) == 1: #(D,) - coreml_shape = (1,target_shape[0],1,1) - elif len(target_shape) == 2: #(S,D) - coreml_shape = target_shape + (1,1) - elif len(target_shape) == 3: #(H,W,C) + if len(target_shape) == 1: # (D,) + coreml_shape = (1, target_shape[0], 1, 1) + elif len(target_shape) == 2: # (S,D) + coreml_shape = target_shape + (1, 1) + elif len(target_shape) == 3: # (H,W,C) coreml_shape = (1, target_shape[2], target_shape[0], target_shape[1]) else: coreml_shape = None return coreml_shape - + def get_mode(input_shape, target_shape): in_shape = input_shape[1:] if len(in_shape) == 3 or len(target_shape) == 3: - return 1 + return 1 else: return 0 - + new_shape = get_coreml_target_shape(target_shape) - if new_shape is not None: + if new_shape is not None: mode = get_mode(input_shape, target_shape) - builder.add_reshape(name = layer, input_name = input_name, - output_name=output_name, - target_shape = new_shape, mode = mode) - else: - _utils.raise_error_unsupported_categorical_option('input_shape', - str(input_shape), - 'reshape', layer) + builder.add_reshape( + name=layer, + input_name=input_name, + output_name=output_name, + target_shape=new_shape, + mode=mode, + ) + else: + _utils.raise_error_unsupported_categorical_option( + "input_shape", str(input_shape), "reshape", layer + ) + def convert_upsample(builder, layer, input_names, output_names, keras_layer): """Convert convolution layer from keras to coreml. @@ -421,14 +472,17 @@ def convert_upsample(builder, layer, input_names, output_names, keras_layer): if isinstance(keras_layer, keras.layers.convolutional.UpSampling1D): fh, fw = 1, keras_layer.length - else: # 2D + else: # 2D fh, fw = keras_layer.size - builder.add_upsample(name = layer, - scaling_factor_h = fh, - scaling_factor_w = fw, - input_name = input_name, - output_name = output_name) + builder.add_upsample( + name=layer, + scaling_factor_h=fh, + scaling_factor_w=fw, + input_name=input_name, + output_name=output_name, + ) + def convert_convolution(builder, layer, input_names, output_names, keras_layer): """Convert convolution layer from keras to coreml. 
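`convert_reshape` above lifts Keras target shapes into Core ML's rank-4 layout. A hedged standalone sketch of that mapping, with illustrative shapes:

```python
# Standalone sketch of get_coreml_target_shape from convert_reshape:
# Keras target shapes are lifted into Core ML's (S, C, H, W)-style rank-4 form.
def coreml_target_shape(target_shape):
    if len(target_shape) == 1:    # (D,)
        return (1, target_shape[0], 1, 1)
    if len(target_shape) == 2:    # (S, D)
        return tuple(target_shape) + (1, 1)
    if len(target_shape) == 3:    # (H, W, C) -> (1, C, H, W)
        return (1, target_shape[2], target_shape[0], target_shape[1])
    return None                   # unsupported rank

print(coreml_target_shape((128,)))       # (1, 128, 1, 1)
print(coreml_target_shape((10, 64)))     # (10, 64, 1, 1)
print(coreml_target_shape((28, 28, 3)))  # (1, 3, 28, 28)
```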
@@ -461,27 +515,30 @@ def convert_convolution(builder, layer, input_names, output_names, keras_layer): b = weightList[1] if has_bias else None # dilation factors - dilation_factors = [1,1] + dilation_factors = [1, 1] if isinstance(keras_layer, keras.layers.convolutional.AtrousConvolution2D): dilation_factors = list(keras_layer.atrous_rate) - builder.add_convolution(name = layer, - kernel_channels = channels, - output_channels = n_filters, - height = height, - width = width, - stride_height = stride_height, - stride_width = stride_width, - border_mode = keras_layer.border_mode, - groups = 1, - W = W, - b = b, - has_bias = has_bias, - is_deconv = is_deconv, - output_shape = output_shape, - input_name = input_name, - output_name = output_name, - dilation_factors = dilation_factors) + builder.add_convolution( + name=layer, + kernel_channels=channels, + output_channels=n_filters, + height=height, + width=width, + stride_height=stride_height, + stride_width=stride_width, + border_mode=keras_layer.border_mode, + groups=1, + W=W, + b=b, + has_bias=has_bias, + is_deconv=is_deconv, + output_shape=output_shape, + input_name=input_name, + output_name=output_name, + dilation_factors=dilation_factors, + ) + def convert_convolution1d(builder, layer, input_names, output_names, keras_layer): """Convert convolution layer from keras to coreml. @@ -513,27 +570,30 @@ def convert_convolution1d(builder, layer, input_names, output_names, keras_layer W = weightList[0] b = weightList[1] if has_bias else None - dilation_factors = [1,1] + dilation_factors = [1, 1] if isinstance(keras_layer, keras.layers.convolutional.AtrousConvolution1D): dilation_factors[-1] = keras_layer.atrous_rate - builder.add_convolution(name = layer, - kernel_channels = input_dim, - output_channels = n_filters, - height = 1, - width = filter_length, - stride_height = 1, - stride_width = stride_width, - border_mode = keras_layer.border_mode, - groups = 1, - W = W, - b = b, - has_bias = has_bias, - is_deconv = False, - output_shape = output_shape, - input_name = input_name, - output_name = output_name, - dilation_factors = dilation_factors) + builder.add_convolution( + name=layer, + kernel_channels=input_dim, + output_channels=n_filters, + height=1, + width=filter_length, + stride_height=1, + stride_width=stride_width, + border_mode=keras_layer.border_mode, + groups=1, + W=W, + b=b, + has_bias=has_bias, + is_deconv=False, + output_shape=output_shape, + input_name=input_name, + output_name=output_name, + dilation_factors=dilation_factors, + ) + def convert_lstm(builder, layer, input_names, output_names, keras_layer): """Convert an LSTM layer from keras to coreml. @@ -546,12 +606,14 @@ def convert_lstm(builder, layer, input_names, output_names, keras_layer): builder: NeuralNetworkBuilder A neural network builder object. 
""" - + hidden_size = keras_layer.output_dim input_size = keras_layer.input_shape[-1] - if keras_layer.consume_less not in ['cpu', 'gpu']: - raise ValueError('Cannot convert Keras layer with consume_less = %s' - % keras_layer.consume_less) + if keras_layer.consume_less not in ["cpu", "gpu"]: + raise ValueError( + "Cannot convert Keras layer with consume_less = %s" + % keras_layer.consume_less + ) output_all = keras_layer.return_sequences reverse_input = keras_layer.go_backwards @@ -559,7 +621,7 @@ def convert_lstm(builder, layer, input_names, output_names, keras_layer): # Keras: I C F O; W_x, W_h, b # CoreML: I F O G; W_h and W_x are separated W_h, W_x, b = ([], [], []) - if keras_layer.consume_less == 'cpu': + if keras_layer.consume_less == "cpu": W_h.append(keras_layer.get_weights()[1].T) W_h.append(keras_layer.get_weights()[7].T) W_h.append(keras_layer.get_weights()[10].T) @@ -576,40 +638,45 @@ def convert_lstm(builder, layer, input_names, output_names, keras_layer): b.append(keras_layer.get_weights()[5]) else: keras_W_h = keras_layer.get_weights()[1].T - W_h.append(keras_W_h[0 * hidden_size:][:hidden_size]) - W_h.append(keras_W_h[1 * hidden_size:][:hidden_size]) - W_h.append(keras_W_h[3 * hidden_size:][:hidden_size]) - W_h.append(keras_W_h[2 * hidden_size:][:hidden_size]) + W_h.append(keras_W_h[0 * hidden_size :][:hidden_size]) + W_h.append(keras_W_h[1 * hidden_size :][:hidden_size]) + W_h.append(keras_W_h[3 * hidden_size :][:hidden_size]) + W_h.append(keras_W_h[2 * hidden_size :][:hidden_size]) keras_W_x = keras_layer.get_weights()[0].T - W_x.append(keras_W_x[0 * hidden_size:][:hidden_size]) - W_x.append(keras_W_x[1 * hidden_size:][:hidden_size]) - W_x.append(keras_W_x[3 * hidden_size:][:hidden_size]) - W_x.append(keras_W_x[2 * hidden_size:][:hidden_size]) + W_x.append(keras_W_x[0 * hidden_size :][:hidden_size]) + W_x.append(keras_W_x[1 * hidden_size :][:hidden_size]) + W_x.append(keras_W_x[3 * hidden_size :][:hidden_size]) + W_x.append(keras_W_x[2 * hidden_size :][:hidden_size]) keras_b = keras_layer.get_weights()[2] - b.append(keras_b[0 * hidden_size:][:hidden_size]) - b.append(keras_b[1 * hidden_size:][:hidden_size]) - b.append(keras_b[3 * hidden_size:][:hidden_size]) - b.append(keras_b[2 * hidden_size:][:hidden_size]) + b.append(keras_b[0 * hidden_size :][:hidden_size]) + b.append(keras_b[1 * hidden_size :][:hidden_size]) + b.append(keras_b[3 * hidden_size :][:hidden_size]) + b.append(keras_b[2 * hidden_size :][:hidden_size]) # Set activation type - inner_activation_str = _get_recurrent_activation_name_from_keras(keras_layer.inner_activation) + inner_activation_str = _get_recurrent_activation_name_from_keras( + keras_layer.inner_activation + ) activation_str = _get_recurrent_activation_name_from_keras(keras_layer.activation) # Add to the network builder.add_unilstm( - name = layer, - W_h = W_h, W_x = W_x, b = b, - hidden_size = hidden_size, - input_size = input_size, - input_names = input_names, - output_names = output_names, - inner_activation = inner_activation_str, - cell_state_update_activation = activation_str, - output_activation = activation_str, - output_all = output_all, - reverse_input = reverse_input) + name=layer, + W_h=W_h, + W_x=W_x, + b=b, + hidden_size=hidden_size, + input_size=input_size, + input_names=input_names, + output_names=output_names, + inner_activation=inner_activation_str, + cell_state_update_activation=activation_str, + output_activation=activation_str, + output_all=output_all, + reverse_input=reverse_input, + ) def convert_simple_rnn(builder, layer, 
input_names, output_names, keras_layer): @@ -623,22 +690,24 @@ def convert_simple_rnn(builder, layer, input_names, output_names, keras_layer): builder: NeuralNetworkBuilder A neural network builder object. """ - # Get input and output names + # Get input and output names hidden_size = keras_layer.output_dim input_size = keras_layer.input_shape[-1] output_all = keras_layer.return_sequences reverse_input = keras_layer.go_backwards - if keras_layer.consume_less not in ['cpu', 'gpu']: - raise ValueError('Cannot convert Keras layer with consume_less = %s' - % keras_layer.consume_less) - + if keras_layer.consume_less not in ["cpu", "gpu"]: + raise ValueError( + "Cannot convert Keras layer with consume_less = %s" + % keras_layer.consume_less + ) + W_h = np.zeros((hidden_size, hidden_size)) W_x = np.zeros((hidden_size, input_size)) b = np.zeros((hidden_size,)) - if keras_layer.consume_less == 'cpu': + if keras_layer.consume_less == "cpu": W_h = keras_layer.get_weights()[1].T W_x = keras_layer.get_weights()[0].T b = keras_layer.get_weights()[2] @@ -652,15 +721,19 @@ def convert_simple_rnn(builder, layer, input_names, output_names, keras_layer): # Add to the network builder.add_simple_rnn( - name = layer, - W_h = W_h, W_x = W_x, b = b, - hidden_size = hidden_size, - input_size = input_size, - activation = activation_str, - input_names = input_names, - output_names = output_names, + name=layer, + W_h=W_h, + W_x=W_x, + b=b, + hidden_size=hidden_size, + input_size=input_size, + activation=activation_str, + input_names=input_names, + output_names=output_names, output_all=output_all, - reverse_input=reverse_input) + reverse_input=reverse_input, + ) + def convert_gru(builder, layer, input_names, output_names, keras_layer): """Convert a GRU layer from keras to coreml. @@ -673,21 +746,23 @@ def convert_gru(builder, layer, input_names, output_names, keras_layer): builder: NeuralNetworkBuilder A neural network builder object. 
""" - + hidden_size = keras_layer.output_dim input_size = keras_layer.input_shape[-1] output_all = keras_layer.return_sequences reverse_input = keras_layer.go_backwards - if keras_layer.consume_less not in ['cpu', 'gpu']: - raise ValueError('Cannot convert Keras layer with consume_less = %s' - % keras_layer.consume_less) + if keras_layer.consume_less not in ["cpu", "gpu"]: + raise ValueError( + "Cannot convert Keras layer with consume_less = %s" + % keras_layer.consume_less + ) # Keras: Z R O - # CoreML: Z R O + # CoreML: Z R O W_h, W_x, b = ([], [], []) - if keras_layer.consume_less == 'cpu': + if keras_layer.consume_less == "cpu": W_x.append(keras_layer.get_weights()[0].T) W_x.append(keras_layer.get_weights()[3].T) W_x.append(keras_layer.get_weights()[6].T) @@ -700,24 +775,29 @@ def convert_gru(builder, layer, input_names, output_names, keras_layer): b.append(keras_layer.get_weights()[5]) b.append(keras_layer.get_weights()[8]) else: - print('consume less not implemented') + print("consume less not implemented") # Set actication type - inner_activation_str = _get_recurrent_activation_name_from_keras(keras_layer.inner_activation) + inner_activation_str = _get_recurrent_activation_name_from_keras( + keras_layer.inner_activation + ) activation_str = _get_recurrent_activation_name_from_keras(keras_layer.activation) # Add to the network builder.add_gru( - name = layer, - W_h = W_h, W_x = W_x, b = b, - input_size = input_size, - hidden_size = hidden_size, - input_names = input_names, - output_names = output_names, - activation = activation_str, - inner_activation = inner_activation_str, - output_all=output_all, - reverse_input = reverse_input) + name=layer, + W_h=W_h, + W_x=W_x, + b=b, + input_size=input_size, + hidden_size=hidden_size, + input_names=input_names, + output_names=output_names, + activation=activation_str, + inner_activation=inner_activation_str, + output_all=output_all, + reverse_input=reverse_input, + ) def convert_bidirectional(builder, layer, input_names, output_names, keras_layer): @@ -732,31 +812,33 @@ def convert_bidirectional(builder, layer, input_names, output_names, keras_layer builder: NeuralNetworkBuilder A neural network builder object. 
""" - + input_size = keras_layer.input_shape[-1] lstm_layer = keras_layer.forward_layer - if (type(lstm_layer) != keras.layers.recurrent.LSTM): - raise TypeError('Bidirectional layers only supported with LSTM') - + if type(lstm_layer) != keras.layers.recurrent.LSTM: + raise TypeError("Bidirectional layers only supported with LSTM") + if lstm_layer.go_backwards: - raise TypeError(' \'go_backwards\' mode not supported with Bidirectional layers') + raise TypeError(" 'go_backwards' mode not supported with Bidirectional layers") output_all = keras_layer.return_sequences hidden_size = lstm_layer.output_dim - #output_size = lstm_layer.output_dim * 2 + # output_size = lstm_layer.output_dim * 2 - if lstm_layer.consume_less not in ['cpu', 'gpu']: - raise ValueError('Cannot convert Keras layer with consume_less = %s' - % keras_layer.consume_less) + if lstm_layer.consume_less not in ["cpu", "gpu"]: + raise ValueError( + "Cannot convert Keras layer with consume_less = %s" + % keras_layer.consume_less + ) # Keras: I C F O; W_x, W_h, b # CoreML: I F O G; W_h and W_x are separated # Keras has all forward weights, followed by backward in the same order W_h, W_x, b = ([], [], []) - if lstm_layer.consume_less == 'cpu': + if lstm_layer.consume_less == "cpu": W_h.append(keras_layer.get_weights()[1].T) W_h.append(keras_layer.get_weights()[7].T) W_h.append(keras_layer.get_weights()[10].T) @@ -773,25 +855,25 @@ def convert_bidirectional(builder, layer, input_names, output_names, keras_layer b.append(keras_layer.get_weights()[5]) else: keras_W_h = keras_layer.get_weights()[1].T - W_h.append(keras_W_h[0 * hidden_size:][:hidden_size]) - W_h.append(keras_W_h[1 * hidden_size:][:hidden_size]) - W_h.append(keras_W_h[3 * hidden_size:][:hidden_size]) - W_h.append(keras_W_h[2 * hidden_size:][:hidden_size]) + W_h.append(keras_W_h[0 * hidden_size :][:hidden_size]) + W_h.append(keras_W_h[1 * hidden_size :][:hidden_size]) + W_h.append(keras_W_h[3 * hidden_size :][:hidden_size]) + W_h.append(keras_W_h[2 * hidden_size :][:hidden_size]) keras_W_x = keras_layer.get_weights()[0].T - W_x.append(keras_W_x[0 * hidden_size:][:hidden_size]) - W_x.append(keras_W_x[1 * hidden_size:][:hidden_size]) - W_x.append(keras_W_x[3 * hidden_size:][:hidden_size]) - W_x.append(keras_W_x[2 * hidden_size:][:hidden_size]) + W_x.append(keras_W_x[0 * hidden_size :][:hidden_size]) + W_x.append(keras_W_x[1 * hidden_size :][:hidden_size]) + W_x.append(keras_W_x[3 * hidden_size :][:hidden_size]) + W_x.append(keras_W_x[2 * hidden_size :][:hidden_size]) keras_b = keras_layer.get_weights()[2] - b.append(keras_b[0 * hidden_size:][:hidden_size]) - b.append(keras_b[1 * hidden_size:][:hidden_size]) - b.append(keras_b[3 * hidden_size:][:hidden_size]) - b.append(keras_b[2 * hidden_size:][:hidden_size]) + b.append(keras_b[0 * hidden_size :][:hidden_size]) + b.append(keras_b[1 * hidden_size :][:hidden_size]) + b.append(keras_b[3 * hidden_size :][:hidden_size]) + b.append(keras_b[2 * hidden_size :][:hidden_size]) - W_h_back, W_x_back, b_back = ([],[],[]) - if keras_layer.backward_layer.consume_less == 'cpu': + W_h_back, W_x_back, b_back = ([], [], []) + if keras_layer.backward_layer.consume_less == "cpu": back_weights = keras_layer.backward_layer.get_weights() W_h_back.append(back_weights[1].T) W_h_back.append(back_weights[7].T) @@ -809,41 +891,48 @@ def convert_bidirectional(builder, layer, input_names, output_names, keras_layer b_back.append(back_weights[5]) else: keras_W_h = keras_layer.backward_layer.get_weights()[1].T - W_h_back.append(keras_W_h[0 * 
hidden_size:][:hidden_size]) - W_h_back.append(keras_W_h[1 * hidden_size:][:hidden_size]) - W_h_back.append(keras_W_h[3 * hidden_size:][:hidden_size]) - W_h_back.append(keras_W_h[2 * hidden_size:][:hidden_size]) + W_h_back.append(keras_W_h[0 * hidden_size :][:hidden_size]) + W_h_back.append(keras_W_h[1 * hidden_size :][:hidden_size]) + W_h_back.append(keras_W_h[3 * hidden_size :][:hidden_size]) + W_h_back.append(keras_W_h[2 * hidden_size :][:hidden_size]) keras_W_x = keras_layer.backward_layer.get_weights()[0].T - W_x_back.append(keras_W_x[0 * hidden_size:][:hidden_size]) - W_x_back.append(keras_W_x[1 * hidden_size:][:hidden_size]) - W_x_back.append(keras_W_x[3 * hidden_size:][:hidden_size]) - W_x_back.append(keras_W_x[2 * hidden_size:][:hidden_size]) + W_x_back.append(keras_W_x[0 * hidden_size :][:hidden_size]) + W_x_back.append(keras_W_x[1 * hidden_size :][:hidden_size]) + W_x_back.append(keras_W_x[3 * hidden_size :][:hidden_size]) + W_x_back.append(keras_W_x[2 * hidden_size :][:hidden_size]) keras_b = keras_layer.backward_layer.get_weights()[2] - b_back.append(keras_b[0 * hidden_size:][:hidden_size]) - b_back.append(keras_b[1 * hidden_size:][:hidden_size]) - b_back.append(keras_b[3 * hidden_size:][:hidden_size]) - b_back.append(keras_b[2 * hidden_size:][:hidden_size]) + b_back.append(keras_b[0 * hidden_size :][:hidden_size]) + b_back.append(keras_b[1 * hidden_size :][:hidden_size]) + b_back.append(keras_b[3 * hidden_size :][:hidden_size]) + b_back.append(keras_b[2 * hidden_size :][:hidden_size]) # Set activation type - inner_activation_str = _get_recurrent_activation_name_from_keras(lstm_layer.inner_activation) + inner_activation_str = _get_recurrent_activation_name_from_keras( + lstm_layer.inner_activation + ) activation_str = _get_recurrent_activation_name_from_keras(lstm_layer.activation) - # Add to the network builder.add_bidirlstm( - name = layer, - W_h = W_h, W_x = W_x, b = b, - W_h_back = W_h_back, W_x_back = W_x_back, b_back = b_back, + name=layer, + W_h=W_h, + W_x=W_x, + b=b, + W_h_back=W_h_back, + W_x_back=W_x_back, + b_back=b_back, hidden_size=hidden_size, input_size=input_size, input_names=input_names, output_names=output_names, - inner_activation = inner_activation_str, - cell_state_update_activation = activation_str, - output_activation = activation_str, - output_all = output_all) + inner_activation=inner_activation_str, + cell_state_update_activation=activation_str, + output_activation=activation_str, + output_all=output_all, + ) + def convert_batchnorm(builder, layer, input_names, output_names, keras_layer): """ @@ -860,12 +949,10 @@ def convert_batchnorm(builder, layer, input_names, output_names, keras_layer): # Currently CoreML supports only per-channel batch-norm if keras_layer.mode != 0: - raise NotImplementedError( - 'Currently supports only per-feature normalization') - + raise NotImplementedError("Currently supports only per-feature normalization") + axis = keras_layer.axis nb_channels = keras_layer.input_shape[axis] - # Set parameters # Parameter arrangement in Keras: gamma, beta, mean, variance @@ -873,23 +960,25 @@ def convert_batchnorm(builder, layer, input_names, output_names, keras_layer): beta = keras_layer.get_weights()[1] mean = keras_layer.get_weights()[2] std = keras_layer.get_weights()[3] - # compute adjusted parameters + # compute adjusted parameters variance = std * std f = 1.0 / np.sqrt(std + keras_layer.epsilon) - gamma1 = gamma*f - beta1 = beta - gamma*mean*f - mean[:] = 0.0 #mean - variance[:] = 1.0 - .00001 #stddev + gamma1 = gamma * f + beta1 = 
beta - gamma * mean * f + mean[:] = 0.0 # mean + variance[:] = 1.0 - 0.00001 # stddev builder.add_batchnorm( - name = layer, - channels = nb_channels, - gamma = gamma1, - beta = beta1, - mean = mean, - variance = variance, - input_name = input_name, - output_name = output_name) + name=layer, + channels=nb_channels, + gamma=gamma1, + beta=beta1, + mean=mean, + variance=variance, + input_name=input_name, + output_name=output_name, + ) + def convert_flatten(builder, layer, input_names, output_names, keras_layer): """Convert a flatten layer from keras to coreml. @@ -903,18 +992,20 @@ def convert_flatten(builder, layer, input_names, output_names, keras_layer): A neural network builder object. """ input_name, output_name = (input_names[0], output_names[0]) - + # blob_order == 0 if the input blob needs not be rearranged # blob_order == 1 if the input blob needs to be rearranged blob_order = 0 - - # using keras_layer.input.shape have a "?" (Dimension[None] at the front), + + # using keras_layer.input.shape have a "?" (Dimension[None] at the front), # making a 3D tensor with unknown batch size 4D if len(keras_layer.input.shape) == 4: blob_order = 1 - - builder.add_flatten(name=layer, mode=blob_order, input_name=input_name, - output_name=output_name) + + builder.add_flatten( + name=layer, mode=blob_order, input_name=input_name, output_name=output_name + ) + def convert_softmax(builder, layer, input_names, output_names, keras_layer): """Convert a softmax layer from keras to coreml. @@ -928,9 +1019,9 @@ def convert_softmax(builder, layer, input_names, output_names, keras_layer): A neural network builder object. """ input_name, output_name = (input_names[0], output_names[0]) - - builder.add_softmax(name = layer, input_name = input_name, - output_name = output_name) + + builder.add_softmax(name=layer, input_name=input_name, output_name=output_name) + def convert_permute(builder, layer, input_names, output_names, keras_layer): """Convert a softmax layer from keras to coreml. @@ -944,10 +1035,10 @@ def convert_permute(builder, layer, input_names, output_names, keras_layer): A neural network builder object. """ input_name, output_name = (input_names[0], output_names[0]) - + keras_dims = keras_layer.dims # Keras permute layer index begins at 1 - if len(keras_dims) == 3: + if len(keras_dims) == 3: # Keras input tensor interpret as (H,W,C) x = list(np.array(keras_dims)) i1, i2, i3 = x.index(1), x.index(2), x.index(3) @@ -960,11 +1051,13 @@ def convert_permute(builder, layer, input_names, output_names, keras_layer): # permutations - the values here are not valid Keras dim parameters # but parameters we need to use to convert to CoreML model dim = keras_dims - else: - raise NotImplementedError('Supports only 3d permutation.') - - builder.add_permute(name = layer, dim=dim, input_name = input_name, - output_name = output_name) + else: + raise NotImplementedError("Supports only 3d permutation.") + + builder.add_permute( + name=layer, dim=dim, input_name=input_name, output_name=output_name + ) + def convert_embedding(builder, layer, input_names, output_names, keras_layer): """Convert a dense layer from keras to coreml. 
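The batch-norm parameter folding above is worth checking numerically. Below is a hedged numpy sketch with made-up values; note it mirrors the code as written, where `get_weights()[3]` is bound to a variable named `std` even though the surrounding comments treat it as the variance:

```python
# Numeric sketch of the parameter folding in convert_batchnorm (values made up).
# Mirrors the diff as written: f is computed from `std` + epsilon, the exported
# mean is zeroed, and the exported variance is pinned to ~1.
import numpy as np

gamma = np.array([1.0, 0.5])
beta = np.array([0.0, 0.1])
mean = np.array([0.2, -0.3])
std = np.array([1.5, 2.0])  # keras_layer.get_weights()[3]
epsilon = 1e-3

f = 1.0 / np.sqrt(std + epsilon)
gamma1 = gamma * f               # folded scale
beta1 = beta - gamma * mean * f  # folded bias
mean_out = np.zeros_like(mean)
variance_out = np.full_like(std, 1.0 - 0.00001)

print(gamma1, beta1, mean_out, variance_out)
```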
@@ -981,27 +1074,30 @@ def convert_embedding(builder, layer, input_names, output_names, keras_layer): input_name, output_name = (input_names[0], output_names[0]) # Get the weights from keras - W = keras_layer.get_weights ()[0].T - - # assuming keras embedding layers don't have biases - builder.add_embedding(name = layer, - W = W, - b = None, - input_dim = keras_layer.input_dim, - output_channels = keras_layer.output_dim, - has_bias = False, - input_name = input_name, - output_name = output_name) + W = keras_layer.get_weights()[0].T + + # assuming keras embedding layers don't have biases + builder.add_embedding( + name=layer, + W=W, + b=None, + input_dim=keras_layer.input_dim, + output_channels=keras_layer.output_dim, + has_bias=False, + input_name=input_name, + output_name=output_name, + ) + def convert_repeat_vector(builder, layer, input_names, output_names, keras_layer): # Keras RepeatVector only deals with 1D input # Get input and output names input_name, output_name = (input_names[0], output_names[0]) - builder.add_sequence_repeat(name = layer, - nrep = keras_layer.n, - input_name = input_name, - output_name = output_name) + builder.add_sequence_repeat( + name=layer, nrep=keras_layer.n, input_name=input_name, output_name=output_name + ) + def default_skip(builder, layer, input_names, output_names, keras_layer): """ Layers that can be skipped (because they are train time only. """ diff --git a/coremltools/converters/keras/_layers2.py b/coremltools/converters/keras/_layers2.py index d5d43b798..59b823de4 100644 --- a/coremltools/converters/keras/_layers2.py +++ b/coremltools/converters/keras/_layers2.py @@ -10,142 +10,160 @@ from distutils.version import StrictVersion as _StrictVersion -if _keras.__version__ >= _StrictVersion('2.2.1'): +if _keras.__version__ >= _StrictVersion("2.2.1"): from keras.layers import DepthwiseConv2D -elif _keras.__version__ >= _StrictVersion('2.2.0'): +elif _keras.__version__ >= _StrictVersion("2.2.0"): from keras.layers import DepthwiseConv2D from keras_applications.mobilenet import relu6 else: from keras.applications.mobilenet import DepthwiseConv2D, relu6 + def _get_recurrent_activation_name_from_keras(activation): if activation == _keras.activations.sigmoid: - activation_str = 'SIGMOID' + activation_str = "SIGMOID" elif activation == _keras.activations.hard_sigmoid: - activation_str = 'SIGMOID_HARD' + activation_str = "SIGMOID_HARD" elif activation == _keras.activations.tanh: - activation_str = 'TANH' + activation_str = "TANH" elif activation == _keras.activations.relu: - activation_str = 'RELU' + activation_str = "RELU" elif activation == _keras.activations.linear: - activation_str = 'LINEAR' + activation_str = "LINEAR" else: raise NotImplementedError( - 'activation %s not supported for Recurrent layer.' % activation) + "activation %s not supported for Recurrent layer." 
% activation + ) return activation_str -def _get_activation_name_from_keras_layer(keras_layer): +def _get_activation_name_from_keras_layer(keras_layer): if isinstance(keras_layer, _keras.layers.advanced_activations.LeakyReLU): - non_linearity = 'LEAKYRELU' + non_linearity = "LEAKYRELU" elif isinstance(keras_layer, _keras.layers.advanced_activations.PReLU): - non_linearity = 'PRELU' + non_linearity = "PRELU" elif isinstance(keras_layer, _keras.layers.advanced_activations.ELU): - non_linearity = 'ELU' + non_linearity = "ELU" elif isinstance(keras_layer, _keras.layers.advanced_activations.ThresholdedReLU): - non_linearity = 'THRESHOLDEDRELU' + non_linearity = "THRESHOLDEDRELU" elif isinstance(keras_layer, _keras.layers.advanced_activations.Softmax): - non_linearity = 'SOFTMAX' + non_linearity = "SOFTMAX" else: import six + if six.PY2: act_name = keras_layer.activation.func_name else: act_name = keras_layer.activation.__name__ - if act_name == 'softmax': - non_linearity = 'SOFTMAX' - elif act_name == 'sigmoid': - non_linearity = 'SIGMOID' - elif act_name == 'tanh': - non_linearity = 'TANH' - elif act_name == 'relu': - non_linearity = 'RELU' - elif act_name == 'relu6': - non_linearity = 'RELU6' - elif act_name == 'softplus': - non_linearity = 'SOFTPLUS' - elif act_name == 'softsign': - non_linearity = 'SOFTSIGN' - elif act_name == 'hard_sigmoid': - non_linearity = 'SIGMOID_HARD' - elif act_name == 'elu': - non_linearity = 'UNIT_ELU' - elif act_name == 'linear': - non_linearity = 'LINEAR' - elif act_name == 'selu': - non_linearity = 'SELU' + if act_name == "softmax": + non_linearity = "SOFTMAX" + elif act_name == "sigmoid": + non_linearity = "SIGMOID" + elif act_name == "tanh": + non_linearity = "TANH" + elif act_name == "relu": + non_linearity = "RELU" + elif act_name == "relu6": + non_linearity = "RELU6" + elif act_name == "softplus": + non_linearity = "SOFTPLUS" + elif act_name == "softsign": + non_linearity = "SOFTSIGN" + elif act_name == "hard_sigmoid": + non_linearity = "SIGMOID_HARD" + elif act_name == "elu": + non_linearity = "UNIT_ELU" + elif act_name == "linear": + non_linearity = "LINEAR" + elif act_name == "selu": + non_linearity = "SELU" else: - non_linearity = 'CUSTOM' + non_linearity = "CUSTOM" return non_linearity + def _get_elementwise_name_from_keras_layer(keras_layer): """ Get the keras layer name from the activation name. 
""" if isinstance(keras_layer, _keras.layers.Add): - return 'ADD' + return "ADD" elif isinstance(keras_layer, _keras.layers.Multiply): - return 'MULTIPLY' + return "MULTIPLY" elif isinstance(keras_layer, _keras.layers.Concatenate): - if len(keras_layer.input_shape[0]) == 3 and (keras_layer.axis == 1 or keras_layer.axis == -2): - return 'SEQUENCE_CONCAT' - if len(keras_layer.input_shape[0]) == 3 and (keras_layer.axis == 2 or keras_layer.axis == -1): - return 'CONCAT' - elif len(keras_layer.input_shape[0]) == 4 and (keras_layer.axis == 3 or keras_layer.axis == -1): - return 'CONCAT' - elif len(keras_layer.input_shape[0]) == 2 and (keras_layer.axis == 1 or keras_layer.axis == -1): - return 'CONCAT' + if len(keras_layer.input_shape[0]) == 3 and ( + keras_layer.axis == 1 or keras_layer.axis == -2 + ): + return "SEQUENCE_CONCAT" + if len(keras_layer.input_shape[0]) == 3 and ( + keras_layer.axis == 2 or keras_layer.axis == -1 + ): + return "CONCAT" + elif len(keras_layer.input_shape[0]) == 4 and ( + keras_layer.axis == 3 or keras_layer.axis == -1 + ): + return "CONCAT" + elif len(keras_layer.input_shape[0]) == 2 and ( + keras_layer.axis == 1 or keras_layer.axis == -1 + ): + return "CONCAT" else: - raise ValueError('Only channel and sequence concatenation are supported.') + raise ValueError("Only channel and sequence concatenation are supported.") elif isinstance(keras_layer, _keras.layers.Dot): if len(keras_layer.input_shape[0]) == 2: if type(keras_layer.axes) is list or type(keras_layer.axes) is tuple: - if len(keras_layer.axes) > 1: - raise ValueError('Only vector dot-product is supported.') + if len(keras_layer.axes) > 1: + raise ValueError("Only vector dot-product is supported.") else: axis = keras_layer.axes[0] else: axis = keras_layer.axes - if axis != -1 and axis != 1: - raise ValueError('Only vector dot-product is supported.') + if axis != -1 and axis != 1: + raise ValueError("Only vector dot-product is supported.") if keras_layer.normalize: - return 'COS' + return "COS" else: - return 'DOT' + return "DOT" else: - raise ValueError('Only vector dot-product is supported.') + raise ValueError("Only vector dot-product is supported.") elif isinstance(keras_layer, _keras.layers.Maximum): - return 'MAX' + return "MAX" elif isinstance(keras_layer, _keras.layers.Average): - return 'AVE' + return "AVE" else: - _utils.raise_error_unsupported_option(str(type(keras_layer)), 'merge', - keras_layer.name) + _utils.raise_error_unsupported_option( + str(type(keras_layer)), "merge", keras_layer.name + ) + def _same_elements_per_channel(x): - """ Test if a 3D (H,W,C) matrix x has the same element in each (H,W) + """ Test if a 3D (H,W,C) matrix x has the same element in each (H,W) matrix for each channel """ eps = 1e-5 dims = x.shape for c in range(dims[-1]): - xc = x[:,:,c].flatten() + xc = x[:, :, c].flatten() if not _np.all(_np.absolute(xc - xc[0]) < eps): return False return True + def _check_data_format(keras_layer): - if hasattr(keras_layer,('data_format')): - if keras_layer.data_format != 'channels_last': - raise ValueError("Converter currently supports data_format = " - "'channels_last' only.") + if hasattr(keras_layer, ("data_format")): + if keras_layer.data_format != "channels_last": + raise ValueError( + "Converter currently supports data_format = " "'channels_last' only." 
+ ) -def convert_dense(builder, layer, input_names, output_names, keras_layer, - respect_train): + +def convert_dense( + builder, layer, input_names, output_names, keras_layer, respect_train +): """ Convert a dense layer from keras to coreml. @@ -164,23 +182,27 @@ def convert_dense(builder, layer, input_names, output_names, keras_layer, has_bias = keras_layer.use_bias # Get the weights from keras - W = keras_layer.get_weights ()[0].T - Wb = keras_layer.get_weights ()[1].T if has_bias else None + W = keras_layer.get_weights()[0].T + Wb = keras_layer.get_weights()[1].T if has_bias else None output_channels, input_channels = W.shape - builder.add_inner_product(name = layer, - W = W, - b = Wb, - input_channels = input_channels, - output_channels = output_channels, - has_bias = has_bias, - input_name = input_name, - output_name = output_name) + builder.add_inner_product( + name=layer, + W=W, + b=Wb, + input_channels=input_channels, + output_channels=output_channels, + has_bias=has_bias, + input_name=input_name, + output_name=output_name, + ) if respect_train and keras_layer.trainable: builder.make_updatable([layer]) -def convert_embedding(builder, layer, input_names, output_names, keras_layer, - respect_train): + +def convert_embedding( + builder, layer, input_names, output_names, keras_layer, respect_train +): """Convert a dense layer from keras to coreml. Parameters @@ -197,25 +219,32 @@ def convert_embedding(builder, layer, input_names, output_names, keras_layer, input_name, output_name = (input_names[0], output_names[0]) # Get the weights from keras - W = keras_layer.get_weights ()[0].T + W = keras_layer.get_weights()[0].T # assuming keras embedding layers don't have biases - builder.add_embedding(name = layer, - W = W, - b = None, - input_dim = keras_layer.input_dim, - output_channels = keras_layer.output_dim, - has_bias = False, - input_name = input_name, - output_name = output_name) + builder.add_embedding( + name=layer, + W=W, + b=None, + input_dim=keras_layer.input_dim, + output_channels=keras_layer.output_dim, + has_bias=False, + input_name=input_name, + output_name=output_name, + ) if respect_train and keras_layer.trainable: - logging.warning("Embedding layer '%s' is marked updatable, but Core " - "ML does not yet support updating layers of this " - "type. The layer will be frozen in Core ML.", layer) + logging.warning( + "Embedding layer '%s' is marked updatable, but Core " + "ML does not yet support updating layers of this " + "type. The layer will be frozen in Core ML.", + layer, + ) + -def convert_activation(builder, layer, input_names, output_names, keras_layer, - respect_train): +def convert_activation( + builder, layer, input_names, output_names, keras_layer, respect_train +): """ Convert an activation layer from keras to coreml. 
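The `respect_train` plumbing above (Dense becomes updatable, Embedding logs a warning and stays frozen) is driven by the converter's public `respect_trainable` flag. A hedged end-to-end sketch with a toy model — the model and feature names are illustrative only:

```python
# Hedged sketch: respect_trainable maps Keras' layer.trainable onto updatable
# Core ML layers for supported types (e.g. Dense); the toy model is illustrative.
from keras.models import Sequential
from keras.layers import Dense
import coremltools

model = Sequential([Dense(4, input_dim=8, activation="relu"), Dense(2)])
mlmodel = coremltools.converters.keras.convert(
    model,
    input_names=["features"],
    output_names=["scores"],
    respect_trainable=True,  # trainable Dense layers become updatable
)
```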
@@ -234,66 +263,80 @@ def convert_activation(builder, layer, input_names, output_names, keras_layer, non_linearity = _get_activation_name_from_keras_layer(keras_layer) # Add a non-linearity layer - if non_linearity == 'SOFTMAX': - builder.add_softmax(name = layer, input_name = input_name, - output_name = output_name) + if non_linearity == "SOFTMAX": + builder.add_softmax(name=layer, input_name=input_name, output_name=output_name) return - if non_linearity == 'RELU6': - # No direct support of RELU with max-activation value - use negate and + if non_linearity == "RELU6": + # No direct support of RELU with max-activation value - use negate and # clip layers - relu_output_name = output_name + '_relu' - builder.add_activation(layer, 'RELU', input_name, relu_output_name) + relu_output_name = output_name + "_relu" + builder.add_activation(layer, "RELU", input_name, relu_output_name) # negate it - neg_output_name = relu_output_name + '_neg' - builder.add_activation(layer+'__neg__', 'LINEAR', relu_output_name, - neg_output_name,[-1.0, 0]) + neg_output_name = relu_output_name + "_neg" + builder.add_activation( + layer + "__neg__", "LINEAR", relu_output_name, neg_output_name, [-1.0, 0] + ) # apply threshold - clip_output_name = relu_output_name + '_clip' - builder.add_unary(layer+'__clip__', neg_output_name, clip_output_name, - 'threshold', alpha = -6.0) + clip_output_name = relu_output_name + "_clip" + builder.add_unary( + layer + "__clip__", + neg_output_name, + clip_output_name, + "threshold", + alpha=-6.0, + ) # negate it back - builder.add_activation(layer+'_neg2', 'LINEAR', clip_output_name, - output_name,[-1.0, 0]) + builder.add_activation( + layer + "_neg2", "LINEAR", clip_output_name, output_name, [-1.0, 0] + ) return - if non_linearity == 'SELU': - elu_output_name = output_name + '_elu' - builder.add_activation(layer+'__elu__', 'ELU', input_name, elu_output_name, - params=1.6732) - builder.add_elementwise(layer, - input_names=elu_output_name, - output_name=output_name, - mode='MULTIPLY', - alpha=1.0507) + if non_linearity == "SELU": + elu_output_name = output_name + "_elu" + builder.add_activation( + layer + "__elu__", "ELU", input_name, elu_output_name, params=1.6732 + ) + builder.add_elementwise( + layer, + input_names=elu_output_name, + output_name=output_name, + mode="MULTIPLY", + alpha=1.0507, + ) return params = None - if non_linearity == 'UNIT_ELU': + if non_linearity == "UNIT_ELU": params = 1.0 - non_linearity = 'ELU' - elif non_linearity == 'LEAKYRELU': + non_linearity = "ELU" + elif non_linearity == "LEAKYRELU": params = [keras_layer.alpha] - elif non_linearity == 'PRELU': + elif non_linearity == "PRELU": shared_axes = list(keras_layer.shared_axes) - if not (shared_axes == [1,2,3] or shared_axes == [1,2]): + if not (shared_axes == [1, 2, 3] or shared_axes == [1, 2]): _utils.raise_error_unsupported_scenario( - "Shared axis not being [1,2,3] or [1,2]", - 'parametric_relu', layer) + "Shared axis not being [1,2,3] or [1,2]", "parametric_relu", layer + ) params = _keras.backend.eval(keras_layer.weights[0]) - elif non_linearity == 'ELU': + elif non_linearity == "ELU": params = keras_layer.alpha - elif non_linearity == 'THRESHOLDEDRELU': + elif non_linearity == "THRESHOLDEDRELU": params = keras_layer.theta else: - pass # do nothing to parameters + pass # do nothing to parameters + + builder.add_activation( + name=layer, + non_linearity=non_linearity, + input_name=input_name, + output_name=output_name, + params=params, + ) - builder.add_activation(name = layer, - non_linearity = 
-def convert_advanced_relu(builder, layer, input_names, output_names,
-                          keras_layer, respect_train):
+def convert_advanced_relu(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a ReLU layer with a maximum value from keras to coreml.
 
@@ -311,27 +354,36 @@ def convert_advanced_relu(builder, layer, input_names, output_names,
     input_name, output_name = (input_names[0], output_names[0])
 
     if keras_layer.max_value is None:
-        builder.add_activation(layer, 'RELU', input_name, output_name)
+        builder.add_activation(layer, "RELU", input_name, output_name)
         return
 
     # No direct support of RELU with max-activation value - use negate and
     # clip layers
-    relu_output_name = output_name + '_relu'
-    builder.add_activation(layer, 'RELU', input_name, relu_output_name)
+    relu_output_name = output_name + "_relu"
+    builder.add_activation(layer, "RELU", input_name, relu_output_name)
     # negate it
-    neg_output_name = relu_output_name + '_neg'
-    builder.add_activation(layer+'__neg__', 'LINEAR', relu_output_name,
-            neg_output_name,[-1.0, 0])
+    neg_output_name = relu_output_name + "_neg"
+    builder.add_activation(
+        layer + "__neg__", "LINEAR", relu_output_name, neg_output_name, [-1.0, 0]
+    )
     # apply threshold
-    clip_output_name = relu_output_name + '_clip'
-    builder.add_unary(layer+'__clip__', neg_output_name, clip_output_name,
-            'threshold', alpha = -keras_layer.max_value)
+    clip_output_name = relu_output_name + "_clip"
+    builder.add_unary(
+        layer + "__clip__",
+        neg_output_name,
+        clip_output_name,
+        "threshold",
+        alpha=-keras_layer.max_value,
+    )
     # negate it back
-    builder.add_activation(layer+'_neg2', 'LINEAR', clip_output_name,
-            output_name,[-1.0, 0])
+    builder.add_activation(
+        layer + "_neg2", "LINEAR", clip_output_name, output_name, [-1.0, 0]
+    )
+
 
-def convert_convolution(builder, layer, input_names, output_names, keras_layer,
-                        respect_train):
+def convert_convolution(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert convolution layer from keras to coreml.
 
@@ -345,29 +397,28 @@ def convert_convolution(builder, layer, input_names, output_names, keras_layer,
     respect_train: boolean
         Whether or not to carry over Keras' "trainable" parameter.
     """
-    
+
     _check_data_format(keras_layer)
-    
+
     # Get input and output names
     input_name, output_name = (input_names[0], output_names[0])
 
     has_bias = keras_layer.use_bias
-    is_deconv = isinstance(keras_layer,
-                           _keras.layers.convolutional.Conv2DTranspose)
+    is_deconv = isinstance(keras_layer, _keras.layers.convolutional.Conv2DTranspose)
 
     # Get the weights from _keras.
     weightList = keras_layer.get_weights()
-    
+
     # Dimensions and weights
-    if is_deconv: 
+    if is_deconv:
         height, width, n_filters, channels = weightList[0].shape
-        W = weightList[0].transpose([0,1,3,2])
+        W = weightList[0].transpose([0, 1, 3, 2])
         try:
             output_blob_shape = list(filter(None, keras_layer.output_shape))
             output_shape = output_blob_shape[:-1]
         except:
             output_shape = None
-    else: 
+    else:
         height, width, channels, n_filters = weightList[0].shape
         W = weightList[0]
         output_shape = None
@@ -378,14 +429,16 @@ def convert_convolution(builder, layer, input_names, output_names, keras_layer,
     stride_height, stride_width = keras_layer.strides
 
     # Dilations
-    dilations = [1,1]
-    if (type(keras_layer.dilation_rate) is list) or (type(keras_layer.dilation_rate) is tuple):
+    dilations = [1, 1]
+    if (type(keras_layer.dilation_rate) is list) or (
+        type(keras_layer.dilation_rate) is tuple
+    ):
         dilations = [keras_layer.dilation_rate[0], keras_layer.dilation_rate[1]]
     else:
         dilations = [keras_layer.dilation_rate, keras_layer.dilation_rate]
-    if is_deconv and not dilations == [1,1]:
+    if is_deconv and not dilations == [1, 1]:
         raise ValueError("Unsupported non-unity dilation for Deconvolution layer")
-    
+
     groups = 1
     kernel_channels = channels
     # depth-wise convolution
@@ -393,34 +446,36 @@ def convert_convolution(builder, layer, input_names, output_names, keras_layer,
         groups = channels
         kernel_channels = 1
         depth_multiplier = keras_layer.depth_multiplier
-        W = _np.reshape(W,(height, width,1,channels * depth_multiplier))
+        W = _np.reshape(W, (height, width, 1, channels * depth_multiplier))
         output_channels = channels * depth_multiplier
 
-    builder.add_convolution(name = layer,
-             kernel_channels = kernel_channels,
-             output_channels = output_channels,
-             height = height,
-             width = width,
-             stride_height = stride_height,
-             stride_width = stride_width,
-             border_mode = keras_layer.padding,
-             groups = groups,
-             W = W,
-             b = b,
-             has_bias = has_bias,
-             is_deconv = is_deconv,
-             output_shape = output_shape,
-             input_name = input_name,
-             output_name = output_name,
-             dilation_factors = dilations)
+    builder.add_convolution(
+        name=layer,
+        kernel_channels=kernel_channels,
+        output_channels=output_channels,
+        height=height,
+        width=width,
+        stride_height=stride_height,
+        stride_width=stride_width,
+        border_mode=keras_layer.padding,
+        groups=groups,
+        W=W,
+        b=b,
+        has_bias=has_bias,
+        is_deconv=is_deconv,
+        output_shape=output_shape,
+        input_name=input_name,
+        output_name=output_name,
+        dilation_factors=dilations,
+    )
     if respect_train and keras_layer.trainable:
         builder.make_updatable([layer])
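In the depthwise case above, the Keras kernel of shape (height, width, channels, depth_multiplier) is reshaped so that each input channel becomes its own group. A shape-only numpy sketch with arbitrary sizes:

```python
import numpy as np

height, width, channels, depth_multiplier = 3, 3, 16, 2
W = np.zeros((height, width, channels, depth_multiplier))

# groups == channels and kernel_channels == 1 in the emitted convolution
W_coreml = np.reshape(W, (height, width, 1, channels * depth_multiplier))
assert W_coreml.shape == (3, 3, 1, 32)
```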
 
-
-def convert_convolution1d(builder, layer, input_names, output_names,
-                          keras_layer, respect_train):
+def convert_convolution1d(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a 1D convolution layer from keras to coreml.
 
@@ -446,52 +501,66 @@ def convert_convolution1d(builder, layer, input_names, output_names,
     # Parameter
     filter_length, input_dim, n_filters = weightList[0].shape
-    stride_width = keras_layer.strides if type(keras_layer.strides) is int \
-        else keras_layer.strides[0]
+    stride_width = (
+        keras_layer.strides
+        if type(keras_layer.strides) is int
+        else keras_layer.strides[0]
+    )
 
     # Weights and bias terms
-    W = _np.expand_dims(weightList[0],axis=0)
+    W = _np.expand_dims(weightList[0], axis=0)
     b = weightList[1] if has_bias else None
 
-    dilations = [1,1]
-    if (type(keras_layer.dilation_rate) is list) or \
-        (type(keras_layer.dilation_rate) is tuple):
+    dilations = [1, 1]
+    if (type(keras_layer.dilation_rate) is list) or (
+        type(keras_layer.dilation_rate) is tuple
+    ):
         dilations = [1, keras_layer.dilation_rate[0]]
     else:
         dilations = [1, keras_layer.dilation_rate]
-    
+
     keras_padding = keras_layer.padding
-    if keras_padding == 'causal':
-        builder.add_padding(name = layer + '__causal_pad__',
-            left = filter_length-1, right=0, top=0, bottom=0, value= 0,
-            input_name = input_name,
-            output_name= input_name + '__causal_pad__')
-        input_name = input_name + '__causal_pad__'
-        keras_padding = 'valid'
-
-    builder.add_convolution(name = layer,
-             kernel_channels = input_dim,
-             output_channels = n_filters,
-             height = 1,
-             width = filter_length,
-             stride_height = 1,
-             stride_width = stride_width,
-             border_mode = keras_padding,
-             groups = 1,
-             W = W,
-             b = b,
-             has_bias = has_bias,
-             is_deconv = False,
-             output_shape = output_shape,
-             input_name = input_name,
-             output_name = output_name,
-             dilation_factors = dilations)
+    if keras_padding == "causal":
+        builder.add_padding(
+            name=layer + "__causal_pad__",
+            left=filter_length - 1,
+            right=0,
+            top=0,
+            bottom=0,
+            value=0,
+            input_name=input_name,
+            output_name=input_name + "__causal_pad__",
+        )
+        input_name = input_name + "__causal_pad__"
+        keras_padding = "valid"
+
+    builder.add_convolution(
+        name=layer,
+        kernel_channels=input_dim,
+        output_channels=n_filters,
+        height=1,
+        width=filter_length,
+        stride_height=1,
+        stride_width=stride_width,
+        border_mode=keras_padding,
+        groups=1,
+        W=W,
+        b=b,
+        has_bias=has_bias,
+        is_deconv=False,
+        output_shape=output_shape,
+        input_name=input_name,
+        output_name=output_name,
+        dilation_factors=dilations,
+    )
     if respect_train and keras_layer.trainable:
         builder.make_updatable([layer])
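The 'causal' branch above emulates Keras causal padding by left-padding with filter_length - 1 zeros and then running a 'valid' convolution, so each output sample depends only on current and past inputs. A 1D numpy sketch of the equivalence (kernel values are arbitrary):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
kernel = np.array([0.5, 0.25])                     # filter_length == 2

padded = np.concatenate([np.zeros(len(kernel) - 1), x])
# np.convolve flips its kernel, so reverse it to get cross-correlation
out = np.convolve(padded, kernel[::-1], mode="valid")

assert out.shape == x.shape                        # aligned with the input
assert out[0] == kernel[1] * x[0]                  # first output sees only x[0]
```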
""" _check_data_format(keras_layer) - + # Get input and output names input_name, output_name = (input_names[0], output_names[0]) @@ -516,7 +585,7 @@ def convert_separable_convolution(builder, layer, input_names, output_names, weight_list = keras_layer.get_weights() output_blob_shape = list(filter(None, keras_layer.output_shape)) output_channels = output_blob_shape[-1] - + # D: depth mutliplier # w[0] is (H,W,Cin,D) # w[1] is (1,1,Cin * D, Cout) @@ -524,64 +593,71 @@ def convert_separable_convolution(builder, layer, input_names, output_names, W1 = weight_list[1] height, width, input_channels, depth_mult = W0.shape b = weight_list[2] if has_bias else None - + W0 = _np.reshape(W0, (height, width, 1, input_channels * depth_mult)) stride_height, stride_width = keras_layer.strides # Dilations - if (type(keras_layer.dilation_rate) is list) or (type(keras_layer.dilation_rate) is tuple): + if (type(keras_layer.dilation_rate) is list) or ( + type(keras_layer.dilation_rate) is tuple + ): dilations = [keras_layer.dilation_rate[0], keras_layer.dilation_rate[1]] else: dilations = [keras_layer.dilation_rate, keras_layer.dilation_rate] - intermediate_name = output_name + '_intermin_' - - builder.add_convolution(name = layer + '_step_1', - kernel_channels = 1, - output_channels = input_channels * depth_mult, - height = height, - width = width, - stride_height = stride_height, - stride_width = stride_width, - border_mode = keras_layer.padding, - groups = input_channels, - W = W0, - b = None, - has_bias = False, - is_deconv = False, - output_shape = None, - input_name = input_name, - output_name = intermediate_name, - dilation_factors = dilations) - - builder.add_convolution(name = layer + '_step_2', - kernel_channels = input_channels * depth_mult, - output_channels = output_channels, - height = 1, - width = 1, - stride_height = 1, - stride_width = 1, - border_mode = keras_layer.padding, - groups = 1, - W = W1, - b = b, - has_bias = has_bias, - is_deconv = False, - output_shape = None, - input_name = intermediate_name, - output_name = output_name, - dilation_factors = [1,1]) + intermediate_name = output_name + "_intermin_" + + builder.add_convolution( + name=layer + "_step_1", + kernel_channels=1, + output_channels=input_channels * depth_mult, + height=height, + width=width, + stride_height=stride_height, + stride_width=stride_width, + border_mode=keras_layer.padding, + groups=input_channels, + W=W0, + b=None, + has_bias=False, + is_deconv=False, + output_shape=None, + input_name=input_name, + output_name=intermediate_name, + dilation_factors=dilations, + ) + + builder.add_convolution( + name=layer + "_step_2", + kernel_channels=input_channels * depth_mult, + output_channels=output_channels, + height=1, + width=1, + stride_height=1, + stride_width=1, + border_mode=keras_layer.padding, + groups=1, + W=W1, + b=b, + has_bias=has_bias, + is_deconv=False, + output_shape=None, + input_name=intermediate_name, + output_name=output_name, + dilation_factors=[1, 1], + ) if respect_train and keras_layer.trainable: - builder.make_updatable([layer+'_step_1', layer+'_step_2']) + builder.make_updatable([layer + "_step_1", layer + "_step_2"]) -def convert_batchnorm(builder, layer, input_names, output_names, keras_layer, - respect_train): +def convert_batchnorm( + builder, layer, input_names, output_names, keras_layer, respect_train +): """ - Convert a Batch Normalization layer. - + Convert a Batch Normalization layer. + Parameters keras_layer: layer A keras layer object. 
@@ -609,37 +685,42 @@ def convert_batchnorm(builder, layer, input_names, output_names, keras_layer,
         beta = keras_layer.get_weights()[idx]
         idx += 1
     mean = keras_layer.get_weights()[idx]
-    std = keras_layer.get_weights()[idx+1]
-    
+    std = keras_layer.get_weights()[idx + 1]
+
     gamma = _np.ones(mean.shape) if gamma is None else gamma
     beta = _np.zeros(mean.shape) if beta is None else beta
 
     # compute adjusted parameters
     variance = std * std
     f = 1.0 / _np.sqrt(std + keras_layer.epsilon)
-    gamma1 = gamma*f
-    beta1 = beta - gamma*mean*f
-    mean[:] = 0.0 #mean
-    variance[:] = 1.0 - .00001 #stddev
+    gamma1 = gamma * f
+    beta1 = beta - gamma * mean * f
+    mean[:] = 0.0  # mean
+    variance[:] = 1.0 - 0.00001  # stddev
 
     builder.add_batchnorm(
-        name = layer,
-        channels = nb_channels,
-        gamma = gamma1,
-        beta = beta1,
-        mean = mean,
-        variance = variance,
-        input_name = input_name,
-        output_name = output_name)
+        name=layer,
+        channels=nb_channels,
+        gamma=gamma1,
+        beta=beta1,
+        mean=mean,
+        variance=variance,
+        input_name=input_name,
+        output_name=output_name,
+    )
     if respect_train and keras_layer.trainable:
-        logging.warning("BatchNorm layer '%s' is marked updatable, but Core "
-                        "ML does not yet support updating layers of this "
-                        "type. The layer will be frozen in Core ML.", layer)
+        logging.warning(
+            "BatchNorm layer '%s' is marked updatable, but Core "
+            "ML does not yet support updating layers of this "
+            "type. The layer will be frozen in Core ML.",
+            layer,
+        )
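The folding above moves the batch statistics into the scale and bias so the stored mean and variance can be set to (near-)identity values: with f = 1 / sqrt(std + eps), applying x * gamma1 + beta1 reproduces gamma * (x - mean) * f + beta. A numpy check of that algebra with arbitrary values:

```python
import numpy as np

gamma, beta = np.array([1.5]), np.array([0.3])
mean, std, eps = np.array([0.2]), np.array([4.0]), 1e-3

f = 1.0 / np.sqrt(std + eps)
gamma1 = gamma * f
beta1 = beta - gamma * mean * f

x = np.array([2.0])
# identity statistics plus folded scale/shift == original normalization
assert np.allclose(x * gamma1 + beta1, gamma * (x - mean) * f + beta)
```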
 
-def convert_flatten(builder, layer, input_names, output_names, keras_layer,
-                    respect_train):
+
+def convert_flatten(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a flatten layer from keras to coreml.
     ----------
@@ -667,19 +748,35 @@ def convert_flatten(builder, layer, input_names, output_names, keras_layer,
         blob_order = 1
 
         if len(in_shape) == 3 and in_shape[0] is None:
             # handling Keras rank-3 tensor (Batch, Sequence, Channels)
-            permute_output_name = output_name + '__permute__'
-            builder.add_permute(name=layer+'__permute__', dim=(2,1,0,3),
-                input_name=input_name, output_name=permute_output_name)
-            builder.add_flatten(name=layer, mode=1,
-                input_name=permute_output_name, output_name=output_name)
+            permute_output_name = output_name + "__permute__"
+            builder.add_permute(
+                name=layer + "__permute__",
+                dim=(2, 1, 0, 3),
+                input_name=input_name,
+                output_name=permute_output_name,
+            )
+            builder.add_flatten(
+                name=layer,
+                mode=1,
+                input_name=permute_output_name,
+                output_name=output_name,
+            )
         else:
-            builder.add_flatten(name=layer, mode=blob_order, input_name=input_name,
-                output_name=output_name)
+            builder.add_flatten(
+                name=layer,
+                mode=blob_order,
+                input_name=input_name,
+                output_name=output_name,
+            )
     except:
-        builder.add_flatten(name=layer, mode=1, input_name=input_name, output_name=output_name)
+        builder.add_flatten(
+            name=layer, mode=1, input_name=input_name, output_name=output_name
+        )
 
-def convert_merge(builder, layer, input_names, output_names, keras_layer,
-                  respect_train):
+
+def convert_merge(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a merge layer from keras to coreml.
 
@@ -697,11 +794,14 @@ def convert_merge(builder, layer, input_names, output_names, keras_layer,
     output_name = output_names[0]
 
     mode = _get_elementwise_name_from_keras_layer(keras_layer)
-    builder.add_elementwise(name = layer, input_names = input_names,
-                            output_name = output_name, mode = mode)
+    builder.add_elementwise(
+        name=layer, input_names=input_names, output_name=output_name, mode=mode
+    )
+
 
-def convert_pooling(builder, layer, input_names, output_names, keras_layer,
-                    respect_train):
+def convert_pooling(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert pooling layer from keras to coreml.
 
@@ -720,50 +820,64 @@ def convert_pooling(builder, layer, input_names, output_names, keras_layer,
     input_name, output_name = (input_names[0], output_names[0])
 
     # Pooling layer type
-    if isinstance(keras_layer, _keras.layers.convolutional.MaxPooling2D) or \
-        isinstance(keras_layer, _keras.layers.convolutional.MaxPooling1D) or \
-        isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling2D) or \
-        isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling1D):
-        layer_type_str = 'MAX'
-    elif isinstance(keras_layer, _keras.layers.convolutional.AveragePooling2D) or \
-        isinstance(keras_layer, _keras.layers.convolutional.AveragePooling1D) or \
-        isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling2D) or \
-        isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling1D):
-        layer_type_str = 'AVERAGE'
+    if (
+        isinstance(keras_layer, _keras.layers.convolutional.MaxPooling2D)
+        or isinstance(keras_layer, _keras.layers.convolutional.MaxPooling1D)
+        or isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling2D)
+        or isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling1D)
+    ):
+        layer_type_str = "MAX"
+    elif (
+        isinstance(keras_layer, _keras.layers.convolutional.AveragePooling2D)
+        or isinstance(keras_layer, _keras.layers.convolutional.AveragePooling1D)
+        or isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling2D)
+        or isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling1D)
+    ):
+        layer_type_str = "AVERAGE"
     else:
         raise TypeError("Pooling type %s not supported" % keras_layer)
 
     # if it's global, set the global flag
-    if isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling2D) or \
-        isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling2D):
+    if isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling2D) or isinstance(
+        keras_layer, _keras.layers.pooling.GlobalAveragePooling2D
+    ):
         # 2D global pooling
         global_pooling = True
         height, width = (0, 0)
-        stride_height, stride_width = (0,0)
-        padding_type = 'VALID'
-    elif isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling1D) or \
-        isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling1D):
+        stride_height, stride_width = (0, 0)
+        padding_type = "VALID"
+    elif isinstance(
+        keras_layer, _keras.layers.pooling.GlobalMaxPooling1D
+    ) or isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling1D):
         # 1D global pooling: 1D global pooling seems problematic in the backend,
         # use this work-around
         global_pooling = False
         _, width, channels = keras_layer.input_shape
         height = 1
         stride_height, stride_width = height, width
-        padding_type = 'VALID'
+        padding_type = "VALID"
     else:
         global_pooling = False
         # Set pool sizes and strides
         # 1D cases:
-        if isinstance(keras_layer, _keras.layers.convolutional.MaxPooling1D) or \
-            isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling1D) or \
-            isinstance(keras_layer, _keras.layers.convolutional.AveragePooling1D) or \
-            isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling1D):
-            pool_size = keras_layer.pool_size if type(keras_layer.pool_size) is \
-                int else keras_layer.pool_size[0]
+        if (
+            isinstance(keras_layer, _keras.layers.convolutional.MaxPooling1D)
+            or isinstance(keras_layer, _keras.layers.pooling.GlobalMaxPooling1D)
+            or isinstance(keras_layer, _keras.layers.convolutional.AveragePooling1D)
+            or isinstance(keras_layer, _keras.layers.pooling.GlobalAveragePooling1D)
+        ):
+            pool_size = (
+                keras_layer.pool_size
+                if type(keras_layer.pool_size) is int
+                else keras_layer.pool_size[0]
+            )
             height, width = 1, pool_size
             if keras_layer.strides is not None:
-                strides = keras_layer.strides if type(keras_layer.strides) is \
-                    int else keras_layer.strides[0]
+                strides = (
+                    keras_layer.strides
+                    if type(keras_layer.strides) is int
+                    else keras_layer.strides[0]
+                )
                 stride_height, stride_width = 1, strides
             else:
                 stride_height, stride_width = 1, pool_size
@@ -777,27 +891,31 @@ def convert_pooling(builder, layer, input_names, output_names, keras_layer,
         # Padding
         padding = keras_layer.padding
-        if keras_layer.padding == 'valid':
-            padding_type = 'VALID'
-        elif keras_layer.padding == 'same':
-            padding_type = 'SAME'
+        if keras_layer.padding == "valid":
+            padding_type = "VALID"
+        elif keras_layer.padding == "same":
+            padding_type = "SAME"
         else:
             raise TypeError("Border mode %s not supported" % padding)
 
-    builder.add_pooling(name = layer,
-        height = height,
-        width = width,
-        stride_height = stride_height,
-        stride_width = stride_width,
-        layer_type = layer_type_str,
-        padding_type = padding_type,
-        input_name = input_name,
-        output_name = output_name,
-        exclude_pad_area = True,
-        is_global = global_pooling)
+    builder.add_pooling(
+        name=layer,
+        height=height,
+        width=width,
+        stride_height=stride_height,
+        stride_width=stride_width,
+        layer_type=layer_type_str,
+        padding_type=padding_type,
+        input_name=input_name,
+        output_name=output_name,
+        exclude_pad_area=True,
+        is_global=global_pooling,
+    )
+
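The 1D global pooling work-around above lowers a global pool to a regular pool whose window spans the whole sequence (height 1, width equal to the sequence length). A numpy sketch showing the two agree:

```python
import numpy as np

seq = np.arange(12.0).reshape(6, 2)      # (sequence_length, channels)

global_avg = seq.mean(axis=0)            # what GlobalAveragePooling1D computes
windowed_avg = seq.reshape(1, 6, 2).mean(axis=(0, 1))  # one full-width window

assert np.allclose(global_avg, windowed_avg)
```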
 
-def convert_padding(builder, layer, input_names, output_names, keras_layer,
-                    respect_train):
+def convert_padding(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert padding layer from keras to coreml.
     Keras only supports zero padding at this time.
@@ -814,12 +932,12 @@ def convert_padding(builder, layer, input_names, output_names, keras_layer,
     _check_data_format(keras_layer)
     # Get input and output names
     input_name, output_name = (input_names[0], output_names[0])
-    
+
     is_1d = isinstance(keras_layer, _keras.layers.ZeroPadding1D)
-    
+
     padding = keras_layer.padding
     top = left = bottom = right = 0
-    if is_1d: 
+    if is_1d:
         if type(padding) is int:
             left = right = padding
         elif type(padding) is tuple:
@@ -829,7 +947,7 @@ def convert_padding(builder, layer, input_names, output_names, keras_layer,
                 left, right = padding[0]
             else:
                 raise ValueError("Unrecognized padding option: %s" % (str(padding)))
-        else: 
+        else:
             raise ValueError("Unrecognized padding option: %s" % (str(padding)))
     else:
         if type(padding) is int:
@@ -843,17 +961,25 @@ def convert_padding(builder, layer, input_names, output_names, keras_layer,
                 left, right = padding[1]
            else:
                 raise ValueError("Unrecognized padding option: %s" % (str(padding)))
-        else: 
+        else:
             raise ValueError("Unrecognized padding option: %s" % (str(padding)))
 
     # Now add the layer
-    builder.add_padding(name = layer,
-        left = left, right=right, top=top, bottom=bottom, value = 0,
-        input_name = input_name, output_name=output_name
-        )
+    builder.add_padding(
+        name=layer,
+        left=left,
+        right=right,
+        top=top,
+        bottom=bottom,
+        value=0,
+        input_name=input_name,
+        output_name=output_name,
+    )
 
-def convert_cropping(builder, layer, input_names, output_names, keras_layer,
-                     respect_train):
+
+def convert_cropping(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a cropping layer from keras to coreml.
@@ -867,7 +993,7 @@ def convert_cropping(builder, layer, input_names, output_names, keras_layer,
     respect_train: boolean
         Ignored.
     """
-    
+
     _check_data_format(keras_layer)
     # Get input and output names
     input_name, output_name = (input_names[0], output_names[0])
@@ -875,7 +1001,7 @@ def convert_cropping(builder, layer, input_names, output_names, keras_layer,
     cropping = keras_layer.cropping
     top = left = bottom = right = 0
-    if is_1d: 
+    if is_1d:
         if type(cropping) is int:
             left = right = cropping
         elif type(cropping) is tuple:
@@ -885,7 +1011,7 @@ def convert_cropping(builder, layer, input_names, output_names, keras_layer,
                 left, right = cropping[0]
             else:
                 raise ValueError("Unrecognized cropping option: %s" % (str(cropping)))
-        else: 
+        else:
             raise ValueError("Unrecognized cropping option: %s" % (str(cropping)))
     else:
         if type(cropping) is int:
@@ -899,17 +1025,25 @@ def convert_cropping(builder, layer, input_names, output_names, keras_layer,
                 left, right = cropping[1]
             else:
                 raise ValueError("Unrecognized cropping option: %s" % (str(cropping)))
-        else: 
+        else:
             raise ValueError("Unrecognized cropping option: %s" % (str(cropping)))
 
     # Now add the layer
-    builder.add_crop(name = layer,
-        left = left, right=right, top=top, bottom=bottom, offset = [0,0],
-        input_names = [input_name], output_name=output_name
-        )
+    builder.add_crop(
+        name=layer,
+        left=left,
+        right=right,
+        top=top,
+        bottom=bottom,
+        offset=[0, 0],
+        input_names=[input_name],
+        output_name=output_name,
+    )
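The padding and cropping branches above normalize the several formats Keras accepts (a single int, a (before, after) pair, or per-axis pairs) into explicit left/right/top/bottom amounts. For the 2D per-axis form, for example:

```python
padding = ((1, 2), (3, 4))        # ZeroPadding2D: ((top, bottom), (left, right))

top, bottom = padding[0]
left, right = padding[1]

assert (top, bottom, left, right) == (1, 2, 3, 4)
```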
 
-def convert_upsample(builder, layer, input_names, output_names, keras_layer,
-                     respect_train):
+
+def convert_upsample(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert upsample layer from keras to coreml.
 
@@ -937,37 +1071,50 @@ def convert_upsample(builder, layer, input_names, output_names, keras_layer,
         elif type(keras_layer.size) is int:
             fh, fw = 1, keras_layer.size
         else:
-            raise ValueError("Unrecognized upsample factor format %s" % (str(keras_layer.size)))
-    else: 
+            raise ValueError(
+                "Unrecognized upsample factor format %s" % (str(keras_layer.size))
+            )
+    else:
         if type(keras_layer.size) is int:
             fh = fw = keras_layer.size
         elif len(keras_layer.size) == 2:
             if keras_layer.size[0] != keras_layer.size[1]:
-                raise ValueError("Upsample with different rows and columns not "
-                                 "supported.")
+                raise ValueError(
+                    "Upsample with different rows and columns is not supported."
+                )
             else:
                 fh = keras_layer.size[0]
                 fw = keras_layer.size[1]
         else:
-            raise ValueError("Unrecognized upsample factor format %s" % (str(keras_layer.size)))
+            raise ValueError(
+                "Unrecognized upsample factor format %s" % (str(keras_layer.size))
+            )
 
-    kerasmode2coreml = {'nearest': 'NN', 'bilinear': 'BILINEAR'}
-    interpolation = getattr(keras_layer, 'interpolation', 'nearest') # Defaults to 'nearest' for Keras < 2.2.3
+    kerasmode2coreml = {"nearest": "NN", "bilinear": "BILINEAR"}
+    interpolation = getattr(
+        keras_layer, "interpolation", "nearest"
+    )  # Defaults to 'nearest' for Keras < 2.2.3
     if interpolation not in kerasmode2coreml:
-        raise ValueError('Only supported "nearest" or "bilinear" interpolation for upsampling layers.')
+        raise ValueError(
+            'Only "nearest" and "bilinear" interpolation are supported for upsampling layers.'
+        )
     mode = kerasmode2coreml[interpolation]
 
-    builder.add_upsample(name = layer,
-                         scaling_factor_h = fh,
-                         scaling_factor_w = fw,
-                         input_name = input_name,
-                         output_name = output_name,
-                         mode = mode)
+    builder.add_upsample(
+        name=layer,
+        scaling_factor_h=fh,
+        scaling_factor_w=fw,
+        input_name=input_name,
+        output_name=output_name,
+        mode=mode,
+    )
+
 
-def convert_permute(builder, layer, input_names, output_names, keras_layer,
-                    respect_train):
+def convert_permute(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a permute layer from keras to coreml.
 
@@ -990,7 +1137,11 @@ def convert_permute(builder, layer, input_names, output_names, keras_layer,
         x = list(_np.array(keras_dims))
         arr = [2, 3, 1]  # HWC in Keras
         arr_permuted = [arr[x[0] - 1], arr[x[1] - 1], arr[x[2] - 1]]
-        arr_permuted = [arr_permuted[2], arr_permuted[0], arr_permuted[1]] # coreml format: channel first
+        arr_permuted = [
+            arr_permuted[2],
+            arr_permuted[0],
+            arr_permuted[1],
+        ]  # coreml format: channel first
         # add a sequence axis
         dim = [0] + arr_permuted
         dim = tuple(dim)
@@ -1000,13 +1151,16 @@ def convert_permute(builder, layer, input_names, output_names, keras_layer,
         # but parameters we need to use to convert to CoreML model
         dim = keras_dims
     else:
-        raise NotImplementedError('Supports only 3d permutation.')
+        raise NotImplementedError("Supports only 3d permutation.")
 
-    builder.add_permute(name = layer, dim=dim, input_name = input_name,
-                        output_name = output_name)
+    builder.add_permute(
+        name=layer, dim=dim, input_name=input_name, output_name=output_name
+    )
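The permute mapping above re-indexes Keras' (H, W, C) axes into CoreML's sequence-first, channel-first layout. A worked example for keras dims (2, 1, 3), i.e. swapping H and W while keeping C:

```python
x = [2, 1, 3]                     # keras_layer.dims for an H/W swap
arr = [2, 3, 1]                   # HWC in Keras
arr_permuted = [arr[x[0] - 1], arr[x[1] - 1], arr[x[2] - 1]]
arr_permuted = [arr_permuted[2], arr_permuted[0], arr_permuted[1]]
dim = tuple([0] + arr_permuted)   # prepend the sequence axis

# (S, C, H, W) with H and W exchanged, channels untouched
assert dim == (0, 1, 3, 2)
```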
 
-def convert_reshape(builder, layer, input_names, output_names, keras_layer,
-                    respect_train):
+
+def convert_reshape(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     respect_train: boolean
         Ignored.
@@ -1018,11 +1172,11 @@ def convert_reshape(builder, layer, input_names, output_names, keras_layer,
     target_shape = keras_layer.target_shape
 
     def get_coreml_target_shape(target_shape):
-        if len(target_shape) == 1: #(D,)
-            coreml_shape = (1,target_shape[0],1,1)
-        elif len(target_shape) == 2: #(S,D)
-            coreml_shape = target_shape + (1,1)
-        elif len(target_shape) == 3: #(H,W,C)
+        if len(target_shape) == 1:  # (D,)
+            coreml_shape = (1, target_shape[0], 1, 1)
+        elif len(target_shape) == 2:  # (S,D)
+            coreml_shape = target_shape + (1, 1)
+        elif len(target_shape) == 3:  # (H,W,C)
             coreml_shape = (1, target_shape[2], target_shape[0], target_shape[1])
         else:
             coreml_shape = None
@@ -1031,20 +1185,29 @@ def get_coreml_target_shape(target_shape):
     def get_mode(input_shape, target_shape):
         in_shape = input_shape[1:]
         if len(in_shape) == 3 or len(target_shape) == 3:
-            return 1 
+            return 1
         else:
             return 0
 
     new_shape = get_coreml_target_shape(target_shape)
     if new_shape is not None:
         mode = get_mode(input_shape, target_shape)
-        builder.add_reshape(name = layer, input_name = input_name, output_name=output_name,
-                            target_shape = new_shape, mode = mode)
+        builder.add_reshape(
+            name=layer,
+            input_name=input_name,
+            output_name=output_name,
+            target_shape=new_shape,
+            mode=mode,
+        )
     else:
-        _utils.raise_error_unsupported_categorical_option('input_shape', str(input_shape), 'reshape', layer)
+        _utils.raise_error_unsupported_categorical_option(
+            "input_shape", str(input_shape), "reshape", layer
+        )
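The nested get_coreml_target_shape above maps Keras target shapes onto CoreML's (S, C, H, W)-style 4-tuples; restated standalone here for illustration:

```python
def get_coreml_target_shape(target_shape):
    # restated from convert_reshape for illustration
    if len(target_shape) == 1:        # (D,)
        return (1, target_shape[0], 1, 1)
    elif len(target_shape) == 2:      # (S,D)
        return target_shape + (1, 1)
    elif len(target_shape) == 3:      # (H,W,C)
        return (1, target_shape[2], target_shape[0], target_shape[1])
    return None

assert get_coreml_target_shape((8,)) == (1, 8, 1, 1)
assert get_coreml_target_shape((4, 6, 3)) == (1, 3, 4, 6)
```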
 
-def convert_simple_rnn(builder, layer, input_names, output_names, keras_layer,
-                       respect_train):
+
+def convert_simple_rnn(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a SimpleRNN layer from keras to coreml.
 
@@ -1069,8 +1232,9 @@ def convert_simple_rnn(builder, layer, input_names, output_names, keras_layer,
     W_x = _np.zeros((hidden_size, input_size))
     b = None
 
-    implementation = keras_layer.implementation if hasattr(keras_layer,
-        'implementation') else 0
+    implementation = (
+        keras_layer.implementation if hasattr(keras_layer, "implementation") else 0
+    )
     if implementation == 0:
         W_h = keras_layer.get_weights()[1].T
         W_x = keras_layer.get_weights()[0].T
@@ -1082,23 +1246,29 @@ def convert_simple_rnn(builder, layer, input_names, output_names, keras_layer,
 
     # Add to the network
     builder.add_simple_rnn(
-        name = layer,
-        W_h = W_h, W_x = W_x, b = b,
-        hidden_size = hidden_size,
-        input_size = input_size,
-        activation = activation_str,
-        input_names = input_names,
-        output_names = output_names,
+        name=layer,
+        W_h=W_h,
+        W_x=W_x,
+        b=b,
+        hidden_size=hidden_size,
+        input_size=input_size,
+        activation=activation_str,
+        input_names=input_names,
+        output_names=output_names,
         output_all=output_all,
-        reverse_input=reverse_input)
+        reverse_input=reverse_input,
+    )
     if respect_train and keras_layer.trainable:
-        logging.warning("RNN layer '%s' is marked updatable, but Core "
-                        "ML does not yet support updating layers of this "
-                        "type. The layer will be frozen in Core ML.", layer)
+        logging.warning(
+            "RNN layer '%s' is marked updatable, but Core "
+            "ML does not yet support updating layers of this "
+            "type. The layer will be frozen in Core ML.",
+            layer,
+        )
 
-def convert_lstm(builder, layer, input_names, output_names, keras_layer,
-                 respect_train):
+
+def convert_lstm(builder, layer, input_names, output_names, keras_layer, respect_train):
     """
     Convert an LSTM layer from keras to coreml.
 
@@ -1122,51 +1292,59 @@ def convert_lstm(builder, layer, input_names, output_names, keras_layer,
     # CoreML: I F O G; W_h and W_x are separated
     W_h, W_x, b = ([], [], [])
     keras_W_h = keras_layer.get_weights()[1].T
-    W_h.append(keras_W_h[0 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[1 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[3 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[2 * hidden_size:][:hidden_size])
+    W_h.append(keras_W_h[0 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[1 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[3 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[2 * hidden_size :][:hidden_size])
 
     keras_W_x = keras_layer.get_weights()[0].T
-    W_x.append(keras_W_x[0 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[1 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[3 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[2 * hidden_size:][:hidden_size])
+    W_x.append(keras_W_x[0 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[1 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[3 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[2 * hidden_size :][:hidden_size])
 
     if keras_layer.use_bias:
         keras_b = keras_layer.get_weights()[2]
-        b.append(keras_b[0 * hidden_size:][:hidden_size])
-        b.append(keras_b[1 * hidden_size:][:hidden_size])
-        b.append(keras_b[3 * hidden_size:][:hidden_size])
-        b.append(keras_b[2 * hidden_size:][:hidden_size])
-    if len(b) == 0: 
+        b.append(keras_b[0 * hidden_size :][:hidden_size])
+        b.append(keras_b[1 * hidden_size :][:hidden_size])
+        b.append(keras_b[3 * hidden_size :][:hidden_size])
+        b.append(keras_b[2 * hidden_size :][:hidden_size])
+    if len(b) == 0:
         b = None
 
     # Set activation type
-    inner_activation_str = _get_recurrent_activation_name_from_keras(keras_layer.recurrent_activation)
+    inner_activation_str = _get_recurrent_activation_name_from_keras(
+        keras_layer.recurrent_activation
+    )
     activation_str = _get_recurrent_activation_name_from_keras(keras_layer.activation)
 
     # Add to the network
     builder.add_unilstm(
-        name = layer,
-        W_h = W_h, W_x = W_x, b = b,
-        hidden_size = hidden_size,
-        input_size = input_size,
-        input_names = input_names,
-        output_names = output_names,
-        inner_activation = inner_activation_str,
-        cell_state_update_activation = activation_str,
-        output_activation = activation_str,
-        output_all = output_all,
-        forget_bias = keras_layer.unit_forget_bias,
-        reverse_input = reverse_input)
+        name=layer,
+        W_h=W_h,
+        W_x=W_x,
+        b=b,
+        hidden_size=hidden_size,
+        input_size=input_size,
+        input_names=input_names,
+        output_names=output_names,
+        inner_activation=inner_activation_str,
+        cell_state_update_activation=activation_str,
+        output_activation=activation_str,
+        output_all=output_all,
+        forget_bias=keras_layer.unit_forget_bias,
+        reverse_input=reverse_input,
+    )
 
     if respect_train and keras_layer.trainable:
-        logging.warning("LSTM layer '%s' is marked updatable, but Core "
-                        "ML does not yet support updating layers of this "
-                        "type. The layer will be frozen in Core ML.", layer)
+        logging.warning(
+            "LSTM layer '%s' is marked updatable, but Core "
+            "ML does not yet support updating layers of this "
+            "type. The layer will be frozen in Core ML.",
+            layer,
+        )
+
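The slicing above reorders the stacked LSTM gate blocks: Keras lays them out as [i, f, c, o] along the first axis of the transposed kernel, while CoreML expects [i, f, o, g] (g being the cell/candidate gate), hence the 0, 1, 3, 2 offsets. A numpy sketch:

```python
import numpy as np

hidden_size = 3
# tag each gate block with a distinct constant: i=0, f=1, c=2, o=3
keras_W = np.concatenate(
    [np.full((hidden_size, hidden_size), v, dtype=float) for v in range(4)]
)

reordered = [keras_W[i * hidden_size :][:hidden_size] for i in (0, 1, 3, 2)]
# CoreML order i, f, o, g corresponds to constants 0, 1, 3, 2
assert [m[0, 0] for m in reordered] == [0.0, 1.0, 3.0, 2.0]
```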
 
-def convert_gru(builder, layer, input_names, output_names, keras_layer,
-                respect_train):
+def convert_gru(builder, layer, input_names, output_names, keras_layer, respect_train):
     """
     Convert a GRU layer from keras to coreml.
 
@@ -1191,47 +1369,57 @@ def convert_gru(builder, layer, input_names, output_names, keras_layer,
     # CoreML: Z R O
     W_h, W_x, b = ([], [], [])
     keras_W_h = keras_layer.get_weights()[1].T
-    W_h.append(keras_W_h[0 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[1 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[2 * hidden_size:][:hidden_size])
+    W_h.append(keras_W_h[0 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[1 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[2 * hidden_size :][:hidden_size])
 
     keras_W_x = keras_layer.get_weights()[0].T
-    W_x.append(keras_W_x[0 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[1 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[2 * hidden_size:][:hidden_size])
-    
+    W_x.append(keras_W_x[0 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[1 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[2 * hidden_size :][:hidden_size])
+
     if keras_layer.use_bias:
         keras_b = keras_layer.get_weights()[2]
-        b.append(keras_b[0 * hidden_size:][:hidden_size])
-        b.append(keras_b[1 * hidden_size:][:hidden_size])
-        b.append(keras_b[2 * hidden_size:][:hidden_size])
+        b.append(keras_b[0 * hidden_size :][:hidden_size])
+        b.append(keras_b[1 * hidden_size :][:hidden_size])
+        b.append(keras_b[2 * hidden_size :][:hidden_size])
     if len(b) == 0:
         b = None
 
     # Set activation type
-    inner_activation_str = _get_recurrent_activation_name_from_keras(keras_layer.recurrent_activation)
+    inner_activation_str = _get_recurrent_activation_name_from_keras(
+        keras_layer.recurrent_activation
+    )
     activation_str = _get_recurrent_activation_name_from_keras(keras_layer.activation)
 
     # Add to the network
     builder.add_gru(
-        name = layer,
-        W_h = W_h, W_x = W_x, b = b,
-        input_size = input_size,
-        hidden_size = hidden_size,
-        input_names = input_names,
-        output_names = output_names,
-        activation = activation_str,
-        inner_activation = inner_activation_str,
-        output_all = output_all,
-        reverse_input = reverse_input)
+        name=layer,
+        W_h=W_h,
+        W_x=W_x,
+        b=b,
+        input_size=input_size,
+        hidden_size=hidden_size,
+        input_names=input_names,
+        output_names=output_names,
+        activation=activation_str,
+        inner_activation=inner_activation_str,
+        output_all=output_all,
+        reverse_input=reverse_input,
+    )
 
     if respect_train and keras_layer.trainable:
-        logging.warning("GRU layer '%s' is marked updatable, but Core "
-                        "ML does not yet support updating layers of this "
-                        "type. The layer will be frozen in Core ML.", layer)
+        logging.warning(
+            "GRU layer '%s' is marked updatable, but Core "
+            "ML does not yet support updating layers of this "
+            "type. The layer will be frozen in Core ML.",
+            layer,
+        )
+
 
-def convert_bidirectional(builder, layer, input_names, output_names,
-                          keras_layer, respect_train):
+def convert_bidirectional(
+    builder, layer, input_names, output_names, keras_layer, respect_train
+):
     """
     Convert a bidirectional layer from keras to coreml.
     Currently assumes the units are LSTMs.
@@ -1250,11 +1438,11 @@ def convert_bidirectional(builder, layer, input_names, output_names,
     input_size = keras_layer.input_shape[-1]
 
     lstm_layer = keras_layer.forward_layer
-    if (type(lstm_layer) != _keras.layers.recurrent.LSTM):
-        raise TypeError('Bidirectional layers only supported with LSTM')
-        
+    if type(lstm_layer) != _keras.layers.recurrent.LSTM:
+        raise TypeError("Bidirectional layers only supported with LSTM")
+
     if lstm_layer.go_backwards:
-        raise TypeError(' \'go_backwards\' mode not supported with Bidirectional layers')
+        raise TypeError(" 'go_backwards' mode not supported with Bidirectional layers")
 
     output_all = keras_layer.return_sequences
     hidden_size = lstm_layer.units
@@ -1264,107 +1452,124 @@ def convert_bidirectional(builder, layer, input_names, output_names,
     # Keras has all forward weights, followed by backward in the same order
     W_h, W_x, b = ([], [], [])
     keras_W_h = keras_layer.forward_layer.get_weights()[1].T
-    W_h.append(keras_W_h[0 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[1 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[3 * hidden_size:][:hidden_size])
-    W_h.append(keras_W_h[2 * hidden_size:][:hidden_size])
+    W_h.append(keras_W_h[0 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[1 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[3 * hidden_size :][:hidden_size])
+    W_h.append(keras_W_h[2 * hidden_size :][:hidden_size])
 
     keras_W_x = keras_layer.forward_layer.get_weights()[0].T
-    W_x.append(keras_W_x[0 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[1 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[3 * hidden_size:][:hidden_size])
-    W_x.append(keras_W_x[2 * hidden_size:][:hidden_size])
+    W_x.append(keras_W_x[0 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[1 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[3 * hidden_size :][:hidden_size])
+    W_x.append(keras_W_x[2 * hidden_size :][:hidden_size])
 
     if keras_layer.forward_layer.use_bias:
         keras_b = keras_layer.forward_layer.get_weights()[2]
-        b.append(keras_b[0 * hidden_size:][:hidden_size])
-        b.append(keras_b[1 * hidden_size:][:hidden_size])
-        b.append(keras_b[3 * hidden_size:][:hidden_size])
-        b.append(keras_b[2 * hidden_size:][:hidden_size])
+        b.append(keras_b[0 * hidden_size :][:hidden_size])
+        b.append(keras_b[1 * hidden_size :][:hidden_size])
+        b.append(keras_b[3 * hidden_size :][:hidden_size])
+        b.append(keras_b[2 * hidden_size :][:hidden_size])
     if len(b) == 0:
         b = None
 
-    W_h_back, W_x_back, b_back = ([],[],[])
+    W_h_back, W_x_back, b_back = ([], [], [])
     keras_W_h = keras_layer.backward_layer.get_weights()[1].T
-    W_h_back.append(keras_W_h[0 * hidden_size:][:hidden_size])
-    W_h_back.append(keras_W_h[1 * hidden_size:][:hidden_size])
-    W_h_back.append(keras_W_h[3 * hidden_size:][:hidden_size])
-    W_h_back.append(keras_W_h[2 * hidden_size:][:hidden_size])
+    W_h_back.append(keras_W_h[0 * hidden_size :][:hidden_size])
+    W_h_back.append(keras_W_h[1 * hidden_size :][:hidden_size])
+    W_h_back.append(keras_W_h[3 * hidden_size :][:hidden_size])
+    W_h_back.append(keras_W_h[2 * hidden_size :][:hidden_size])
 
     keras_W_x = keras_layer.backward_layer.get_weights()[0].T
-    W_x_back.append(keras_W_x[0 * hidden_size:][:hidden_size])
-    W_x_back.append(keras_W_x[1 * hidden_size:][:hidden_size])
-    W_x_back.append(keras_W_x[3 * hidden_size:][:hidden_size])
-    W_x_back.append(keras_W_x[2 * hidden_size:][:hidden_size])
+    W_x_back.append(keras_W_x[0 * hidden_size :][:hidden_size])
+    W_x_back.append(keras_W_x[1 * hidden_size :][:hidden_size])
+    W_x_back.append(keras_W_x[3 * hidden_size :][:hidden_size])
+    W_x_back.append(keras_W_x[2 * hidden_size :][:hidden_size])
 
     if keras_layer.backward_layer.use_bias:
         keras_b = keras_layer.backward_layer.get_weights()[2]
-        b_back.append(keras_b[0 * hidden_size:][:hidden_size])
-        b_back.append(keras_b[1 * hidden_size:][:hidden_size])
-        b_back.append(keras_b[3 * hidden_size:][:hidden_size])
-        b_back.append(keras_b[2 * hidden_size:][:hidden_size])
+        b_back.append(keras_b[0 * hidden_size :][:hidden_size])
+        b_back.append(keras_b[1 * hidden_size :][:hidden_size])
+        b_back.append(keras_b[3 * hidden_size :][:hidden_size])
+        b_back.append(keras_b[2 * hidden_size :][:hidden_size])
     if len(b_back) == 0:
         b_back = None
-    
+
     if (b == None and b_back != None) or (b != None and b_back == None):
-        raise ValueError('Unsupported Bi-directional LSTM configuration. Bias '
-                         'must be enabled/disabled for both directions.')
+        raise ValueError(
+            "Unsupported Bi-directional LSTM configuration. Bias "
+            "must be enabled/disabled for both directions."
+        )
 
     # Set activation type
-    inner_activation_str = _get_recurrent_activation_name_from_keras(lstm_layer.recurrent_activation)
+    inner_activation_str = _get_recurrent_activation_name_from_keras(
+        lstm_layer.recurrent_activation
+    )
     activation_str = _get_recurrent_activation_name_from_keras(lstm_layer.activation)
 
     output_name_1 = output_names[0]
-    if hasattr(keras_layer, 'merge_mode'):
+    if hasattr(keras_layer, "merge_mode"):
         merge_mode = keras_layer.merge_mode
-        if merge_mode not in ['concat','sum','mul','ave']:
-            raise NotImplementedError('merge_mode \'%s\' in Bidirectional LSTM '
-                                      'not supported currently' % merge_mode)
-        if merge_mode != 'concat':
-            output_name_1 += '_concatenated_bilstm_output'
+        if merge_mode not in ["concat", "sum", "mul", "ave"]:
+            raise NotImplementedError(
+                "merge_mode '%s' in Bidirectional LSTM "
+                "not supported currently" % merge_mode
+            )
+        if merge_mode != "concat":
+            output_name_1 += "_concatenated_bilstm_output"
 
     # Add to the network
     builder.add_bidirlstm(
-        name = layer,
-        W_h = W_h, W_x = W_x, b = b,
-        W_h_back = W_h_back, W_x_back = W_x_back, b_back = b_back,
+        name=layer,
+        W_h=W_h,
+        W_x=W_x,
+        b=b,
+        W_h_back=W_h_back,
+        W_x_back=W_x_back,
+        b_back=b_back,
         hidden_size=hidden_size,
        input_size=input_size,
         input_names=input_names,
         output_names=[output_name_1] + output_names[1:],
-        inner_activation = inner_activation_str,
-        cell_state_update_activation = activation_str,
-        output_activation = activation_str,
-        forget_bias = lstm_layer.unit_forget_bias,
-        output_all = output_all)
+        inner_activation=inner_activation_str,
+        cell_state_update_activation=activation_str,
+        output_activation=activation_str,
+        forget_bias=lstm_layer.unit_forget_bias,
+        output_all=output_all,
+    )
 
     if output_name_1 != output_names[0]:
-        mode = 'CONCAT'
-        if merge_mode == 'sum':
-            mode = 'ADD'
-        elif merge_mode == 'ave':
-            mode = 'AVE'
-        elif merge_mode == 'mul':
-            mode = 'MULTIPLY'
-        builder.add_split(name = layer + '_split',
-                input_name= output_name_1,
-                output_names= [output_names[0] + '_forward',
-                               output_names[0] + '_backward'])
-        builder.add_elementwise(name = layer + '_elementwise',
-                input_names = [output_names[0] + '_forward',
-                               output_names[0] + '_backward'],
-                output_name = output_names[0],
-                mode = mode)
+        mode = "CONCAT"
+        if merge_mode == "sum":
+            mode = "ADD"
+        elif merge_mode == "ave":
+            mode = "AVE"
+        elif merge_mode == "mul":
+            mode = "MULTIPLY"
+        builder.add_split(
+            name=layer + "_split",
+            input_name=output_name_1,
+            output_names=[output_names[0] + "_forward", output_names[0] + "_backward"],
+        )
+        builder.add_elementwise(
+            name=layer + "_elementwise",
+            input_names=[output_names[0] + "_forward", output_names[0] + "_backward"],
+            output_name=output_names[0],
+            mode=mode,
+        )
 
     if respect_train and keras_layer.trainable:
-        logging.warning("Bidirectional layer '%s' is marked updatable, but "
-                        "Core ML does not yet support updating layers of this "
-                        "type. The layer will be frozen in Core ML.", layer)
+        logging.warning(
+            "Bidirectional layer '%s' is marked updatable, but "
+            "Core ML does not yet support updating layers of this "
+            "type. The layer will be frozen in Core ML.",
+            layer,
+        )
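For merge_mode values other than 'concat', the converter above still emits a concatenated BiLSTM blob, then splits it into forward and backward halves and recombines them elementwise. A numpy sketch of the 'sum' case:

```python
import numpy as np

hidden = 4
concat = np.arange(2.0 * hidden)         # CONCAT output: [forward | backward]

forward, backward = concat[:hidden], concat[hidden:]   # add_split
merged = forward + backward                            # mode 'ADD'

assert merged.shape == (hidden,)
```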
"_backward"], + ) + builder.add_elementwise( + name=layer + "_elementwise", + input_names=[output_names[0] + "_forward", output_names[0] + "_backward"], + output_name=output_names[0], + mode=mode, + ) if respect_train and keras_layer.trainable: - logging.warning("Bidirectional layer '%s' is marked updatable, but " - "Core ML does not yet support updating layers of this " - "type. The layer will be frozen in Core ML.", layer) + logging.warning( + "Bidirectional layer '%s' is marked updatable, but " + "Core ML does not yet support updating layers of this " + "type. The layer will be frozen in Core ML.", + layer, + ) -def convert_repeat_vector(builder, layer, input_names, output_names, - keras_layer, respect_train): +def convert_repeat_vector( + builder, layer, input_names, output_names, keras_layer, respect_train +): """ respect_train: boolean Ignored. @@ -1373,13 +1578,12 @@ def convert_repeat_vector(builder, layer, input_names, output_names, # Get input and output names input_name, output_name = (input_names[0], output_names[0]) - builder.add_sequence_repeat(name = layer, - nrep = keras_layer.n, - input_name = input_name, - output_name = output_name) + builder.add_sequence_repeat( + name=layer, nrep=keras_layer.n, input_name=input_name, output_name=output_name + ) + -def default_skip(builder, layer, input_names, output_names, keras_layer, - respect_train): +def default_skip(builder, layer, input_names, output_names, keras_layer, respect_train): """ Layers that can be skipped. """ diff --git a/coremltools/converters/keras/_topology.py b/coremltools/converters/keras/_topology.py index 32db4bb30..0e8ba9069 100644 --- a/coremltools/converters/keras/_topology.py +++ b/coremltools/converters/keras/_topology.py @@ -8,27 +8,27 @@ _KERAS_LAYERS_1D = [ _keras.layers.Convolution1D, - _keras.layers.AtrousConvolution1D, - _keras.layers.UpSampling1D, - _keras.layers.ZeroPadding1D, - _keras.layers.Cropping1D, - _keras.layers.MaxPooling1D, - _keras.layers.AveragePooling1D, - _keras.layers.GlobalMaxPooling1D, - _keras.layers.GlobalAveragePooling1D + _keras.layers.AtrousConvolution1D, + _keras.layers.UpSampling1D, + _keras.layers.ZeroPadding1D, + _keras.layers.Cropping1D, + _keras.layers.MaxPooling1D, + _keras.layers.AveragePooling1D, + _keras.layers.GlobalMaxPooling1D, + _keras.layers.GlobalAveragePooling1D, ] _KERAS_ACTIVATION_LAYERS = [ - _keras.layers.Activation, - _keras.layers.advanced_activations.LeakyReLU, - _keras.layers.advanced_activations.PReLU, - _keras.layers.advanced_activations.ELU, - _keras.layers.advanced_activations.ParametricSoftplus, - _keras.layers.advanced_activations.ThresholdedReLU, - _keras.layers.advanced_activations.SReLU + _keras.layers.Activation, + _keras.layers.advanced_activations.LeakyReLU, + _keras.layers.advanced_activations.PReLU, + _keras.layers.advanced_activations.ELU, + _keras.layers.advanced_activations.ParametricSoftplus, + _keras.layers.advanced_activations.ThresholdedReLU, + _keras.layers.advanced_activations.SReLU, ] _KERAS_NORMALIZATION_LAYERS = [ - _keras.layers.BatchNormalization, + _keras.layers.BatchNormalization, ] _KERAS_RECURRENT_LAYERS = [ @@ -38,13 +38,15 @@ _keras.layers.wrappers.Bidirectional, ] + def _to_list(x): - if type(x) is not list: + if type(x) is not list: return [x] - else: + else: return x -def _insert_to_dict(d, key, e): + +def _insert_to_dict(d, key, e): # d is a dict where key maps to a list if key not in d: d[key] = [] @@ -59,97 +61,101 @@ class NetGraph(object): connection_map - a map where the key is a layer, the value is a list of 
     reverse_connection_map - a map where the key is a layer, the value is a list of its predecessors
     keras_layer_map - a map where the key is a layer name, the value is Keras layer
-    model - a reference of the keras model. 
-    blob_names - blob names for each one of the edge. 
+    model - a reference to the keras model.
+    blob_names - blob names for each edge.
     """
+
     def __init__(self, model):
         self.layer_list = []
         self.edge_map = {}
         self.reverse_edge_map = {}
         self.keras_layer_map = {}
-        
+
         self.input_layers = []
         self.output_layers = []
-        self.layers_inputs = {} # each layer's input blobs
-        self.layers_outputs = {} # each layer's output blobs
-        
+        self.layers_inputs = {}  # each layer's input blobs
+        self.layers_outputs = {}  # each layer's output blobs
+
         # these will be pairs of the form (name, shape) because it'll show up on the interface
         self.optional_inputs = []
         self.optional_outputs = []
         self.layers_optional_inputs = {}
         self.layers_optional_outputs = {}
-        
+
         self.model = model
-        
+
     def _add_layer(self, keras_layer):
-        # add a layer without adding connections. 
+        # add a layer without adding connections.
         # when a layer already exists, this operation won't do anything
         layer = keras_layer.name
-        if layer not in self.layer_list: 
+        if layer not in self.layer_list:
             self.layer_list.append(layer)
             self.keras_layer_map[layer] = keras_layer
-        
+
     def get_predecessors(self, layer_name):
-        if layer_name in self.reverse_edge_map: 
-            return self.reverse_edge_map[layer_name][:] # needs to make a copy
-        else: 
+        if layer_name in self.reverse_edge_map:
+            return self.reverse_edge_map[layer_name][:]  # needs to make a copy
+        else:
             return []
-        
+
     def get_successors(self, layer_name):
         if layer_name in self.edge_map:
-            return self.edge_map[layer_name][:] # needs to make a copy
+            return self.edge_map[layer_name][:]  # needs to make a copy
         else:
             return []
-        
+
     def get_keras_layer(self, layer_name):
         return self.keras_layer_map[layer_name]
-    
+
     def make_input_layers(self):
         """
-        Extract the ordering of the input layers. 
+        Extract the ordering of the input layers.
         """
         self.input_layers = []
-        if hasattr(self.model, 'input_layers'):
+        if hasattr(self.model, "input_layers"):
            input_keras_layers = self.model.input_layers[:]
             self.input_layers = [None] * len(input_keras_layers)
-            for layer in self.layer_list: 
+            for layer in self.layer_list:
                 keras_layer = self.keras_layer_map[layer]
-                if isinstance(keras_layer, _keras.engine.topology.InputLayer): 
+                if isinstance(keras_layer, _keras.engine.topology.InputLayer):
                     if keras_layer in input_keras_layers:
                         idx = input_keras_layers.index(keras_layer)
                         self.input_layers[idx] = layer
-        elif len(self.model.inbound_nodes) <= 1: 
-            for ts in _to_list(self.model.input): 
+        elif len(self.model.inbound_nodes) <= 1:
+            for ts in _to_list(self.model.input):
                 # search for the InputLayer that matches this ts
-                for l in self.layer_list: 
+                for l in self.layer_list:
                     kl = self.keras_layer_map[l]
-                    if isinstance(kl, _keras.engine.topology.InputLayer) and kl.input == ts: 
+                    if (
+                        isinstance(kl, _keras.engine.topology.InputLayer)
+                        and kl.input == ts
+                    ):
                         self.input_layers.append(l)
-        else: 
+        else:
             raise ValueError("Input values cannot be identified.")
-        
+
     def make_output_layers(self):
         """
-        Extract the ordering of output layers. 
+        Extract the ordering of output layers.
         """
         # TODO
-        # use successors == 0 as the criteria for output layer 
-        # will fail when some intermediate layers also generate output. 
-        # However, because the possibility of having inserted layers, 
-        # it's more difficult to tell which layer is the output layer. 
-        # Once possible way is to keep track of newly added layers...
+        # use successors == 0 as the criterion for output layers;
+        # will fail when some intermediate layers also generate output.
+        # However, because of the possibility of inserted layers,
+        # it's more difficult to tell which layer is the output layer.
+        # One possible way is to keep track of newly added layers...
         self.output_layers = []
         for layer in self.layer_list:
-            if len(self.get_successors(layer)) == 0: 
+            if len(self.get_successors(layer)) == 0:
                 self.output_layers.append(layer)
-    
+
     def get_input_layers(self):
         return self.input_layers
-    
+
     def get_output_layers(self):
         return self.output_layers
-    
-    def generate_blob_names(self): 
+
+    def generate_blob_names(self):
         """
         Generate blob names for each edge. At this time, Keras does not
         support "fork" operation (a layer with more than 1 blob output). So we just
@@ -157,90 +163,90 @@ def generate_blob_names(self):
         networks are singly-connected graphs - which should be the case.
         """
         # generate blob names that represent edges in blob_name_map
-        # because of the InputLayers, input blobs are also generated. 
-        
+        # because of the InputLayers, input blobs are also generated.
+
         # Generate each layer's input / output blob names
-        for layer in self.layer_list: 
+        for layer in self.layer_list:
             keras_layer = self.keras_layer_map[layer]
             # no need to generate InputLayers' blobs
             if not isinstance(keras_layer, _keras.engine.topology.InputLayer):
                 # layer's input blob names depend on predecessors
                 preds = self.get_predecessors(layer)
-                for pred in preds: 
-                    blob_name = pred + '_output'
+                for pred in preds:
+                    blob_name = pred + "_output"
                     _insert_to_dict(self.layers_inputs, layer, blob_name)
                 # layer's output blob is just named after itself
-                blob_name = layer + '_output'
+                blob_name = layer + "_output"
                 _insert_to_dict(self.layers_outputs, layer, blob_name)
-    
+
     def get_layer_blobs(self, layer):
         keras_layer = self.keras_layer_map[layer]
         if isinstance(keras_layer, _keras.engine.topology.InputLayer):
             return None, None
-        else: 
+        else:
             input_blobs = self.layers_inputs[layer]
             output_blobs = self.layers_outputs[layer]
-            if layer in self.layers_optional_inputs: 
+            if layer in self.layers_optional_inputs:
                 input_blobs += self.layers_optional_inputs[layer]
-            if layer in self.layers_optional_outputs: 
+            if layer in self.layers_optional_outputs:
                 output_blobs += self.layers_optional_outputs[layer]
             return input_blobs, output_blobs
-    
+
     def reset_model_input_names(self, new_names):
         # call this method after make_input_layers() is called
-        if new_names is None: 
+        if new_names is None:
             return
-        if len(new_names) != len(self.input_layers):
-            print('Input name length mismatch')
+        if len(new_names) != len(self.input_layers):
+            print("Input name length mismatch")
             return
         for i, in_layer in enumerate(self.input_layers):
-            old_blob_name = in_layer + '_output'
+            old_blob_name = in_layer + "_output"
             new_blob_name = new_names[i]
             succs = self.get_successors(in_layer)
-            for succ in succs: 
+            for succ in succs:
                 idx = self.layers_inputs[succ].index(old_blob_name)
                 self.layers_inputs[succ][idx] = new_blob_name
 
     def reset_model_output_names(self, new_names):
-        if new_names is None: 
+        if new_names is None:
             return
-        if len(new_names) != len(self.output_layers):
-            print('Output name length mismatch')
+        if len(new_names) != len(self.output_layers):
+            print("Output name length mismatch")
             return
         for i, out_layer in enumerate(self.output_layers):
             new_blob_name = new_names[i]
             self.layers_outputs[out_layer][0] = new_blob_name
-    
+
     # need to update both layer's in/out list and graph in/out ports
     def add_recurrent_optionals(self):
         # call this after blob names are generated
-        for layer in self.layer_list: 
+        for layer in self.layer_list:
             keras_layer = self.keras_layer_map[layer]
-            if type(keras_layer) in _KERAS_RECURRENT_LAYERS: 
+            if type(keras_layer) in _KERAS_RECURRENT_LAYERS:
                 if not isinstance(keras_layer, _keras.layers.wrappers.Bidirectional):
                     hidden_size = keras_layer.output_dim
-                else: 
+                else:
                     hidden_size = keras_layer.forward_layer.output_dim
-                h_in_name = layer + '_h_in'
-                h_out_name = layer + '_h_out'
+                h_in_name = layer + "_h_in"
+                h_out_name = layer + "_h_out"
                 self.optional_inputs.append((h_in_name, hidden_size))
                 self.optional_outputs.append((h_out_name, hidden_size))
-                _insert_to_dict(self.layers_optional_inputs, layer, h_in_name) 
+                _insert_to_dict(self.layers_optional_inputs, layer, h_in_name)
                 _insert_to_dict(self.layers_optional_outputs, layer, h_out_name)
                 if isinstance(keras_layer, _keras.layers.recurrent.LSTM):
-                    c_in_name = layer + '_c_in'
-                    c_out_name = layer + '_c_out'
+                    c_in_name = layer + "_c_in"
+                    c_out_name = layer + "_c_out"
                     self.optional_inputs.append((c_in_name, hidden_size))
                     self.optional_outputs.append((c_out_name, hidden_size))
                     _insert_to_dict(self.layers_optional_inputs, layer, c_in_name)
                     _insert_to_dict(self.layers_optional_outputs, layer, c_out_name)
                 elif isinstance(keras_layer, _keras.layers.wrappers.Bidirectional):
-                    c_in_name = layer + '_c_in'
-                    c_out_name = layer + '_c_out'
-                    h_in_name_rev = layer + '_h_in_rev'
-                    c_in_name_rev = layer + '_c_in_rev'
-                    h_out_name_rev = layer + '_h_out_rev'
-                    c_out_name_rev = layer + '_c_out_rev'
+                    c_in_name = layer + "_c_in"
+                    c_out_name = layer + "_c_out"
+                    h_in_name_rev = layer + "_h_in_rev"
+                    c_in_name_rev = layer + "_c_in_rev"
+                    h_out_name_rev = layer + "_h_out_rev"
+                    c_out_name_rev = layer + "_c_out_rev"
                     self.optional_inputs.append((c_in_name, hidden_size))
                     self.optional_outputs.append((c_out_name, hidden_size))
                     self.optional_inputs.append((h_in_name_rev, hidden_size))
@@ -253,27 +259,32 @@ def add_recurrent_optionals(self):
                     _insert_to_dict(self.layers_optional_inputs, layer, c_in_name_rev)
                     _insert_to_dict(self.layers_optional_outputs, layer, h_out_name_rev)
                     _insert_to_dict(self.layers_optional_outputs, layer, c_out_name_rev)
-    
+
     def _get_first_embedded_model(self):
         for idx, layer in enumerate(self.layer_list):
             keras_layer = self.keras_layer_map[layer]
-            if isinstance(keras_layer, _keras.models.Sequential) or isinstance(keras_layer, _keras.models.Model):
+            if isinstance(keras_layer, _keras.models.Sequential) or isinstance(
+                keras_layer, _keras.models.Model
+            ):
                 return idx
         return -1
-    
+
     def _get_first_shared_layer(self):
         for idx, layer in enumerate(self.layer_list):
-            if (not isinstance(self.keras_layer_map[layer], _keras.layers.Merge)) and len(self.get_predecessors(layer)) > 1: # weight sharing criteria
+            if (
+                (not isinstance(self.keras_layer_map[layer], _keras.layers.Merge))
+                and len(self.get_predecessors(layer)) > 1
+            ):  # weight sharing criteria
                return idx
         return -1
-    
+
     def _get_first_layer_of_type(self, layer_type):
-        for idx, layer in enumerate(self.layer_list): 
+        for idx, layer in enumerate(self.layer_list):
            keras_layer = self.keras_layer_map[layer]
             if isinstance(keras_layer, layer_type):
                 return idx
         return -1
-    
+
     def _add_edge(self, src, snk):
         if src not in self.edge_map:
             self.edge_map[src] = []
@@ -283,15 +294,15 @@ def _add_edge(self, src, snk):
_add_edge(self, src, snk): self.reverse_edge_map[snk] = [] if src not in self.reverse_edge_map[snk]: self.reverse_edge_map[snk].append(src) - + def _remove_edge(self, src, snk): self.edge_map[src].remove(snk) if len(self.edge_map[src]) == 0: self.edge_map.pop(src) self.reverse_edge_map[snk].remove(src) - if len(self.reverse_edge_map[snk]) == 0: + if len(self.reverse_edge_map[snk]) == 0: self.reverse_edge_map.pop(snk) - + def _remove_layer(self, layer): """ remove the layer and its input/output edges @@ -306,7 +317,7 @@ def _remove_layer(self, layer): # remove layer in the data structures self.keras_layer_map.pop(layer) self.layer_list.remove(layer) - + def _remove_layer_and_reconnect(self, layer): """ remove the layer, and reconnect each of its predecessor to each of its successor @@ -334,40 +345,43 @@ def _remove_old_edges(self, layer): self._remove_edge(pred, layer) for succ in successors: self._remove_edge(layer, succ) - + def _remove_layers_of_type(self, layer_type): idx = self._get_first_layer_of_type(layer_type) while idx >= 0: layer = self.layer_list[idx] self._remove_layer_and_reconnect(layer) idx = self._get_first_layer_of_type(layer_type) - + def remove_skip_layers(self, skip_layers): - for skip_layer in skip_layers: + for skip_layer in skip_layers: self._remove_layers_of_type(skip_layer) - + def remove_internal_input_layers(self): idx, nb_layers = 0, len(self.layer_list) - while idx < nb_layers: + while idx < nb_layers: layer = self.layer_list[idx] keras_layer = self.keras_layer_map[layer] - if isinstance(keras_layer, _keras.engine.topology.InputLayer) and len(self.get_predecessors(layer)) > 0: + if ( + isinstance(keras_layer, _keras.engine.topology.InputLayer) + and len(self.get_predecessors(layer)) > 0 + ): # these are internal input layers that needs to be taken out self._remove_layer_and_reconnect(layer) idx -= 1 nb_layers -= 1 idx += 1 - + def _insert_layer_after(self, layer_idx, new_layer, new_keras_layer): """ - Insert the new_layer after layer, whose position is layer_idx. The new layer's + Insert the new_layer after layer, whose position is layer_idx. The new layer's parameter is stored in a Keras layer called new_keras_layer """ - # reminder: new_keras_layer is not part of the original Keras network, - # so it's input / output blob information is missing. It serves only as - # a parameter holder. + # reminder: new_keras_layer is not part of the original Keras network, + # so it's input / output blob information is missing. It serves only as + # a parameter holder. layer = self.layer_list[layer_idx] - self.layer_list.insert(layer_idx+1, new_layer) + self.layer_list.insert(layer_idx + 1, new_layer) self.keras_layer_map[new_layer] = new_keras_layer successors = self.get_successors(layer) # add edge layer -> new_layer @@ -376,51 +390,54 @@ def _insert_layer_after(self, layer_idx, new_layer, new_keras_layer): for succ in successors: self._add_edge(new_layer, succ) self._remove_edge(layer, succ) - + def _insert_layer_between(self, src, snk, new_layer, new_keras_layer): """ - Insert the new_layer before layer, whose position is layer_idx. The new layer's + Insert the new_layer before layer, whose position is layer_idx. 
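
`_remove_layer_and_reconnect` below splices a node out of the graph by connecting every predecessor to every successor (a cartesian product), which is how skip layers disappear without breaking data flow. A toy sketch of the same idea on a dict-of-lists graph (names are invented for illustration):

```python
# Toy sketch of remove-and-reconnect: dropping "dropout_1" rewires its
# single predecessor straight to its single successor.
def remove_and_reconnect(edges, layer):
    preds = [src for src, sinks in edges.items() if layer in sinks]
    succs = edges.pop(layer, [])
    for p in preds:
        edges[p] = [s for s in edges[p] if s != layer] + succs
    return edges

edges = {"conv_1": ["dropout_1"], "dropout_1": ["dense_1"]}
assert remove_and_reconnect(edges, "dropout_1") == {"conv_1": ["dense_1"]}
```
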
The new layer's parameter is stored in a Keras layer called new_keras_layer """ - if snk is None: + if snk is None: insert_pos = self.layer_list.index(src) + 1 - else: - insert_pos = self.layer_list.index(snk) # insert position + else: + insert_pos = self.layer_list.index(snk) # insert position self.layer_list.insert(insert_pos, new_layer) self.keras_layer_map[new_layer] = new_keras_layer - if src is None: # snk is an input layer + if src is None: # snk is an input layer self._add_edge(new_layer, snk) elif snk is None: # src is an output layer self._add_edge(src, new_layer) - else: + else: self._add_edge(src, new_layer) self._add_edge(new_layer, snk) - self._remove_edge(src, snk) - + self._remove_edge(src, snk) + def defuse_activation(self): """ - Defuse the fused activation layers in the network. + Defuse the fused activation layers in the network. """ idx, nb_layers = 0, len(self.layer_list) while idx < nb_layers: layer = self.layer_list[idx] k_layer = self.keras_layer_map[layer] # unwrap time-distributed layers - if (isinstance(k_layer, _keras.layers.TimeDistributed)): + if isinstance(k_layer, _keras.layers.TimeDistributed): k_layer = k_layer.layer - if (isinstance(k_layer, _keras.layers.convolutional.Convolution2D) or - isinstance(k_layer, _keras.layers.convolutional.Convolution1D) or - isinstance(k_layer, _keras.layers.core.Dense)): + if ( + isinstance(k_layer, _keras.layers.convolutional.Convolution2D) + or isinstance(k_layer, _keras.layers.convolutional.Convolution1D) + or isinstance(k_layer, _keras.layers.core.Dense) + ): import six + if six.PY2: func_name = k_layer.activation.func_name else: func_name = k_layer.activation.__name__ - if (func_name != 'linear'): + if func_name != "linear": # Create new layer - new_layer = layer + '__activation__' + new_layer = layer + "__activation__" new_keras_layer = _keras.layers.core.Activation(func_name) # insert new layer after it self._insert_layer_after(idx, new_layer, new_keras_layer) @@ -428,134 +445,146 @@ def defuse_activation(self): nb_layers += 1 idx += 1 - - def is_activation(self,layer): + + def is_activation(self, layer): keras_layer = self.keras_layer_map[layer] for activation_type in _KERAS_ACTIVATION_LAYERS: if isinstance(keras_layer, activation_type): return True return False - - def is_1d_layer(self,layer): + + def is_1d_layer(self, layer): keras_layer = self.keras_layer_map[layer] for layer_type in _KERAS_LAYERS_1D: if isinstance(keras_layer, layer_type): return True return False - + def _get_1d_interface_edges(self): """ Get edges that represents transition from not 1D to 1D, and 1D to not 1D - A 'in_edge e(u,v)' means u operates on non-1D blobs, but v operates on 1D blobs. - An 'out_edge e(u,v)' means u operates on 1D blobs, but v operates on non-1D blobs. + A 'in_edge e(u,v)' means u operates on non-1D blobs, but v operates on 1D blobs. + An 'out_edge e(u,v)' means u operates on 1D blobs, but v operates on non-1D blobs. 
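
`defuse_activation` above splits a fused nonlinearity into its own layer so that each converted layer maps onto a single CoreML operation. A sketch of the transformation on one layer, assuming Keras 2 is installed (this shows the Python 3 `__name__` path; the converter also handles `func_name` on Python 2):

```python
# Sketch of defusing: a Dense with a fused ReLU becomes a linear Dense
# followed by a standalone Activation named "<layer>__activation__".
from keras.layers import Activation, Dense

fused = Dense(16, activation="relu", name="dense_1")
func_name = fused.activation.__name__        # -> "relu"

if func_name != "linear":
    defused = [
        Dense(16, name="dense_1"),           # weights and bias stay here
        Activation(func_name, name="dense_1__activation__"),
    ]
```
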
""" in_edges = [] - for layer in self.layer_list: + for layer in self.layer_list: if not self.is_1d_layer(layer): continue preds = self.get_predecessors(layer) if len(preds) == 0: in_edges.append((None, layer)) - else: + else: # because 1D layers are all 1-input, there should only be 1 predecessor u, v = preds[0], layer - while (u != None) and (self.is_activation(u) or type(u) in _KERAS_NORMALIZATION_LAYERS): + while (u != None) and ( + self.is_activation(u) or type(u) in _KERAS_NORMALIZATION_LAYERS + ): preds = self.get_predecessors(u) v = u u = preds[0] if len(preds) > 0 else None if u is None or (not self.is_1d_layer(u)): in_edges.append((u, v)) - + out_edges = [] - for layer in self.layer_list: + for layer in self.layer_list: if not self.is_1d_layer(layer): continue succs = self.get_successors(layer) if len(succs) == 0: out_edges.append((layer, None)) elif not self.is_activation(succs[0]): - for succ in succs: + for succ in succs: if not self.is_1d_layer(succ): out_edges.append((layer, succ)) - else: + else: act_layer = succs[0] succs = self.get_successors(act_layer) if len(succs) == 0: out_edges.append((act_layer, None)) - else: - for succ in succs: + else: + for succ in succs: if not self.is_1d_layer(succ): out_edges.append((act_layer, succ)) return in_edges, out_edges - + def insert_1d_permute_layers(self): """ - Insert permutation layers before a 1D start point or after 1D end point + Insert permutation layers before a 1D start point or after 1D end point """ idx, nb_layers = 0, len(self.layer_list) in_edges, out_edges = self._get_1d_interface_edges() - - # Hacky Warning: (1) use a 4-D permute, which is not likely to happen in Keras, + + # Hacky Warning: (1) use a 4-D permute, which is not likely to happen in Keras, # to represent actual permutation needed for (seq, c, h, w) in CoreML - # (2) Assume 2-D input shape has meaning (seq, c), and during CoreML runtime, + # (2) Assume 2-D input shape has meaning (seq, c), and during CoreML runtime, # it is represented as 4D blob, (seq, c, h, w) - for in_edge in in_edges: + for in_edge in in_edges: src, snk = in_edge if src is None: - permute_layer = '_permute_' + snk - else: - permute_layer = src + '_permute_' + snk - keras_permute = _keras.layers.Permute(dims=(3,1,2,0)) # assume w = 1, switch seq and w + permute_layer = "_permute_" + snk + else: + permute_layer = src + "_permute_" + snk + keras_permute = _keras.layers.Permute( + dims=(3, 1, 2, 0) + ) # assume w = 1, switch seq and w self._insert_layer_between(src, snk, permute_layer, keras_permute) - for out_edge in out_edges: + for out_edge in out_edges: src, snk = out_edge - if snk is None: - permute_layer = src + '_permute_' + if snk is None: + permute_layer = src + "_permute_" else: - permute_layer = src + '_permute_' + snk - keras_permute = _keras.layers.Permute(dims=(3,1,2,0)) # assume w = 1, switch seq and w back + permute_layer = src + "_permute_" + snk + keras_permute = _keras.layers.Permute( + dims=(3, 1, 2, 0) + ) # assume w = 1, switch seq and w back self._insert_layer_between(src, snk, permute_layer, keras_permute) - + def insert_permute_for_spatial_bn(self): - + # find spatial batchnorm layers spatial_bn_layers = [] for layer in self.layer_list: keras_layer = self.keras_layer_map[layer] - if isinstance(keras_layer, _keras.layers.BatchNormalization) and len(keras_layer.input_shape) == 4: + if ( + isinstance(keras_layer, _keras.layers.BatchNormalization) + and len(keras_layer.input_shape) == 4 + ): if keras_layer.axis == 1 or keras_layer.axis == 2: 
spatial_bn_layers.append(layer) - + for sbn in spatial_bn_layers: axis = self.keras_layer_map[sbn].axis # axis == 1: swap H axis; axis == 2 : swap W axis - dims = (0,2,1,3) if axis == 1 else (0,3,2,1) + dims = (0, 2, 1, 3) if axis == 1 else (0, 3, 2, 1) # add permutation before spatial batchnorm pred = self.get_predecessors(sbn)[0] - permute_layer = pred + '_permute_' + sbn + permute_layer = pred + "_permute_" + sbn keras_permute = _keras.layers.Permute(dims=dims) self._insert_layer_between(pred, sbn, permute_layer, keras_permute) # add permutation after spatial batchnorm succs = self.get_successors(sbn) - if len(succs) == 0: - permute_layer = sbn + '_permute_' + if len(succs) == 0: + permute_layer = sbn + "_permute_" keras_permute = _keras.layers.Permute(dims=dims) self._insert_layer_between(sbn, None, permute_layer, keras_permute) - else: - for succ in succs: - permute_layer = sbn + '_permute_' + succ + else: + for succ in succs: + permute_layer = sbn + "_permute_" + succ keras_permute = _keras.layers.Permute(dims=dims) self._insert_layer_between(sbn, succ, permute_layer, keras_permute) - + def build(self): - # sanity check. + # sanity check. model = self.model - if not (type(model) == _keras.models.Sequential or type(model) == _keras.models.Model): + if not ( + type(model) == _keras.models.Sequential + or type(model) == _keras.models.Model + ): raise TypeError("Keras layer of type %s is not supported." % type(model)) self = None return - + # build the graph without considering embedded subgraphs for i, layer in enumerate(model.layers): for node in layer.inbound_nodes: @@ -566,7 +595,7 @@ def build(self): self._add_edge(pred.name, layer.name) self.layer_list.append(layer.name) self.keras_layer_map[layer.name] = layer - + # Duplicate models for weight sharing idx = self._get_first_shared_layer() while idx >= 0: @@ -574,8 +603,8 @@ def build(self): keras_layer = self.keras_layer_map[layer] predecessors = self.reverse_edge_map[layer] successors = self.edge_map[layer] - new_layers = [layer+'_'+str(i) for i in range(len(predecessors))] - self.layer_list[idx:idx+1] = new_layers + new_layers = [layer + "_" + str(i) for i in range(len(predecessors))] + self.layer_list[idx : idx + 1] = new_layers for i, new_layer in enumerate(new_layers): self.edge_map[new_layer] = [] self.reverse_edge_map[new_layer] = [] @@ -587,7 +616,7 @@ def build(self): self._remove_old_edges(layer) self.keras_layer_map.pop(layer) idx = self._get_first_shared_layer() - + # Expand the sub-models idx = self._get_first_embedded_model() while idx >= 0: @@ -601,73 +630,82 @@ def build(self): embedded_layer_list = embedded_graph.layer_list new_layer_list = [] for embedded_layer_name in embedded_layer_list: - new_layer_name = embedded_model + '_' + embedded_layer_name + new_layer_name = embedded_model + "_" + embedded_layer_name new_layer_list.append(new_layer_name) - self.keras_layer_map[new_layer_name] = embedded_graph.keras_layer_map[embedded_layer_name] + self.keras_layer_map[new_layer_name] = embedded_graph.keras_layer_map[ + embedded_layer_name + ] # add edge [embed_layer -> its succ] embedded_successors = embedded_graph.get_successors(embedded_layer_name) for embed_succ_name in embedded_successors: - new_embed_succ_name = embedded_model + '_' + embed_succ_name + new_embed_succ_name = embedded_model + "_" + embed_succ_name self._add_edge(new_layer_name, new_embed_succ_name) # add edge [pred -> embed_layer] - embedded_predecessors = embedded_graph.get_predecessors(embedded_layer_name) - for embed_pred_name in 
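
The spatial-batchnorm handling below wraps a batchnorm whose Keras axis is 1 or 2 in a pair of permutes, so the axis being normalized lands on CoreML's channel slot. Both dims tuples are plain axis swaps and therefore self-inverse, which is why the very same tuple is reused after the batchnorm. A numpy illustration:

```python
# dims=(0, 2, 1, 3) exchanges axes 1 and 2 of the internal 4-D blob;
# dims=(0, 3, 2, 1) exchanges axes 1 and 3. Each is a transposition, so
# applying it twice restores the original layout.
import numpy as np

blob = np.zeros((1, 4, 8, 16))
dims = (0, 2, 1, 3)                          # the axis == 1 case above
swapped = np.transpose(blob, dims)
assert swapped.shape == (1, 8, 4, 16)
assert np.transpose(swapped, dims).shape == blob.shape   # self-inverse
```
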
embedded_predecessors: - new_embed_pred_name = embedded_model + '_' + embed_pred_name + embedded_predecessors = embedded_graph.get_predecessors( + embedded_layer_name + ) + for embed_pred_name in embedded_predecessors: + new_embed_pred_name = embedded_model + "_" + embed_pred_name self._add_edge(new_embed_pred_name, new_layer_name) - - self.layer_list[idx+1:idx+1] = new_layer_list + + self.layer_list[idx + 1 : idx + 1] = new_layer_list # replace input / output edges to the model with input/output edges of the embedded layers predecessors = self.get_predecessors(embedded_model) embedded_inputs = embedded_graph.get_input_layers() - for i, pred in enumerate(predecessors): + for i, pred in enumerate(predecessors): embed_input = embedded_inputs[i] - new_embed_input = embedded_model + '_' + embed_input + new_embed_input = embedded_model + "_" + embed_input self._add_edge(pred, new_embed_input) - + embedded_outputs = embedded_graph.get_output_layers() successors = self.get_successors(embedded_model) for i, succ in enumerate(successors): embed_output = embedded_outputs[i] - new_embed_output = embedded_model + '_' + embed_output + new_embed_output = embedded_model + "_" + embed_output self._add_edge(new_embed_output, succ) - + # clear up the embedded model self._remove_layer(embedded_model) idx = self._get_first_embedded_model() - + self.make_input_layers() self.make_output_layers() - + def print_layer_list(self): - print('\n') - print('layer_list') + print("\n") + print("layer_list") print(self.layer_list) - + def print_edge_map(self): - print('\n') - print('edge map:') + print("\n") + print("edge map:") for src in self.edge_map: for snk in self.edge_map[src]: - print(' ', src, '-->', snk) + print(" ", src, "-->", snk) def print_reverse_edge_map(self): - print('\n') - print('reverse edge map: ') + print("\n") + print("reverse edge map: ") for snk in self.reverse_edge_map: for src in self.reverse_edge_map[snk]: - print(' ', snk, '<--', src) + print(" ", snk, "<--", src) def print_mapping(self): - print('\nmapping:') + print("\nmapping:") for key in self.keras_layer_map: - print(key, '-->', self.keras_layer_map[key], '(', self.keras_layer_map[key].name, ')') - + print( + key, + "-->", + self.keras_layer_map[key], + "(", + self.keras_layer_map[key].name, + ")", + ) + def print_all(self): - print('='*80) + print("=" * 80) self.print_layer_list() self.print_edge_map() self.print_reverse_edge_map() self.print_mapping() - - diff --git a/coremltools/converters/keras/_topology2.py b/coremltools/converters/keras/_topology2.py index 938bab3fd..b09a62aef 100644 --- a/coremltools/converters/keras/_topology2.py +++ b/coremltools/converters/keras/_topology2.py @@ -10,7 +10,7 @@ _keras.layers.MaxPooling1D, _keras.layers.AveragePooling1D, _keras.layers.GlobalMaxPooling1D, - _keras.layers.GlobalAveragePooling1D + _keras.layers.GlobalAveragePooling1D, ] _KERAS_ACTIVATION_LAYERS = [ @@ -22,7 +22,7 @@ ] _KERAS_NORMALIZATION_LAYERS = [ - _keras.layers.BatchNormalization, + _keras.layers.BatchNormalization, ] _KERAS_RECURRENT_LAYERS = [ @@ -49,31 +49,34 @@ from distutils.version import StrictVersion as _StrictVersion -if _keras.__version__ >= _StrictVersion('2.2.0'): +if _keras.__version__ >= _StrictVersion("2.2.0"): from keras.engine.input_layer import InputLayer else: from keras.engine.topology import InputLayer def _to_list(x): - if type(x) is not list: + if type(x) is not list: return [x] - else: + else: return x -def _insert_to_dict(d, key, e): + +def _insert_to_dict(d, key, e): # d is a dict where key maps to a 
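
The sub-model expansion that ends here renames every inner layer to `"<submodel>_<layer>"` before wiring it into the parent graph, which keeps names unique after flattening. A toy sketch of the renaming step (all names invented for illustration):

```python
# Toy sketch of sub-model expansion: each layer and edge of the embedded
# graph is re-registered under a name prefixed with the sub-model's name.
embedded_model = "feature_extractor"
embedded_layers = ["conv_1", "pool_1"]
embedded_edges = {"conv_1": ["pool_1"]}

new_layers = [embedded_model + "_" + l for l in embedded_layers]
new_edges = {
    embedded_model + "_" + src: [embedded_model + "_" + s for s in sinks]
    for src, sinks in embedded_edges.items()
}
assert new_edges == {"feature_extractor_conv_1": ["feature_extractor_pool_1"]}
```
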
list if key not in d: d[key] = [] if e not in d[key]: d[key].append(e) + def _is_merge_layer(layer): for lt in _KERAS_MERGE_LAYERS: if isinstance(layer, lt): return True return False + class NetGraph(object): """ Attributes: @@ -81,33 +84,34 @@ class NetGraph(object): connection_map - a map where the key is a layer, the value is a list of its successors reverse_connection_map - a map where the key is a layer, the value is a list of its predecessors keras_layer_map - a map where the key is a layer name, the value is a Keras layer - model - a reference to the keras model. - blob_names - blob names for each edge. + model - a reference to the keras model. + blob_names - blob names for each edge. """ + def __init__(self, model): self.layer_list = [] self.edge_map = {} self.reverse_edge_map = {} self.keras_layer_map = {} - + self.input_layers = [] self.output_layers = [] - self.layers_inputs = {} # each layer's input blobs - self.layers_outputs = {} # each layer's output blobs - + self.layers_inputs = {} # each layer's input blobs + self.layers_outputs = {} # each layer's output blobs + # these will be pairs of the form (name, shape) because it'll show up on the interface self.optional_inputs = [] self.optional_outputs = [] self.layers_optional_inputs = {} self.layers_optional_outputs = {} - + self.model = model - + def _add_layer(self, keras_layer): - # add a layer without adding connections. + # add a layer without adding connections. # when a layer exists already, this operation won't do anything layer = keras_layer.name - if layer not in self.layer_list: + if layer not in self.layer_list: self.layer_list.append(layer) self.keras_layer_map[layer] = keras_layer @@ -117,74 +121,77 @@ def _replace_blob_name(self, old_name, new_name): for idx, b in enumerate(self.layers_outputs[l]): if b == old_name: self.layers_outputs[l][idx] = new_name - + for l in self.layers_inputs: for idx, b in enumerate(self.layers_inputs[l]): if b == old_name: self.layers_inputs[l][idx] = new_name - + def get_predecessors(self, layer_name): - if layer_name in self.reverse_edge_map: - return self.reverse_edge_map[layer_name][:] # needs to make a copy - else: + if layer_name in self.reverse_edge_map: + return self.reverse_edge_map[layer_name][:] # needs to make a copy + else: + return [] - + def get_successors(self, layer_name): if layer_name in self.edge_map: - return self.edge_map[layer_name][:] # needs to make a copy + return self.edge_map[layer_name][:] # needs to make a copy else: return [] - + def get_keras_layer(self, layer_name): return self.keras_layer_map[layer_name] - + def get_coreml_layers(self, keras_layer): coreml_layers = [] for key in self.keras_layer_map: if self.keras_layer_map[key] == keras_layer: coreml_layers.append(key) return coreml_layers - + def make_input_layers(self): """ - Extract the ordering of the input layers. + Extract the ordering of the input layers.
""" self.input_layers = [] - in_nodes = self.model._inbound_nodes if hasattr( - self.model,'_inbound_nodes') else self.model.inbound_nodes - if hasattr(self.model, 'input_layers'): + in_nodes = ( + self.model._inbound_nodes + if hasattr(self.model, "_inbound_nodes") + else self.model.inbound_nodes + ) + if hasattr(self.model, "input_layers"): input_keras_layers = self.model.input_layers[:] self.input_layers = [None] * len(input_keras_layers) - for layer in self.layer_list: + for layer in self.layer_list: keras_layer = self.keras_layer_map[layer] - if isinstance(keras_layer, InputLayer): + if isinstance(keras_layer, InputLayer): if keras_layer in input_keras_layers: idx = input_keras_layers.index(keras_layer) self.input_layers[idx] = layer - elif hasattr(self.model, 'inputs'): - for ts in _to_list(self.model.inputs): + elif hasattr(self.model, "inputs"): + for ts in _to_list(self.model.inputs): # search for the InputLayer that matches this ts - for l in self.layer_list: + for l in self.layer_list: kl = self.keras_layer_map[l] - if isinstance(kl, InputLayer) and kl.input == ts: + if isinstance(kl, InputLayer) and kl.input == ts: self.input_layers.append(l) elif len(in_nodes) <= 1: - for ts in _to_list(self.model.input): + for ts in _to_list(self.model.input): # search for the InputLayer that matches this ts - for l in self.layer_list: + for l in self.layer_list: kl = self.keras_layer_map[l] - if isinstance(kl, InputLayer) and kl.input == ts: + if isinstance(kl, InputLayer) and kl.input == ts: self.input_layers.append(l) else: raise ValueError("Input values cannot be identified.") - + def make_output_layers(self): """ - Extract the ordering of output layers. + Extract the ordering of output layers. """ self.output_layers = [] # import pytest; pytest.set_trace() - if hasattr(self.model, 'output_layers'): + if hasattr(self.model, "output_layers"): # find corresponding output layers in CoreML model # assume output layers are not shared # Helper function to recursively extract output layers @@ -192,7 +199,7 @@ def make_output_layers(self): def extract_output_layers(keras_model): output_layers = [] for layer in keras_model.output_layers: - if hasattr(layer,'output_layers'): + if hasattr(layer, "output_layers"): output_layers.extend(extract_output_layers(layer)) else: output_layers.append(layer) @@ -207,21 +214,27 @@ def extract_output_layers(keras_model): for model_output in self.model.outputs: for l in self.layer_list: k_layer = self.keras_layer_map[l] - in_nodes = k_layer._inbound_nodes if hasattr(k_layer, '_inbound_nodes') else k_layer.inbound_nodes + in_nodes = ( + k_layer._inbound_nodes + if hasattr(k_layer, "_inbound_nodes") + else k_layer.inbound_nodes + ) for idx in range(len(in_nodes)): out_tensor = k_layer.get_output_at(idx) - if out_tensor == model_output or (out_tensor.name in model_output.name): + if out_tensor == model_output or ( + out_tensor.name in model_output.name + ): self.output_layers.append(l) if len(self.output_layers) == 0: raise ValueError("No outputs can be identified") - + def get_input_layers(self): return self.input_layers - + def get_output_layers(self): return self.output_layers - - def generate_blob_names(self): + + def generate_blob_names(self): """ Generate blob names for each one of the edge. At this time, Keras does not support "fork" operation (a layer with more than 1 blob output). So we just @@ -229,91 +242,90 @@ def generate_blob_names(self): networks are singly-connected graphs - which should be the case. 
""" # generate blob names that represent edges in blob_name_map - # because of the InputLayers, input blobs are also generated. - + # because of the InputLayers, input blobs are also generated. + # Generate each layer's input / output blob names - for layer in self.layer_list: + for layer in self.layer_list: keras_layer = self.keras_layer_map[layer] # no need to generate InputLayers' blobs if not isinstance(keras_layer, InputLayer): # layer's input blob names depend on predecessors preds = self.get_predecessors(layer) - for pred in preds: - blob_name = pred + '_output' + for pred in preds: + blob_name = pred + "_output" _insert_to_dict(self.layers_inputs, layer, blob_name) # layer's output blob is just named after itself - blob_name = layer + '_output' + blob_name = layer + "_output" _insert_to_dict(self.layers_outputs, layer, blob_name) - + def get_layer_blobs(self, layer): keras_layer = self.keras_layer_map[layer] if isinstance(keras_layer, InputLayer): return None, None - else: + else: input_blobs = self.layers_inputs[layer] output_blobs = self.layers_outputs[layer] - if layer in self.layers_optional_inputs: + if layer in self.layers_optional_inputs: input_blobs += self.layers_optional_inputs[layer] - if layer in self.layers_optional_outputs: + if layer in self.layers_optional_outputs: output_blobs += self.layers_optional_outputs[layer] return input_blobs, output_blobs - + def reset_model_input_names(self, new_names): # call this method after make_input_layers() is called - if new_names is None: + if new_names is None: return - if len(new_names) != len(self.input_layers): - print('Input name length mismatch') + if len(new_names) != len(self.input_layers): + print("Input name length mismatch") return for i, in_layer in enumerate(self.input_layers): - old_blob_name = in_layer + '_output' + old_blob_name = in_layer + "_output" new_blob_name = new_names[i] succs = self.get_successors(in_layer) - for succ in succs: + for succ in succs: idx = self.layers_inputs[succ].index(old_blob_name) self.layers_inputs[succ][idx] = new_blob_name def reset_model_output_names(self, new_names): - if new_names is None: + if new_names is None: return - if len(new_names) != len(self.output_layers): - print('Output name length mismatch') + if len(new_names) != len(self.output_layers): + print("Output name length mismatch") return for i, out_layer in enumerate(self.output_layers): old_blob_name = self.layers_outputs[self.output_layers[i]][0] - self._replace_blob_name(old_blob_name, - new_names[i]) + self._replace_blob_name(old_blob_name, new_names[i]) # need to update both layer's in/out list and graph in/out ports def add_recurrent_optionals(self): # call this after blob names are generated - for layer in self.layer_list: + for layer in self.layer_list: keras_layer = self.keras_layer_map[layer] - if type(keras_layer) in _KERAS_RECURRENT_LAYERS: + if type(keras_layer) in _KERAS_RECURRENT_LAYERS: if not isinstance(keras_layer, _keras.layers.wrappers.Bidirectional): hidden_size = keras_layer.units - else: + else: hidden_size = keras_layer.forward_layer.units - h_in_name = layer + '_h_in' - h_out_name = layer + '_h_out' + h_in_name = layer + "_h_in" + h_out_name = layer + "_h_out" self.optional_inputs.append((h_in_name, hidden_size)) self.optional_outputs.append((h_out_name, hidden_size)) - _insert_to_dict(self.layers_optional_inputs, layer, h_in_name) + _insert_to_dict(self.layers_optional_inputs, layer, h_in_name) _insert_to_dict(self.layers_optional_outputs, layer, h_out_name) if isinstance(keras_layer, 
_keras.layers.recurrent.LSTM): - c_in_name = layer + '_c_in' - c_out_name = layer + '_c_out' + c_in_name = layer + "_c_in" + c_out_name = layer + "_c_out" self.optional_inputs.append((c_in_name, hidden_size)) self.optional_outputs.append((c_out_name, hidden_size)) _insert_to_dict(self.layers_optional_inputs, layer, c_in_name) _insert_to_dict(self.layers_optional_outputs, layer, c_out_name) elif isinstance(keras_layer, _keras.layers.wrappers.Bidirectional): - c_in_name = layer + '_c_in' - c_out_name = layer + '_c_out' - h_in_name_rev = layer + '_h_in_rev' - c_in_name_rev = layer + '_c_in_rev' - h_out_name_rev = layer + '_h_out_rev' - c_out_name_rev = layer + '_c_out_rev' + c_in_name = layer + "_c_in" + c_out_name = layer + "_c_out" + h_in_name_rev = layer + "_h_in_rev" + c_in_name_rev = layer + "_c_in_rev" + h_out_name_rev = layer + "_h_out_rev" + c_out_name_rev = layer + "_c_out_rev" self.optional_inputs.append((c_in_name, hidden_size)) self.optional_outputs.append((c_out_name, hidden_size)) self.optional_inputs.append((h_in_name_rev, hidden_size)) @@ -326,27 +338,37 @@ def add_recurrent_optionals(self): _insert_to_dict(self.layers_optional_inputs, layer, c_in_name_rev) _insert_to_dict(self.layers_optional_outputs, layer, h_out_name_rev) _insert_to_dict(self.layers_optional_outputs, layer, c_out_name_rev) - + def _get_first_embedded_model(self): for idx, layer in enumerate(self.layer_list): keras_layer = self.keras_layer_map[layer] - if isinstance(keras_layer, _keras.models.Sequential) or isinstance(keras_layer, _keras.models.Model): + if isinstance(keras_layer, _keras.models.Sequential) or isinstance( + keras_layer, _keras.models.Model + ): return idx return -1 - + def _get_first_shared_layer(self): for idx, layer in enumerate(self.layer_list): keras_layer = self.keras_layer_map[layer] - inbound_nodes = keras_layer.inbound_nodes if hasattr(keras_layer, 'inbound_nodes') else keras_layer._inbound_nodes - inbound_nodes = [node for node in inbound_nodes if len(node.inbound_layers) > 0] - if not _is_merge_layer(self.keras_layer_map[layer]) and \ - len(self.get_predecessors(layer)) > 1 and \ - len(inbound_nodes) > 1: + inbound_nodes = ( + keras_layer.inbound_nodes + if hasattr(keras_layer, "inbound_nodes") + else keras_layer._inbound_nodes + ) + inbound_nodes = [ + node for node in inbound_nodes if len(node.inbound_layers) > 0 + ] + if ( + not _is_merge_layer(self.keras_layer_map[layer]) + and len(self.get_predecessors(layer)) > 1 + and len(inbound_nodes) > 1 + ): return idx return -1 - + def _get_first_layer_of_type(self, layer_type): - for idx, layer in enumerate(self.layer_list): + for idx, layer in enumerate(self.layer_list): keras_layer = self.keras_layer_map[layer] if isinstance(keras_layer, layer_type): return idx @@ -384,7 +406,7 @@ def _remove_layer(self, layer): # remove layer in the data structures self.keras_layer_map.pop(layer) self.layer_list.remove(layer) - + def _remove_layer_and_reconnect(self, layer): """ Remove the layer, and reconnect each of its predecessor to each of its successor @@ -401,12 +423,12 @@ def _remove_layer_and_reconnect(self, layer): for pred in predecessors: for succ in successors: self._add_edge(pred, succ) - + # remove layer in the data structures self.layer_list.remove(layer) self.keras_layer_map.pop(layer) - - # re-assign input and output layers if layer happens to be an + + # re-assign input and output layers if layer happens to be an # input / output layer if layer in self.input_layers: idx = self.input_layers.index(layer) @@ -428,37 +450,39 @@ def 
_remove_old_edges(self, layer): self._remove_edge(pred, layer) for succ in successors: self._remove_edge(layer, succ) - + def _remove_layers_of_type(self, layer_type): idx = self._get_first_layer_of_type(layer_type) while idx >= 0: layer = self.layer_list[idx] self._remove_layer_and_reconnect(layer) idx = self._get_first_layer_of_type(layer_type) - + def remove_skip_layers(self, skip_layers): - for skip_layer in skip_layers: + for skip_layer in skip_layers: self._remove_layers_of_type(skip_layer) - + def remove_internal_input_layers(self): idx, nb_layers = 0, len(self.layer_list) - while idx < nb_layers: + while idx < nb_layers: layer = self.layer_list[idx] keras_layer = self.keras_layer_map[layer] - if isinstance(keras_layer, InputLayer) and \ - len(self.get_predecessors(layer)) > 0: + if ( + isinstance(keras_layer, InputLayer) + and len(self.get_predecessors(layer)) > 0 + ): # these are internal input layers that needs to be taken out self._remove_layer_and_reconnect(layer) idx -= 1 nb_layers -= 1 idx += 1 - + def _insert_layer_after(self, layer_idx, new_layer, new_keras_layer): - """ Insert the new_layer, whose parameter is stored in a Keras layer - structure new_keras_layer, after the layer whose position is layer_idx. + """ Insert the new_layer, whose parameter is stored in a Keras layer + structure new_keras_layer, after the layer whose position is layer_idx. """ layer = self.layer_list[layer_idx] - self.layer_list.insert(layer_idx+1, new_layer) + self.layer_list.insert(layer_idx + 1, new_layer) self.keras_layer_map[new_layer] = new_keras_layer successors = self.get_successors(layer) # add edge layer -> new_layer @@ -473,7 +497,7 @@ def _insert_layer_after(self, layer_idx, new_layer, new_keras_layer): self.output_layers[idx] = new_layer def _insert_layer_between(self, src, snk, new_layer, new_keras_layer): - """ Insert the new_layer, whose keras layer parameters are stored in + """ Insert the new_layer, whose keras layer parameters are stored in new_keras_layer, between src and snk. """ if snk is None: @@ -495,43 +519,48 @@ def _insert_layer_between(self, src, snk, new_layer, new_keras_layer): if src in self.output_layers: idx = self.output_layers.index(src) self.output_layers[idx] = new_layer - + def defuse_activation(self): - """ Defuse the fused activation layers in the network. + """ Defuse the fused activation layers in the network. 
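
The `six.PY2` branch just below exists because Python 2 function objects carried `func_name` while Python 3 only has `__name__`; the resulting name is compared against `"linear"` to decide whether an activation must be defused. The Python 3 path in miniature:

```python
# On Python 3 an activation's name is read from __name__ (Python 2 used
# func_name); a name of "linear" means there is nothing to defuse.
def relu(x):
    return x if x > 0 else 0

assert relu.__name__ == "relu"
```
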
""" idx, nb_layers = 0, len(self.layer_list) while idx < nb_layers: layer = self.layer_list[idx] k_layer = self.keras_layer_map[layer] - if (isinstance(k_layer, _keras.layers.TimeDistributed)): + if isinstance(k_layer, _keras.layers.TimeDistributed): k_layer = k_layer.layer - if (isinstance(k_layer, _keras.layers.Conv2D) or - isinstance(k_layer, _keras.layers.Conv1D) or - isinstance(k_layer, _keras.layers.SeparableConv2D) or - isinstance(k_layer, _keras.layers.SeparableConv1D) or - isinstance(k_layer, _keras.layers.Dense)): - - func_name = k_layer.activation.func_name if six.PY2 else \ - k_layer.activation.__name__ - - if func_name != 'linear': + if ( + isinstance(k_layer, _keras.layers.Conv2D) + or isinstance(k_layer, _keras.layers.Conv1D) + or isinstance(k_layer, _keras.layers.SeparableConv2D) + or isinstance(k_layer, _keras.layers.SeparableConv1D) + or isinstance(k_layer, _keras.layers.Dense) + ): + + func_name = ( + k_layer.activation.func_name + if six.PY2 + else k_layer.activation.__name__ + ) + + if func_name != "linear": # Create new layer - new_layer = layer + '__activation__' + new_layer = layer + "__activation__" new_keras_layer = _keras.layers.core.Activation(func_name) # insert new layer after it self._insert_layer_after(idx, new_layer, new_keras_layer) idx += 1 nb_layers += 1 idx += 1 - - def is_activation(self,layer): + + def is_activation(self, layer): keras_layer = self.keras_layer_map[layer] for activation_type in _KERAS_ACTIVATION_LAYERS: if isinstance(keras_layer, activation_type): return True return False - - def is_1d_layer(self,layer): + + def is_1d_layer(self, layer): keras_layer = self.keras_layer_map[layer] for layer_type in _KERAS_LAYERS_1D: if isinstance(keras_layer, layer_type): @@ -539,10 +568,10 @@ def is_1d_layer(self,layer): return False def _get_1d_interface_edges(self): - """ Get edges that represents transition from not-1D to 1D, and 1D to - not-1D. A 'in_edge e(u,v)' means u operates on non-1D blobs, but v + """ Get edges that represents transition from not-1D to 1D, and 1D to + not-1D. A 'in_edge e(u,v)' means u operates on non-1D blobs, but v operates on 1D blobs. An 'out_edge e(u,v)' means u operates on 1D - blobs, but v operates on non-1D blobs. + blobs, but v operates on non-1D blobs. 
""" in_edges = set() for layer in self.layer_list: @@ -552,10 +581,12 @@ def _get_1d_interface_edges(self): if len(preds) == 0: in_edges.add((None, layer)) else: - # because 1D layers are all 1-input, + # because 1D layers are all 1-input, # there should only be 1 predecessor u, v = preds[0], layer - while u and (self.is_activation(u) or type(u) in _KERAS_NORMALIZATION_LAYERS): + while u and ( + self.is_activation(u) or type(u) in _KERAS_NORMALIZATION_LAYERS + ): preds = self.get_predecessors(u) v = u u = preds[0] if len(preds) > 0 else None @@ -591,73 +622,86 @@ def _get_1d_interface_edges(self): def insert_1d_permute_layers(self): """ - Insert permutation layers before a 1D start point or after 1D end point + Insert permutation layers before a 1D start point or after 1D end point """ idx, nb_layers = 0, len(self.layer_list) in_edges, out_edges = self._get_1d_interface_edges() # Hacky Warning: (1) use a 4-D permute, which is not likely to happen in Keras, # to represent actual permutation needed for (seq, c, h, w) in CoreML - # (2) Assume 2-D input shape has meaning (seq, c), and during CoreML runtime, + # (2) Assume 2-D input shape has meaning (seq, c), and during CoreML runtime, # it is represented as 4D blob, (seq, c, h, w) for in_edge in in_edges: src, snk = in_edge if src is None: - permute_layer = '_permute_' + snk + permute_layer = "_permute_" + snk else: - permute_layer = src + '_permute_' + snk - keras_permute = _keras.layers.Permute(dims=(3, 1, 2, 0)) # assume w = 1, switch seq and w + permute_layer = src + "_permute_" + snk + keras_permute = _keras.layers.Permute( + dims=(3, 1, 2, 0) + ) # assume w = 1, switch seq and w self._insert_layer_between(src, snk, permute_layer, keras_permute) for out_edge in out_edges: src, snk = out_edge if snk is None: - permute_layer = src + '_permute_' + permute_layer = src + "_permute_" else: - permute_layer = src + '_permute_' + snk - keras_permute = _keras.layers.Permute(dims=(3, 1, 2, 0)) # assume w = 1, switch seq and w back + permute_layer = src + "_permute_" + snk + keras_permute = _keras.layers.Permute( + dims=(3, 1, 2, 0) + ) # assume w = 1, switch seq and w back self._insert_layer_between(src, snk, permute_layer, keras_permute) - + def insert_permute_for_spatial_bn(self): - + # find spatial batchnorm layers spatial_bn_layers = [] for layer in self.layer_list: keras_layer = self.keras_layer_map[layer] - if isinstance(keras_layer, _keras.layers.BatchNormalization) and len(keras_layer.input_shape) == 4: + if ( + isinstance(keras_layer, _keras.layers.BatchNormalization) + and len(keras_layer.input_shape) == 4 + ): if keras_layer.axis == 1 or keras_layer.axis == 2: spatial_bn_layers.append(layer) - + for sbn in spatial_bn_layers: axis = self.keras_layer_map[sbn].axis # axis == 1: swap H axis; axis == 2 : swap W axis - dims = (0,2,1,3) if axis == 1 else (0,3,2,1) + dims = (0, 2, 1, 3) if axis == 1 else (0, 3, 2, 1) # add permutation before spatial batchnorm pred = self.get_predecessors(sbn)[0] - permute_layer = pred + '_permute_' + sbn + permute_layer = pred + "_permute_" + sbn keras_permute = _keras.layers.Permute(dims=dims) self._insert_layer_between(pred, sbn, permute_layer, keras_permute) # add permutation after spatial batchnorm succs = self.get_successors(sbn) - if len(succs) == 0: - permute_layer = sbn + '_permute_' + if len(succs) == 0: + permute_layer = sbn + "_permute_" keras_permute = _keras.layers.Permute(dims=dims) self._insert_layer_between(sbn, None, permute_layer, keras_permute) - else: - for succ in succs: - permute_layer = 
sbn + '_permute_' + succ + else: + for succ in succs: + permute_layer = sbn + "_permute_" + succ keras_permute = _keras.layers.Permute(dims=dims) self._insert_layer_between(sbn, succ, permute_layer, keras_permute) - - def build(self, is_top_level = True): - # sanity check. + + def build(self, is_top_level=True): + # sanity check. model = self.model - if not (type(model) == _keras.models.Sequential or type(model) == _keras.models.Model): + if not ( + type(model) == _keras.models.Sequential + or type(model) == _keras.models.Model + ): raise TypeError("Keras layer of type %s is not supported." % type(model)) # build the graph without considering embedded subgraphs for i, layer in enumerate(model.layers): - in_nodes = layer._inbound_nodes if hasattr(layer, - '_inbound_nodes') else layer.inbound_nodes + in_nodes = ( + layer._inbound_nodes + if hasattr(layer, "_inbound_nodes") + else layer.inbound_nodes + ) for node in in_nodes: for pred in node.inbound_layers: if pred.name not in self.layer_list: @@ -666,7 +710,7 @@ def build(self, is_top_level = True): self._add_edge(pred.name, layer.name) self.layer_list.append(layer.name) self.keras_layer_map[layer.name] = layer - + # Duplicate models for weight sharing idx = self._get_first_shared_layer() while idx >= 0: @@ -674,8 +718,8 @@ def build(self, is_top_level = True): keras_layer = self.keras_layer_map[layer] predecessors = self.reverse_edge_map[layer] successors = self.edge_map[layer] - new_layers = [layer+'_'+str(i) for i in range(len(predecessors))] - self.layer_list[idx:idx+1] = new_layers + new_layers = [layer + "_" + str(i) for i in range(len(predecessors))] + self.layer_list[idx : idx + 1] = new_layers for i, new_layer in enumerate(new_layers): self.edge_map[new_layer] = [] self.reverse_edge_map[new_layer] = [] @@ -687,7 +731,7 @@ def build(self, is_top_level = True): self._remove_old_edges(layer) self.keras_layer_map.pop(layer) idx = self._get_first_shared_layer() - + # Expand the sub-models idx = self._get_first_embedded_model() while idx >= 0: @@ -701,79 +745,90 @@ def build(self, is_top_level = True): embedded_layer_list = embedded_graph.layer_list new_layer_list = [] for embedded_layer_name in embedded_layer_list: - new_layer_name = embedded_model + '_' + embedded_layer_name + new_layer_name = embedded_model + "_" + embedded_layer_name new_layer_list.append(new_layer_name) - self.keras_layer_map[new_layer_name] = embedded_graph.keras_layer_map[embedded_layer_name] + self.keras_layer_map[new_layer_name] = embedded_graph.keras_layer_map[ + embedded_layer_name + ] # add edge [embed_layer -> its succ] embedded_successors = embedded_graph.get_successors(embedded_layer_name) for embed_succ_name in embedded_successors: - new_embed_succ_name = embedded_model + '_' + embed_succ_name + new_embed_succ_name = embedded_model + "_" + embed_succ_name self._add_edge(new_layer_name, new_embed_succ_name) # add edge [pred -> embed_layer] - embedded_predecessors = embedded_graph.get_predecessors(embedded_layer_name) - for embed_pred_name in embedded_predecessors: - new_embed_pred_name = embedded_model + '_' + embed_pred_name + embedded_predecessors = embedded_graph.get_predecessors( + embedded_layer_name + ) + for embed_pred_name in embedded_predecessors: + new_embed_pred_name = embedded_model + "_" + embed_pred_name self._add_edge(new_embed_pred_name, new_layer_name) - - self.layer_list[idx+1:idx+1] = new_layer_list + + self.layer_list[idx + 1 : idx + 1] = new_layer_list # replace input / output edges to the model with input/output edges of the embedded 
layers predecessors = self.get_predecessors(embedded_model) embedded_inputs = embedded_graph.get_input_layers() - for i, pred in enumerate(predecessors): + for i, pred in enumerate(predecessors): embed_input = embedded_inputs[i] - new_embed_input = embedded_model + '_' + embed_input + new_embed_input = embedded_model + "_" + embed_input self._add_edge(pred, new_embed_input) - + embedded_outputs = embedded_graph.get_output_layers() successors = self.get_successors(embedded_model) for i, succ in enumerate(successors): embed_output = embedded_outputs[i] - new_embed_output = embedded_model + '_' + embed_output + new_embed_output = embedded_model + "_" + embed_output self._add_edge(new_embed_output, succ) - + # clear up the embedded model self._remove_layer(embedded_model) idx = self._get_first_embedded_model() - + # tag input layers and and output layers self.make_input_layers() self.make_output_layers() # make graph level adjustments - do this only on top level if is_top_level: - self.remove_skip_layers(_KERAS_SKIP_LAYERS) # done 1 pass + self.remove_skip_layers(_KERAS_SKIP_LAYERS) # done 1 pass self.insert_1d_permute_layers() self.insert_permute_for_spatial_bn() self.defuse_activation() self.remove_internal_input_layers() def print_layer_list(self): - print('\n') - print('layer_list') + print("\n") + print("layer_list") print(self.layer_list) - + def print_edge_map(self): - print('\n') - print('edge map:') + print("\n") + print("edge map:") for src in self.edge_map: for snk in self.edge_map[src]: - print(' ', src, '-->', snk) + print(" ", src, "-->", snk) def print_reverse_edge_map(self): - print('\n') - print('reverse edge map: ') + print("\n") + print("reverse edge map: ") for snk in self.reverse_edge_map: for src in self.reverse_edge_map[snk]: - print(' ', snk, '<--', src) + print(" ", snk, "<--", src) def print_mapping(self): - print('\nmapping:') + print("\nmapping:") for key in self.keras_layer_map: - print(key, '-->', self.keras_layer_map[key], '(', self.keras_layer_map[key].name, ')') - + print( + key, + "-->", + self.keras_layer_map[key], + "(", + self.keras_layer_map[key].name, + ")", + ) + def print_all(self): - print('='*80) + print("=" * 80) self.print_layer_list() self.print_edge_map() self.print_reverse_edge_map() diff --git a/coremltools/converters/keras/_utils.py b/coremltools/converters/keras/_utils.py index 6f249c90b..aa5b8cf0d 100644 --- a/coremltools/converters/keras/_utils.py +++ b/coremltools/converters/keras/_utils.py @@ -4,23 +4,31 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -def raise_error_unsupported_categorical_option(option_name, option_value, layer_type, layer_name): +def raise_error_unsupported_categorical_option( + option_name, option_value, layer_type, layer_name +): """ Raise an error if an option is not supported. """ - raise RuntimeError("Unsupported option %s=%s in layer %s(%s)" % (option_name, option_value, - layer_type, layer_name)) + raise RuntimeError( + "Unsupported option %s=%s in layer %s(%s)" + % (option_name, option_value, layer_type, layer_name) + ) + def raise_error_unsupported_option(option, layer_type, layer_name): """ Raise an error if an option is not supported. """ - raise RuntimeError("Unsupported option =%s in layer %s(%s)" % (option, - layer_type, layer_name)) + raise RuntimeError( + "Unsupported option =%s in layer %s(%s)" % (option, layer_type, layer_name) + ) + def raise_error_unsupported_scenario(message, layer_type, layer_name): """ Raise an error if an scenario is not supported. 
""" - raise RuntimeError("Unsupported scenario '%s' in layer %s(%s)" % (message, - layer_type, layer_name)) + raise RuntimeError( + "Unsupported scenario '%s' in layer %s(%s)" % (message, layer_type, layer_name) + ) diff --git a/coremltools/converters/libsvm/__init__.py b/coremltools/converters/libsvm/__init__.py index 6f7d02f64..af5c13199 100644 --- a/coremltools/converters/libsvm/__init__.py +++ b/coremltools/converters/libsvm/__init__.py @@ -8,14 +8,19 @@ from . import _libsvm_converter from . import _libsvm_util -from ..._deps import HAS_LIBSVM as _HAS_LIBSVM +from ..._deps import _HAS_LIBSVM if _HAS_LIBSVM: - import svm as _libsvm + from libsvm import svmutil as _svmutil -def convert(model, input_names='input', target_name='target', - probability='classProbability', input_length='auto'): +def convert( + model, + input_names="input", + target_name="target", + probability="classProbability", + input_length="auto", +): """ Convert a LIBSVM model to Core ML format. @@ -68,26 +73,39 @@ def convert(model, input_names='input', target_name='target', # Convert using user specified input names >>> coreml_model = coremltools.converters.libsvm.convert(libsvm_model, input_names=['x', 'y']) """ - if not(_HAS_LIBSVM): - raise RuntimeError('libsvm not found. libsvm conversion API is disabled.') + if not (_HAS_LIBSVM): + raise RuntimeError("libsvm not found. libsvm conversion API is disabled.") if isinstance(model, _string_types): libsvm_model = _libsvm_util.load_model(model) else: libsvm_model = model - if not isinstance(libsvm_model, _libsvm.svm_model): - raise TypeError("Expected 'model' of type '%s' (got %s)" % (_libsvm.svm_model, type(libsvm_model))) + if not isinstance(libsvm_model, _svmutil.svm_model): + raise TypeError( + "Expected 'model' of type '%s' (got %s)" + % (_svmutil.svm_model, type(libsvm_model)) + ) if not isinstance(target_name, _string_types): - raise TypeError("Expected 'target_name' of type str (got %s)" % type(libsvm_model)) + raise TypeError( + "Expected 'target_name' of type str (got %s)" % type(libsvm_model) + ) - if input_length != 'auto' and not isinstance(input_length, int): - raise TypeError("Expected 'input_length' of type int, got %s" % type(input_length)) + if input_length != "auto" and not isinstance(input_length, int): + raise TypeError( + "Expected 'input_length' of type int, got %s" % type(input_length) + ) - if input_length != 'auto' and not isinstance(input_names, _string_types): - raise ValueError("'input_length' should not be used unless the input will be only one array.") + if input_length != "auto" and not isinstance(input_names, _string_types): + raise ValueError( + "'input_length' should not be used unless the input will be only one array." + ) if not isinstance(probability, _string_types): - raise TypeError("Expected 'probability' of type str (got %s)" % type(probability)) + raise TypeError( + "Expected 'probability' of type str (got %s)" % type(probability) + ) - return _libsvm_converter.convert(libsvm_model, input_names, target_name, input_length, probability) + return _libsvm_converter.convert( + libsvm_model, input_names, target_name, input_length, probability + ) diff --git a/coremltools/converters/libsvm/_libsvm_converter.py b/coremltools/converters/libsvm/_libsvm_converter.py index aadb62f29..e00c86e49 100644 --- a/coremltools/converters/libsvm/_libsvm_converter.py +++ b/coremltools/converters/libsvm/_libsvm_converter.py @@ -4,7 +4,10 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause from ... 
import SPECIFICATION_VERSION -from ..._deps import HAS_LIBSVM +from ..._deps import _HAS_LIBSVM +from coremltools import __version__ as ct_version +from coremltools.models import _METADATA_VERSION, _METADATA_SOURCE +from six import string_types as _string_types def _infer_min_num_features(model): @@ -48,10 +51,10 @@ def convert(libsvm_model, feature_names, target, input_length, probability): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(HAS_LIBSVM): - raise RuntimeError('libsvm not found. libsvm conversion API is disabled.') - - import svm as libsvm + if not (_HAS_LIBSVM): + raise RuntimeError("libsvm not found. libsvm conversion API is disabled.") + + from libsvm import svm as _svm from ...proto import SVM_pb2 from ...proto import Model_pb2 from ...proto import FeatureTypes_pb2 @@ -63,22 +66,26 @@ def convert(libsvm_model, feature_names, target, input_length, probability): export_spec = Model_pb2.Model() export_spec.specificationVersion = SPECIFICATION_VERSION - if(svm_type_enum == libsvm.EPSILON_SVR or svm_type_enum == libsvm.NU_SVR): + if svm_type_enum == _svm.EPSILON_SVR or svm_type_enum == _svm.NU_SVR: svm = export_spec.supportVectorRegressor else: svm = export_spec.supportVectorClassifier # Set the features names inferred_length = _infer_min_num_features(libsvm_model) - if isinstance(feature_names, str): + if isinstance(feature_names, _string_types): # input will be a single array - if input_length == 'auto': - print("[WARNING] Infering an input length of %d. If this is not correct," - " use the 'input_length' parameter." % inferred_length) + if input_length == "auto": + print( + "[WARNING] Infering an input length of %d. If this is not correct," + " use the 'input_length' parameter." % inferred_length + ) input_length = inferred_length elif inferred_length > input_length: - raise ValueError("An input length of %d was given, but the model requires an" - " input of at least %d." % (input_length, inferred_length)) + raise ValueError( + "An input length of %d was given, but the model requires an" + " input of at least %d." % (input_length, inferred_length) + ) input = export_spec.description.input.add() input.name = feature_names @@ -88,26 +95,28 @@ def convert(libsvm_model, feature_names, target, input_length, probability): else: # input will be a series of doubles if inferred_length > len(feature_names): - raise ValueError("%d feature names were given, but the model requires at" - " least %d features." % (len(feature_names), inferred_length)) + raise ValueError( + "%d feature names were given, but the model requires at" + " least %d features." 
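
The hunk above shows only the signature of `_infer_min_num_features`. A plausible sketch of the idea, assuming libsvm's sparse storage where each support vector is a run of `(index, value)` nodes terminated by `index == -1` (a reconstruction for illustration, not the shipped implementation):

```python
# Infer the minimum feature count as the largest feature index that appears
# in any support vector; libsvm terminates each sparse vector with index -1.
def infer_min_num_features(libsvm_model):
    max_index = 0
    for i in range(libsvm_model.l):          # l = number of support vectors
        j = 0
        while libsvm_model.SV[i][j].index != -1:
            max_index = max(max_index, libsvm_model.SV[i][j].index)
            j += 1
    return max_index
```
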
% (len(feature_names), inferred_length) + ) for cur_input_name in feature_names: input = export_spec.description.input.add() input.name = cur_input_name - input.type.doubleType.MergeFromString(b'') + input.type.doubleType.MergeFromString(b"") # Set target output = export_spec.description.output.add() output.name = target # Set the interface types - if(svm_type_enum == libsvm.EPSILON_SVR or svm_type_enum == libsvm.NU_SVR): + if svm_type_enum == _svm.EPSILON_SVR or svm_type_enum == _svm.NU_SVR: export_spec.description.predictedFeatureName = target - output.type.doubleType.MergeFromString(b'') + output.type.doubleType.MergeFromString(b"") nr_class = 2 - elif(svm_type_enum == libsvm.C_SVC or svm_type_enum == libsvm.NU_SVC): + elif svm_type_enum == _svm.C_SVC or svm_type_enum == _svm.NU_SVC: export_spec.description.predictedFeatureName = target - output.type.int64Type.MergeFromString(b'') + output.type.int64Type.MergeFromString(b"") nr_class = len(libsvm_model.get_labels()) @@ -118,34 +127,40 @@ def convert(libsvm_model, feature_names, target, input_length, probability): if probability and bool(libsvm_model.probA): output = export_spec.description.output.add() output.name = probability - output.type.dictionaryType.MergeFromString(b'') - output.type.dictionaryType.int64KeyType.MergeFromString(b'') + output.type.dictionaryType.MergeFromString(b"") + output.type.dictionaryType.int64KeyType.MergeFromString(b"") export_spec.description.predictedProbabilitiesName = probability else: - raise ValueError('Only the following SVM types are supported: C_SVC, NU_SVC, EPSILON_SVR, NU_SVR') - - if(libsvm_model.param.kernel_type == libsvm.LINEAR): - svm.kernel.linearKernel.MergeFromString(b'') # Hack to set kernel to an empty type - elif(libsvm_model.param.kernel_type == libsvm.RBF): + raise ValueError( + "Only the following SVM types are supported: C_SVC, NU_SVC, EPSILON_SVR, NU_SVR" + ) + + if libsvm_model.param.kernel_type == _svm.LINEAR: + svm.kernel.linearKernel.MergeFromString( + b"" + ) # Hack to set kernel to an empty type + elif libsvm_model.param.kernel_type == _svm.RBF: svm.kernel.rbfKernel.gamma = libsvm_model.param.gamma - elif(libsvm_model.param.kernel_type == libsvm.POLY): + elif libsvm_model.param.kernel_type == _svm.POLY: svm.kernel.polyKernel.degree = libsvm_model.param.degree svm.kernel.polyKernel.c = libsvm_model.param.coef0 svm.kernel.polyKernel.gamma = libsvm_model.param.gamma - elif(libsvm_model.param.kernel_type == libsvm.SIGMOID): + elif libsvm_model.param.kernel_type == _svm.SIGMOID: svm.kernel.sigmoidKernel.c = libsvm_model.param.coef0 svm.kernel.sigmoidKernel.gamma = libsvm_model.param.gamma else: - raise ValueError('Unsupported kernel. The following kernel are supported: linear, RBF, polynomial and sigmoid.') + raise ValueError( + "Unsupported kernel. The following kernel are supported: linear, RBF, polynomial and sigmoid." 
+ ) # set rho # also set probA/ProbB only for SVC - if(svm_type_enum == libsvm.C_SVC or svm_type_enum == libsvm.NU_SVC): - num_class_pairs = nr_class * (nr_class-1)//2 + if svm_type_enum == _svm.C_SVC or svm_type_enum == _svm.NU_SVC: + num_class_pairs = nr_class * (nr_class - 1) // 2 for i in range(num_class_pairs): svm.rho.append(libsvm_model.rho[i]) - if(bool(libsvm_model.probA) and bool(libsvm_model.probB)): + if bool(libsvm_model.probA) and bool(libsvm_model.probB): for i in range(num_class_pairs): svm.probA.append(libsvm_model.probA[i]) svm.probB.append(libsvm_model.probB[i]) @@ -153,7 +168,7 @@ def convert(libsvm_model, feature_names, target, input_length, probability): svm.rho = libsvm_model.rho[0] # set coefficents - if(svm_type_enum == libsvm.C_SVC or svm_type_enum == libsvm.NU_SVC): + if svm_type_enum == _svm.C_SVC or svm_type_enum == _svm.NU_SVC: for _ in range(nr_class - 1): svm.coefficients.add() for i in range(libsvm_model.l): @@ -173,4 +188,12 @@ def convert(libsvm_model, feature_names, target, input_length, probability): cur_node.value = libsvm_model.SV[i][j].value j += 1 - return MLModel(export_spec) + model = MLModel(export_spec) + + from libsvm import __version__ as libsvm_version + + libsvm_version = "libsvm=={0}".format(libsvm_version) + model.user_defined_metadata[_METADATA_VERSION] = ct_version + model.user_defined_metadata[_METADATA_SOURCE] = libsvm_version + + return model diff --git a/coremltools/converters/libsvm/_libsvm_util.py b/coremltools/converters/libsvm/_libsvm_util.py index f2b9ce074..f00c9faf2 100644 --- a/coremltools/converters/libsvm/_libsvm_util.py +++ b/coremltools/converters/libsvm/_libsvm_util.py @@ -3,7 +3,8 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from ..._deps import HAS_LIBSVM +from ..._deps import _HAS_LIBSVM + def load_model(model_path): """Load a libsvm model from a path on disk. @@ -24,11 +25,12 @@ def load_model(model_path): model: libsvm_model A model of the libsvm format. """ - if not(HAS_LIBSVM): - raise RuntimeError('libsvm not found. libsvm conversion API is disabled.') - - from svmutil import svm_load_model # From libsvm + if not (_HAS_LIBSVM): + raise RuntimeError("libsvm not found. libsvm conversion API is disabled.") + + from svmutil import svm_load_model # From libsvm import os - if (not os.path.exists(model_path)): + + if not os.path.exists(model_path): raise IOError("Expected a valid file path. %s does not exist" % model_path) return svm_load_model(model_path) diff --git a/coremltools/converters/mil/__init__.py b/coremltools/converters/mil/__init__.py new file mode 100644 index 000000000..e36bfb823 --- /dev/null +++ b/coremltools/converters/mil/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from .mil import *
+
+from .frontend.torch import register_torch_op
+
+from .input_types import (
+    ClassifierConfig,
+    InputType,
+    TensorType,
+    ImageType,
+    RangeDim,
+    Shape,
+    EnumeratedShapes,
+)
+
+from coremltools.converters.mil.frontend.tensorflow.tf_op_registry import register_tf_op
diff --git a/coremltools/converters/mil/_deployment_compatibility.py b/coremltools/converters/mil/_deployment_compatibility.py
new file mode 100644
index 000000000..ef332f7be
--- /dev/null
+++ b/coremltools/converters/mil/_deployment_compatibility.py
@@ -0,0 +1,147 @@
+from enum import Enum
+from coremltools import _SPECIFICATION_VERSION_IOS_13, _SPECIFICATION_VERSION_IOS_14
+
+
+class AvailableTarget(Enum):
+    # iOS versions
+    iOS13 = _SPECIFICATION_VERSION_IOS_13
+    iOS14 = _SPECIFICATION_VERSION_IOS_14
+
+    # macOS versions (aliases of iOS versions)
+    macOS15 = _SPECIFICATION_VERSION_IOS_13
+    macOS16 = _SPECIFICATION_VERSION_IOS_14
+
+    # watchOS versions (aliases of iOS versions)
+    watchOS6 = _SPECIFICATION_VERSION_IOS_13
+    watchOS7 = _SPECIFICATION_VERSION_IOS_14
+
+    # tvOS versions (aliases of iOS versions)
+    tvOS13 = _SPECIFICATION_VERSION_IOS_13
+    tvOS14 = _SPECIFICATION_VERSION_IOS_14
+
+
+_get_features_associated_with = {}
+
+
+def register_with(name):
+    def decorator(func):
+        if name not in _get_features_associated_with:
+            _get_features_associated_with[name] = func
+        else:
+            raise ValueError("Function is already registered with {}".format(name))
+        return func
+
+    return decorator
+
+
+@register_with(AvailableTarget.iOS14)
+def iOS14Features(spec):
+    features_list = []
+
+    if spec.WhichOneof("Type") == "neuralNetwork":
+        nn_spec = spec.neuralNetwork
+    elif spec.WhichOneof("Type") == "neuralNetworkClassifier":
+        nn_spec = spec.neuralNetworkClassifier
+    elif spec.WhichOneof("Type") == "neuralNetworkRegressor":
+        nn_spec = spec.neuralNetworkRegressor
+    else:
+        raise ValueError("Invalid neural network specification for the model")
+
+    # Non-zero default optional values
+    for idx, input in enumerate(spec.description.input):
+        value = 0
+        if input.type.isOptional:
+            value = max(value, input.type.multiArrayType.floatDefaultValue)
+            value = max(value, input.type.multiArrayType.doubleDefaultValue)
+            value = max(value, input.type.multiArrayType.intDefaultValue)
+
+            if value != 0:
+                msg = "Support of non-zero default optional values for inputs."
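+                # (Editor's note, illustrative: an optional input saved with,
+                # say, floatDefaultValue = 2.0 yields value == 2.0 here, so
+                # the feature is recorded once and the input scan stops at
+                # the break below.)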
+                features_list.append(msg)
+                break
+
+    # Layers or modifications introduced in iOS14
+    new_layers = [
+        "oneHot",
+        "cumSum",
+        "clampedReLU",
+        "argSort",
+        "pooling3d",
+        "convolution3d",
+        "globalPooling3d",
+    ]
+    for layer in nn_spec.layers:
+        layer_type = layer.WhichOneof("layer")
+
+        msg = ""
+
+        if layer_type in new_layers:
+            msg = "{} {}".format(layer_type.capitalize(), "operation")
+
+        if layer_type == "tile" and len(layer.input) == 2:
+            msg = "Dynamic Tile operation"
+
+        if layer_type == "upsample" and layer.upsample.linearUpsampleMode in [1, 2]:
+            msg = "Upsample operation with Align Corners mode"
+
+        if layer_type == "reorganizeData" and layer.reorganizeData.mode == 2:
+            msg = "Pixel Shuffle operation"
+
+        if layer_type == "sliceDynamic" and layer.sliceDynamic.squeezeMasks:
+            msg = "Squeeze mask for dynamic slice operation"
+
+        if layer_type == "sliceStatic" and layer.sliceStatic.squeezeMasks:
+            msg = "Squeeze mask for static slice operation"
+
+        if msg != "" and (msg not in features_list):
+            features_list.append(msg)
+
+    return features_list
+
+
+def check_deployment_compatibility(spec, representation=None, deployment_target=None):
+    if representation is None:
+        representation = "nn_proto"
+
+    if deployment_target is None:
+        deployment_target = AvailableTarget.iOS13
+
+    if representation != "nn_proto":
+        raise ValueError(
+            "Deployment is supported only for mlmodel in nn_proto representation. Provided: {}".format(
+                representation
+            )
+        )
+
+    if not isinstance(deployment_target, AvailableTarget):
+        raise TypeError(
+            "Argument for deployment_target must be an enumeration from Enum class AvailableTarget"
+        )
+
+    for any_target in AvailableTarget:
+
+        if any_target.value > deployment_target.value:
+            missing_features = _get_features_associated_with[any_target](spec)
+
+            if missing_features:
+                msg = (
+                    "Provided minimum deployment target requires model to be of version {} but converted model "
+                    "uses the following features, which are available from version {} onwards.\n ".format(
+                        deployment_target.value, any_target.value
+                    )
+                )
+
+                for i, feature in enumerate(missing_features):
+                    msg += " {}. {}\n".format(i + 1, feature)
+
+                raise ValueError(msg)
+
+    # Default exception throwing if not able to find the reason behind spec version bump
+    if spec.specificationVersion > deployment_target.value:
+        msg = (
+            "Provided deployment target requires model to be of version {} but converted model has version {}, "
+            "which is suitable only for later releases".format(
+                deployment_target.value, spec.specificationVersion,
+            )
+        )
+        raise ValueError(msg)
diff --git a/coremltools/converters/mil/backend/__init__.py b/coremltools/converters/mil/backend/__init__.py
new file mode 100644
index 000000000..61aafff42
--- /dev/null
+++ b/coremltools/converters/mil/backend/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
diff --git a/coremltools/converters/mil/backend/nn/__init__.py b/coremltools/converters/mil/backend/nn/__init__.py
new file mode 100644
index 000000000..845cdd674
--- /dev/null
+++ b/coremltools/converters/mil/backend/nn/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from .load import load diff --git a/coremltools/converters/mil/backend/nn/load.py b/coremltools/converters/mil/backend/nn/load.py new file mode 100644 index 000000000..7b6528d4e --- /dev/null +++ b/coremltools/converters/mil/backend/nn/load.py @@ -0,0 +1,264 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +import logging +from collections import defaultdict + +from coremltools.converters.mil.input_types import ( + ClassifierConfig, + ImageType, + EnumeratedShapes, + Shape, + RangeDim, +) +from coremltools.models import MLModel +from coremltools.models import neural_network as neural_network +import coremltools.models.datatypes as datatypes +from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil.types.symbolic import ( + any_symbolic, + any_variadic, + is_symbolic, +) +from .op_mapping import convert_ops +from coremltools.models.neural_network import flexible_shape_utils +from coremltools.models.neural_network.flexible_shape_utils import ( + update_image_size_range, + add_enumerated_image_sizes, + set_multiarray_ndshape_range, + add_multiarray_ndshape_enumeration, +) +from .passes.nn_passes import nn_backend_passes +from coremltools.converters._profile_utils import _profile + + +def _convert_to_image_input(proto, inputs): + tmp_model = MLModel(proto) + for input_type in inputs: + if isinstance(input_type, ImageType): + if input_type.color_layout == "G": + gray_bias = input_type.bias + red_bias, green_bias, blue_bias = 0.0, 0.0, 0.0 + elif input_type.color_layout == "RGB": + gray_bias = 0.0 + red_bias, green_bias, blue_bias = input_type.bias + elif input_type.color_layout == "BGR": + gray_bias = 0.0 + blue_bias, green_bias, red_bias = input_type.bias + tmp_model = neural_network.utils.make_image_input( + tmp_model, + input_type.name, + is_bgr=input_type.color_layout == "BGR", + image_format="NCHW" if input_type.channel_first else "NHWC", + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + scale=input_type.scale, + ) + return tmp_model.get_spec() + + +def _convert_to_classifier(proto, classifier_config): + tmp_model = MLModel(proto) + tmp_model = neural_network.utils.make_nn_classifier( + tmp_model, + classifier_config.class_labels, + classifier_config.predicted_feature_name, + classifier_config.predicted_probabilities_output, + ) + return tmp_model.get_spec() + + +def _set_user_inputs(proto, inputs): + for input_type in inputs: + shape = input_type.shape + if isinstance(shape, EnumeratedShapes): + if isinstance(input_type, ImageType): + image_sizes = [] + if input_type.image_config.channel_first: + for s in shape.shapes: + image_sizes.append( + flexible_shape_utils.NeuralNetworkImageSize( + s.shape[-2], s.shape[-1] + ) + ) + else: + for s in shape.shapes: + image_sizes.append( + flexible_shape_utils.NeuralNetworkImageSize( + s.shape[-3], s.shape[-2] + ) + ) + add_enumerated_image_sizes( + proto, input_type.name, image_sizes=image_sizes + ) + else: + add_multiarray_ndshape_enumeration( + proto, input_type.name, [tuple(s.shape) for s 
in shape.shapes]
+                )
+        elif isinstance(shape, Shape):
+            shape = shape.shape  # This is shape in Shape
+            if all(
+                [
+                    not isinstance(s, RangeDim) and not is_symbolic(s) and s > 0
+                    for s in shape
+                ]
+            ):
+                continue
+            if isinstance(input_type, ImageType):
+                img_range = flexible_shape_utils.NeuralNetworkImageSizeRange()
+                if input_type.channel_first:
+                    H = shape[-2]
+                    W = shape[-1]
+                else:
+                    H = shape[-3]
+                    W = shape[-2]
+
+                if isinstance(H, RangeDim):
+                    img_range.add_height_range((H.lower_bound, H.upper_bound))
+                elif is_symbolic(H):
+                    img_range.add_height_range((1, -1))
+                else:
+                    img_range.add_height_range((H, H))
+                if isinstance(W, RangeDim):
+                    img_range.add_width_range((W.lower_bound, W.upper_bound))
+                elif is_symbolic(W):
+                    img_range.add_width_range((1, -1))
+                else:
+                    img_range.add_width_range((W, W))
+
+                flexible_shape_utils.update_image_size_range(
+                    proto, input_type.name, img_range
+                )
+            else:
+                lb = []
+                ub = []
+                for s in shape:
+                    if isinstance(s, RangeDim):
+                        lb.append(s.lower_bound)
+                        ub.append(s.upper_bound)
+                    elif is_symbolic(s):
+                        lb.append(1)
+                        ub.append(-1)
+                    else:
+                        lb.append(s)
+                        ub.append(s)
+                set_multiarray_ndshape_range(
+                    proto, input_type.name, lower_bounds=lb, upper_bounds=ub
+                )
+
+
+def _set_symbolic_inputs(proto, symbolic_inputs):
+    # Set symbolic input shapes by -1 inferred from graph
+    for input_name, shape in symbolic_inputs.items():
+        lb = [1 if is_symbolic(d) else d for d in shape]
+        ub = [-1 if is_symbolic(d) else d for d in shape]
+        set_multiarray_ndshape_range(
+            proto, input_name, lower_bounds=lb, upper_bounds=ub
+        )
+
+
+@_profile
+def load(prog, **kwargs):
+    if "main" not in prog.functions:
+        msg = "main function not found in program {}"
+        raise ValueError(msg.format(prog))
+    if len(prog.functions) != 1:
+        msg = (
+            "Program must have exactly one `main` function to "
+            "convert to NN. Program: {}"
+        )
+        raise ValueError(msg.format(prog))
+
+    nn_backend_passes(prog)
+    input_types = prog.main_input_types
+
+    v1_inputs = []
+    symbolic_inputs = {}
+    for name, var in prog.functions["main"].inputs.items():
+        if types.is_tensor(var.sym_type):
+            sym_shape = var.sym_type.get_shape()
+            if any_variadic(sym_shape):
+                # TODO: rdar://59559656
+                raise NotImplementedError("Variadic rank is not supported")
+            if any_symbolic(sym_shape):
+                user_specified = False
+                for input_type in input_types:
+                    if name == input_type.name:
+                        sym_shape = input_type.shape.default
+                        user_specified = True
+                        break
+                # Use dummy static shape, and will set it later.
+                shape = [1 if is_symbolic(d) else d for d in sym_shape]
+                if not user_specified:
+                    symbolic_inputs[name] = sym_shape
+            else:
+                shape = sym_shape
+            v1_inputs.append((name, datatypes.Array(*shape)))
+        elif types.is_scalar(var.sym_type):
+            v1_inputs.append((name, datatypes.Array(1)))
+        else:
+            raise NotImplementedError()
+
+    v1_outputs = []
+    for var in prog.functions["main"].outputs:
+        if types.is_tensor(var.sym_type) or types.is_primitive(var.sym_type):
+            # Disregard the output types
+            v1_outputs.append((var.name, None))
+        else:
+            raise NotImplementedError()
+
+    # create neural network builder
+    builder = neural_network.NeuralNetworkBuilder(
+        v1_inputs,
+        v1_outputs,
+        disable_rank5_shape_mapping=True,
+        use_float_arraytype=True,
+    )
+
+    # Consts in V2 are added lazily to V1 by each op whenever needed.
+    # `const_context` stores the const names we've added so far and avoids
+    # adding a const more than once.
+    # const_context: list[set of str] (const name for v1 & v2
+    # (the same)). 
Note that in NN, a const defined in an outer layer is visible from the inner
+    # layer, so the const_context is simply a stack of sets.
+    const_context = []
+    # Iterate through ops and add to builder
+    convert_ops(
+        const_context,
+        builder,
+        prog.functions["main"].operations,
+        prog.functions["main"].outputs,
+    )
+
+    # Replace model outputs' names with v1_outputs
+    output_names = [x[0] for x in v1_outputs]
+    for i, spec_layer in enumerate(builder.nn_spec.layers):
+        for j, name in enumerate(spec_layer.output):
+            for output_name in output_names:
+                if output_name.split(":")[0] == name:
+                    spec_layer.output[j] = output_name
+
+    proto = builder.spec
+    # image input
+    has_image_input = any([isinstance(s, ImageType) for s in input_types])
+    if has_image_input:
+        proto = _convert_to_image_input(proto, input_types)
+
+    # classifier flag
+    classifier_config = kwargs.get("classifier_config", None)
+    if classifier_config is not None:
+        proto = _convert_to_classifier(proto, classifier_config)
+
+    _set_user_inputs(proto, input_types)
+    _set_symbolic_inputs(proto, symbolic_inputs)
+
+    return proto
diff --git a/coremltools/converters/mil/backend/nn/op_mapping.py b/coremltools/converters/mil/backend/nn/op_mapping.py
new file mode 100644
index 000000000..19e2cb95f
--- /dev/null
+++ b/coremltools/converters/mil/backend/nn/op_mapping.py
@@ -0,0 +1,3061 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import numpy as _np
+import logging as _logging
+from six import string_types as _string_types
+from coremltools.models import neural_network as neural_network
+from coremltools.proto import NeuralNetwork_pb2
+from coremltools.converters.mil.mil.types.symbolic import (
+    is_variadic,
+    any_symbolic,
+    is_symbolic,
+)
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry
+from tqdm import tqdm as _tqdm
+
+V2_TO_V1_OP_REGISTRY = {}
+
+
+def register_v2_op(func):
+    f_name = func.__name__
+    if f_name in V2_TO_V1_OP_REGISTRY:
+        raise ValueError("V2 op {} is already registered.".format(f_name))
+    V2_TO_V1_OP_REGISTRY[f_name] = func
+    return func
+
+
+def convert_ops(const_context, builder, ops, outputs):
+    """
+    const_context: list[set of str]: const name for v1 & v2 (the same)
+    builder: neural_network.NeuralNetworkBuilder
+    ops: list[Operation], usually from Block.operations.
+    outputs: list[Var]. block outputs
+    """
+
+    const_context.append(set())
+    custom_ops = SSAOpRegistry.custom_ops
+    for op in _tqdm(ops, desc="Translating MIL ==> MLModel Ops", unit=" ops"):
+        if op.op_type in custom_ops:
+            mapper = V2_TO_V1_OP_REGISTRY["custom_op"]
+        elif op.op_type in V2_TO_V1_OP_REGISTRY:
+            mapper = V2_TO_V1_OP_REGISTRY[op.op_type]
+        else:
+            msg = "{} is not implemented for nn backend. block: {}"
+            raise ValueError(msg.format(op.op_type, op.enclosing_block))
+        # const is globally shared in nn.
+        mapper(const_context, builder, op)
+
+    for ov in outputs:
+        # If block return value is a const, we need to add it.
+        if ov.op is None:
+            continue  # placeholder
+        if ov.op.op_type == "const":
+            add_const(const_context, builder, ov.name, ov.val)
+    const_context.pop()
+
+
+def make_input(const_context, builder, variables):
+    """
+    Ensure that variables, if const, are added to builder.
+
+    variables: list[Var] or Var or str. Inputs for an nn layer.
+
+    Returns:
+        list[str] or str: variables' names.
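+
+    Example (editor's sketch, not part of the original docstring; ``v`` is a
+    hypothetical const Var named "v"):
+
+        >>> make_input(const_context, builder, [v, "some_blob"])
+        ['v', 'some_blob']
+
+    Var inputs are returned by name, plain strings pass through unchanged,
+    and a const Var is materialized in the builder via add_const the first
+    time it is seen in the current const_context scope.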
+ """ + if isinstance(variables, (list, tuple)): + return [make_input(const_context, builder, v) for v in variables] + if isinstance(variables, _string_types): + return variables + + v = variables # variables is Var + if v.op is not None and v.op.op_type == "const" and not v.name in const_context[-1]: + add_const(const_context, builder, v.name, v.val) + return v.name + + +def _convert_pool(const_context, builder, op, mode, exclude_padding_from_average=True): + num_spatial_dimensions = len(op.kernel_sizes.val) + op_pad = op.pad.val if op.pad is not None else [0] * num_spatial_dimensions * 2 + if num_spatial_dimensions <= 2: + padding_type = op.pad_type.val.upper() + # nn's add_pool function does not support CUSTOM padding, + # but VALID padding supports user-defined padding amounts. + # Therefore we map CUSTOM padding to VALID padding. + if padding_type == "CUSTOM": + padding_type = "VALID" + builder.add_pooling( + name=op.name, + height=op.kernel_sizes.val[-2], + width=op.kernel_sizes.val[-1], + stride_height=op.strides.val[-2], + stride_width=op.strides.val[-1], + layer_type=mode.upper(), + padding_type=padding_type, + input_name=make_input(const_context, builder, op.x), + output_name=op.name, + exclude_pad_area=exclude_padding_from_average, + padding_top=op_pad[0], + padding_bottom=op_pad[1], + padding_left=op_pad[2], + padding_right=op_pad[3], + is_global=False, + ) + elif num_spatial_dimensions == 3: + builder.add_pooling3d( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + pooling_type=mode.upper(), + kernel_depth=op.kernel_sizes.val[-3], + kernel_height=op.kernel_sizes.val[-2], + kernel_width=op.kernel_sizes.val[-1], + stride_depth=op.strides.val[-3], + stride_height=op.strides.val[-2], + stride_width=op.strides.val[-1], + padding_mode=op.pad_type.val, + custom_padding_front=op_pad[0], + custom_padding_back=op_pad[1], + custom_padding_top=op_pad[2], + custom_padding_bottom=op_pad[3], + custom_padding_left=op_pad[4], + custom_padding_right=op_pad[5], + average_pooling_count_excludes_padding=exclude_padding_from_average, + ) + else: + raise ValueError( + "Unsupported number of spatial dimensions. Maximum is 3, but got %s" + % num_spatial_dimensions + ) + + +def _try_convert_global_pool(const_context, builder, op, mode): + """ + Optional performance optimization pass that tries to lower spatial + reduce_mean / reduce_max to global_avg_pool / global_max_pool. + Return True if the lowering happened, otherwise return False to + continue as normal reduction op. + """ + rank = op.x.rank + if is_variadic(rank) or rank not in {4, 5}: + return False + keep_dims = op.keep_dims.val + if op.axes is not None: + axes = op.axes.val + axes = sorted([rank + axis if axis < 0 else axis for axis in axes]) + if keep_dims is False: + return False + if rank == 4 and tuple(axes) != (2, 3): + return False + if rank == 5 and tuple(axes) != (2, 3, 4): + return False + builder.add_pooling( + name=op.name, + height=0, + width=0, + stride_height=0, + stride_width=0, + layer_type=mode.upper(), + padding_type="valid".upper(), + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + is_global=True, + ) + return True + + +def add_const(const_context, builder, name, val): + """ + const_context (list of set of str): const names added to v1 builder. Const names are + identical between v2 and v1 + + name (str): name of const. Should be the same for v1 and v2. 
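+        (Editor's note: the name is reused below as both the layer name and
+        its output blob in add_load_constant_nd, hence the v1/v2 equality
+        requirement.)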
+ val: np.ndarray + + No return values as `name` is the name of const in v1. + + Comment: we don't need to add scalar const as they are just fields in + layer proto message in NN. + If we really need a const scalar, we upcast it to rank-1. + + """ + for const_set in const_context: + if name in const_set: + _logging.warning("Const {} was already added.".format(name)) + return + if not isinstance(val, (_np.ndarray, _np.generic)): + val = _np.array([val]) + rank = len(val.shape) + if rank == 0: + builder.add_load_constant_nd( + name=name, output_name=name, constant_value=val.reshape([1]), shape=[1] + ) + else: + builder.add_load_constant_nd( + name=name, output_name=name, constant_value=val, shape=val.shape + ) + const_context[-1].add(name) + _logging.info("added const {} for builder {}".format(name, builder)) + + +# Helper routines for recurrent layers +def _expand_dim(builder, node_name, input_name, axes): + builder.add_expand_dims( + name=node_name, input_name=input_name, output_name=node_name, axes=axes + ) + + +def _squeeze(builder, node_name, input_name, axes): + builder.add_squeeze( + name=node_name, input_name=input_name, output_name=node_name, axes=axes + ) + + +def _split(x, sections, axis): + if x is None: + return None + if x.shape[axis] % sections != 0: + raise ValueError( + "Cannot split axis {} into {} sections for input of shape {}".format( + axis, sections, x.shape + ) + ) + return _np.split(x, sections, axis=axis) + + +# Split weights into given number of sections +# This method should be used when weights are combined into +# one matrix for several nodes e.g. Input, forget, output and cell gate +# of LSTM +def _split_weights(w, sections): + hidden_size = w.shape[-1] // sections + input_size = w.shape[0] - hidden_size + w = _np.transpose(w, (1, 0)) + w_x = _split(w[:, :input_size], sections=sections, axis=0) + w_h = _split(w[:, input_size:], sections=sections, axis=0) + return w_x, w_h + + +# Split bias into given number of sections +# This method should be used when biases are combined into +# one matrix for several nodes e.g. 
Input, forget, output and cell gate +# of LSTM +def _split_bias(b, sections): + if b is None: + return None + # Combine input-hidden and hidden-hidden bias + b = b[0] + b[1] + b = _split(b, sections=sections, axis=0) + return b + + +@register_v2_op +def avg_pool(const_context, builder, op): + _convert_pool( + const_context=const_context, + builder=builder, + op=op, + mode="average", + exclude_padding_from_average=op.exclude_padding_from_average.val, + ) + + +@register_v2_op +def addn(const_context, builder, op): + input_names = make_input(const_context, builder, op.values) + if len(input_names) == 1: + builder.add_copy( + name=op.name, input_name=input_names[0], output_name=op.outputs[0].name + ) + else: + prev_name = input_names[0] + for i, input in enumerate(input_names[1:-1]): + builder.add_elementwise( + name=op.name + input, + input_names=[prev_name, input], + mode="ADD", + output_name=op.name + input, + ) + prev_name = op.name + input + builder.add_elementwise( + name=op.name, + input_names=[prev_name, input_names[-1]], + mode="ADD", + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def band_part(const_context, builder, op): + builder.add_matrix_band_part( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + num_lower=op.lower.val, + num_upper=op.upper.val, + ) + + +@register_v2_op +def batch_norm(const_context, builder, op): + channels = op.x.shape[1] + gamma = _np.array([1.0] * channels) if op.gamma is None else op.gamma.val + beta = _np.array([0.0] * channels) if op.beta is None else op.beta.val + builder.add_batchnorm( + name=op.name, + channels=channels, + gamma=gamma, + beta=beta, + mean=op.mean.val, + variance=op.variance.val, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + compute_mean_var=False, + instance_normalization=False, + epsilon=op.epsilon.val, + ) + + +@register_v2_op +def const(const_context, builder, op): + # const in V2 are added to V1 lazily. + pass + + +@register_v2_op +def conv(const_context, builder, op): + # v2 x: (n, C_in/groups, spatial_dims) + x_name = make_input(const_context, builder, op.x) + out_name = op.outputs[0].name + + is_conv1d = op.x.rank == 3 + is_conv2d = op.x.rank == 4 + is_conv3d = op.x.rank == 5 + if not (is_conv1d or is_conv2d or is_conv3d): + raise ValueError( + "Input tensor rank '{}' is not one of '{}'.".format(op.x.rank, (3, 4, 5),) + ) + if is_conv1d: + x_name = op.name + "_expand_dim" + out_name += "_expanded" + builder.add_expand_dims( + name=x_name, input_name=op.x.name, output_name=x_name, axes=[3], + ) + # `x_name` is guaranteed to be (n, C_in/groups, spatial_dims) for 1D and 2D convolution + # W_v1 wil be np.ndarray (if W is const at compile time) or None + # (if W is not known at compile time). + weights = None + input_names = [x_name] + if op.weight.val is not None: + # v2 convolution (conv3d) expects weights to have shape (C_out, C_in/groups, spatial_dims) + # v1 convolution expects (H, W, C_in/groups, C_out) or (D, H, W, C_in/groups, C_out) + weights = op.weight.val + if is_conv1d: + weights = _np.expand_dims(op.weight.val, 3) + if is_conv1d or is_conv2d: + weights = _np.transpose(weights, [2, 3, 1, 0]) + else: + # op.weight is not const at compile time. 
+ # When weight is dynamic, v1 convolution expects weight to be + # (C_out, C_in/groups, H, W) + # TODO 3D convolution doesn't support dynamic weights: + if is_conv3d: + raise ValueError("3D Convolution doesn't support dynamic weights.") + weights_name = op.weight.name + if is_conv1d: + weights_name += "_expand_dim" + builder.add_expand_dims( + name=weights_name, + input_name=op.weight.name, + output_name=weights_name, + axes=[3], + ) + input_names.append(weights_name) + + # padding + padding_mode = "valid" if op.pad_type is None else op.pad_type.val + pad = {} + if padding_mode == "custom" or op.pad: + if not is_conv3d: + padding_mode = "valid" + pad["padding_top"] = op.pad.val[0] + pad["padding_bottom"] = op.pad.val[1] + if is_conv2d or is_conv3d: + pad["padding_left"] = op.pad.val[2] + pad["padding_right"] = op.pad.val[3] + else: + pad["padding_front"] = op.pad.val[0] + pad["padding_back"] = op.pad.val[1] + pad["padding_top"] = op.pad.val[2] + pad["padding_bottom"] = op.pad.val[3] + pad["padding_left"] = op.pad.val[4] + pad["padding_right"] = op.pad.val[5] + + # This doesn't work till builder fills in all optional values + # (rdar://59280101) + has_bias = op.bias is not None + groups = op.groups.val + + rank_factor = 1 + if is_conv2d: + rank_factor = 2 + elif is_conv3d: + rank_factor = 3 + strides = [1] * rank_factor + dilations = [1] * rank_factor + + if op.strides is not None: + strides = op.strides.val.tolist() + if op.dilations is not None: + dilations = op.dilations.val.tolist() + if is_conv1d: + dilations = dilations + [1] + strides = strides + [1] + + if is_conv1d or is_conv2d: + builder.add_convolution( + name=out_name, + kernel_channels=op.weight.shape[1], + output_channels=op.weight.shape[0], + height=op.weight.shape[2], + width=1 if is_conv1d else op.weight.shape[3], + stride_height=strides[0], + stride_width=strides[1], + border_mode=padding_mode, + groups=groups, + W=weights, + b=op.bias.val if has_bias else None, + has_bias=has_bias, + is_deconv=False, + input_name=input_names, + output_name=out_name, + dilation_factors=dilations, + **pad # Python 2.7.16 will fail with a syntax error if a comma is included after `**pad` + ) + + # Squeeze added `Width` dimension for 1d case + if is_conv1d: + x_name = op.name + "expand_dim" + builder.add_squeeze( + name=op.name, + input_name=out_name, + output_name=op.outputs[0].name, + axes=[3], + ) + + if is_conv3d: + builder.add_convolution3d( + name=op.name, + input_channels=op.weight.shape[1] * groups, + output_channels=op.weight.shape[0], + depth=op.weight.shape[2], + height=op.weight.shape[3], + width=op.weight.shape[4], + W=op.weight.val, + b=op.bias.val if has_bias else None, + has_bias=has_bias, + groups=groups, + stride_depth=strides[0], + stride_height=strides[1], + stride_width=strides[2], + dilation_depth=dilations[0], + dilation_height=dilations[1], + dilation_width=dilations[2], + padding_mode=padding_mode, + is_deconv=False, + output_shape=None, + input_name=input_names, + output_name=out_name, + **pad # Python 2.7.16 will fail with a syntax error if a comma is included after `**pad` + ) + + +@register_v2_op +def cumsum(const_context, builder, op): + input_names = make_input(const_context, builder, [op.x]) + builder.add_cumsum( + name=op.name, + input_names=input_names, + output_name=op.name, + axis=op.axis.val, + reverse=op.reverse.val, + exclusive=op.exclusive.val, + ) + + +def _add_elementwise_unary( + const_context, builder, op, mode, output_name=None, **kwargs +): + output_name = output_name if output_name else 
op.outputs[0].name + name = output_name if output_name else op.name + if mode in ["sqrt", "rsqrt", "inverse", "power", "exp", "log", "abs", "threshold"]: + builder.add_unary( + name=name, + input_name=make_input(const_context, builder, op.x), + output_name=output_name, + mode=mode, + **kwargs + ) + else: + add_func = getattr(builder, "add_" + mode, None) + if add_func is None: + _logging.error( + "Elementwise unary method {} not found in builder.".format(mode) + ) + add_func( + name=name, + input_name=make_input(const_context, builder, op.x), + output_name=output_name, + **kwargs + ) + + +def _add_elementwise_binary( + const_context, builder, op, mode, output_name=None, **kwargs +): + output_name = output_name if output_name else op.outputs[0].name + name = output_name if output_name else op.name + if mode in ["add", "multiply"]: + params = {"name": name, "output_name": output_name, "mode": mode.upper()} + if op.x.val is not None and op.x.rank == 0: + params["input_names"] = make_input(const_context, builder, [op.y]) + params["alpha"] = op.x.val + builder.add_elementwise(**params) + return + elif op.y.val is not None and op.y.rank == 0: + params["input_names"] = make_input(const_context, builder, [op.x]) + params["alpha"] = op.y.val + builder.add_elementwise(**params) + return + elif mode in ["equal", "not_equal"]: + add_func = getattr(builder, "add_" + mode, None) + params = {"name": name, "output_name": output_name} + if op.x.val is not None and op.x.rank == 0: + params["input_names"] = make_input(const_context, builder, [op.y]) + params["alpha"] = op.x.val + add_func(**params) + return + elif op.y.val is not None and op.y.rank == 0: + params["input_names"] = make_input(const_context, builder, [op.x]) + params["alpha"] = op.y.val + add_func(**params) + return + elif mode in ["greater_than", "greater_equal", "less_than", "less_equal"]: + params = {"name": name, "output_name": output_name} + if op.x.val is not None and op.x.rank == 0: + params["input_names"] = make_input(const_context, builder, [op.y]) + params["alpha"] = op.x.val + if "less" in mode: + params["use_greater_than_equal"] = mode.endswith("_equal") + builder.add_greater_than(**params) + elif "greater" in mode: + params["use_less_than_equal"] = mode.endswith("_equal") + builder.add_less_than(**params) + return + elif op.y.val is not None and op.y.rank == 0: + params["input_names"] = make_input(const_context, builder, [op.x]) + params["alpha"] = op.y.val + if "greater" in mode: + params["use_greater_than_equal"] = mode.endswith("_equal") + builder.add_greater_than(**params) + elif "less" in mode: + params["use_less_than_equal"] = mode.endswith("_equal") + builder.add_less_than(**params) + return + + if op.x.val is not None: + add_const(const_context, builder, op.x.name, op.x.val) + if op.y.val is not None: + if mode == "pow": + _add_elementwise_unary( + const_context, + builder, + op, + "power", + output_name=output_name, + alpha=op.y.val, + ) + return + add_const(const_context, builder, op.y.name, op.y.val) + + if mode in ["add", "multiply", "max", "min", "ave"]: + if op.x.shape == op.y.shape: + builder.add_elementwise( + name=name, + input_names=make_input(const_context, builder, [op.x, op.y]), + output_name=output_name, + mode=mode.upper(), + ) + else: + add_func = getattr(builder, "add_" + mode + "_broadcastable", None) + + if add_func is None: + _logging.error( + "Elementwise binary broadcastable method {} not found in builder.".format( + mode + ) + ) + + add_func( + name=name, + input_names=make_input(const_context, 
builder, [op.x, op.y]), + output_name=output_name, + **kwargs + ) + else: + if mode in ["divide", "floor_div", "mod", "pow", "subtract"]: + add_func = getattr(builder, "add_" + mode + "_broadcastable", None) + elif mode == "less_equal": + add_func = builder.add_less_than + kwargs["use_less_than_equal"] = True + elif mode == "greater_equal": + add_func = builder.add_greater_than + kwargs["use_greater_than_equal"] = True + else: + add_func = getattr(builder, "add_" + mode, None) + + if add_func is None: + msg = "Elementwise binary method {} not found in builder." + raise ValueError(msg.format(mode)) + + add_func( + name=name, + input_names=make_input(const_context, builder, [op.x, op.y]), + output_name=output_name, + **kwargs + ) + + +def _add_logical(const_context, builder, op, mode): + input_names = [] + input_names.append(make_input(const_context, builder, op.x)) + if mode != "NOT": + input_names.append(make_input(const_context, builder, op.y)) + + builder.add_logical( + name=op.name, input_names=input_names, output_name=op.outputs[0].name, mode=mode + ) + + +@register_v2_op +def abs(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "abs") + + +@register_v2_op +def acos(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "acos") + + +@register_v2_op +def add(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "add") + + +@register_v2_op +def asin(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "asin") + + +@register_v2_op +def atan(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "atan") + + +@register_v2_op +def atanh(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "atanh") + + +@register_v2_op +def cast(const_context, builder, op): + if op.dtype.val in ["int32", "int64"]: + _add_elementwise_unary( + const_context, builder, op, "floor", output_name=op.name + "_floor" + ) + _add_elementwise_unary( + const_context, builder, op, "ceil", output_name=op.name + "_ceil" + ) + + builder.add_greater_than( + name=op.name + "_cond", + input_names=[make_input(const_context, builder, op.x)], + output_name=op.name + "_cond", + alpha=0.0, + ) + + builder.add_where_broadcastable( + name=op.name, + input_names=[op.name + i for i in ["_cond", "_floor", "_ceil"]], + output_name=op.outputs[0].name, + ) + elif op.dtype.val in ["fp32", "fp64"]: + builder.add_activation( + name=op.name, + non_linearity="LINEAR", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=[1.0, 0.0], + ) + else: + raise NotImplementedError( + "Parameter dtype of the cast operation can be one of the {}. 
" + "Provided {}".format(["int32", "int64", "fp32", "fp64"], op.dtype.val) + ) + + +@register_v2_op +def ceil(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "ceil") + + +@register_v2_op +def clip(const_context, builder, op): + _add_elementwise_unary( + const_context, + builder, + op, + "clip", + min_value=op.alpha.val, + max_value=op.beta.val, + ) + + +@register_v2_op +def cos(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "cos") + + +@register_v2_op +def cosh(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "cosh") + + +@register_v2_op +def equal(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "equal") + + +@register_v2_op +def exp(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "exp") + + +@register_v2_op +def exp2(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "exp2") + + +@register_v2_op +def floor(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "floor") + + +@register_v2_op +def floor_div(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "floor_div") + + +@register_v2_op +def greater(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "greater_than") + + +@register_v2_op +def greater_equal(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "greater_equal") + + +@register_v2_op +def inverse(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "inverse") + + +@register_v2_op +def less(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "less_than") + + +@register_v2_op +def less_equal(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "less_equal") + + +@register_v2_op +def log(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "log") + + +@register_v2_op +def logical_and(const_context, builder, op): + _add_logical(const_context, builder, op, "AND") + + +@register_v2_op +def logical_not(const_context, builder, op): + _add_logical(const_context, builder, op, "NOT") + + +@register_v2_op +def logical_or(const_context, builder, op): + _add_logical(const_context, builder, op, "OR") + + +@register_v2_op +def logical_xor(const_context, builder, op): + _add_logical(const_context, builder, op, "XOR") + + +@register_v2_op +def maximum(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "max") + + +@register_v2_op +def minimum(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "min") + + +@register_v2_op +def mod(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "mod") + + +@register_v2_op +def mul(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "multiply") + + +@register_v2_op +def not_equal(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "not_equal") + + +@register_v2_op +def pow(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "pow") + + +@register_v2_op +def real_div(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "divide") + + +@register_v2_op +def round(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "round") + + +@register_v2_op +def rsqrt(const_context, builder, op): + 
_add_elementwise_unary(const_context, builder, op, "rsqrt") + + +@register_v2_op +def sign(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "sign") + + +@register_v2_op +def sin(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "sin") + + +@register_v2_op +def sinh(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "sinh") + + +@register_v2_op +def slice_by_index(const_context, builder, op): + rank = op.x.rank + stride = [1] * rank if op.stride is None else op.stride.val + begin_mask = [False] * rank if op.begin_mask is None else op.begin_mask.val + end_mask = [False] * rank if op.end_mask is None else op.end_mask.val + squeeze_mask = [False] * rank if op.squeeze_mask is None else op.squeeze_mask.val + + builder.add_slice_dynamic( + name=op.name, + input_names=make_input(const_context, builder, [op.x, op.begin, op.end]), + output_name=op.outputs[0].name, + strides=tuple(stride), + begin_masks=tuple(begin_mask), + end_masks=tuple(end_mask), + squeeze_masks=tuple(squeeze_mask), + ) + + +@register_v2_op +def slice_by_size(const_context, builder, op): + """ + A block of ops achieving slice_by_size with dynamic input x and size. + """ + + # get the end_index of input x + # for instance, x with shape [2,3,4] results in [2,3,4] + end_index_name = op.name + "_end_index" + builder.add_get_shape( + name=end_index_name, + input_name=make_input(const_context, builder, op.x), + output_name=end_index_name, + ) + + # get the mask where size = -1 + # for instance, size = [-1,1,2] results in [1,0,0] + const_name = op.name + "_const_name" + add_const(const_context, builder, const_name, _np.array([-1] * op.x.rank)) + + is_end_mask_name = op.name + "_is_end_mask" + builder.add_equal( + name=is_end_mask_name, + input_names=make_input(const_context, builder, [const_name, op.size]), + output_name=is_end_mask_name, + ) + + # get the mask where size != -1 + # for instance, size = [-1,1,2] results in [0,1,1] + is_not_end_mask_name = op.name + "_is_not_end_mask" + builder.add_not_equal( + name=is_not_end_mask_name, + input_names=make_input(const_context, builder, [const_name, op.size]), + output_name=is_not_end_mask_name, + ) + + # get the end index for dimensions i where size[i] = -1 + # for size[i] != -1, just make it 0 + # for instance, x with shape [2,3,4] and size = [-1,1,2] + # results in [2,0,0] + end_index_with_mask_name = op.name + "_end_index_with_mask" + builder.add_elementwise( + name=end_index_with_mask_name, + input_names=[end_index_name, is_end_mask_name], + output_name=end_index_with_mask_name, + mode="MULTIPLY", + ) + + # get the end index for dimension i where size[i] != -1 + # for size[i] = 1, just make it 0 + # for instance, x with shape [2,3,4], size = [-1,1,2], + # begin = [0,1,1] results in [0,2,3] + end_ids = op.name + "_end_ids" + builder.add_elementwise( + name=end_ids, + input_names=make_input(const_context, builder, [op.begin, op.size]), + output_name=end_ids, + mode="ADD", + ) + + end_index_without_mask_name = op.name + "_end_index_without_mask" + builder.add_elementwise( + name=end_index_without_mask_name, + input_names=make_input(const_context, builder, [is_not_end_mask_name, end_ids]), + output_name=end_index_without_mask_name, + mode="MULTIPLY", + ) + + # add two end index array together to get the final index + final_end_index_name = op.name + "_final_index" + builder.add_elementwise( + name=final_end_index_name, + input_names=make_input( + const_context, + builder, + 
[end_index_with_mask_name, end_index_without_mask_name], + ), + output_name=final_end_index_name, + mode="ADD", + ) + + input_names = make_input( + const_context, builder, [op.x, op.begin, final_end_index_name] + ) + builder.add_slice_dynamic( + name=op.name, input_names=input_names, output_name=op.outputs[0].name + ) + + +@register_v2_op +def sqrt(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "sqrt") + + +@register_v2_op +def square(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "power", alpha=2.0) + + +@register_v2_op +def sub(const_context, builder, op): + _add_elementwise_binary(const_context, builder, op, "subtract") + + +@register_v2_op +def tan(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "tan") + + +@register_v2_op +def threshold(const_context, builder, op): + _add_elementwise_unary(const_context, builder, op, "threshold", alpha=op.alpha.val) + + +@register_v2_op +def depth_to_space(const_context, builder, op): + builder.add_reorganize_data( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + mode="DEPTH_TO_SPACE", + block_size=op.block_size.val, + ) + + +@register_v2_op +def expand_dims(const_context, builder, op): + builder.add_expand_dims( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axes=op.axes.val, + ) + + +@register_v2_op +def fill(const_context, builder, op): + if op.shape.val is None: + builder.add_fill_dynamic( + name=op.name, + input_name=make_input(const_context, builder, op.shape), + output_name=op.outputs[0].name, + value=op.value.val, + ) + else: + builder.add_fill_static( + name=op.name, + output_name=op.outputs[0].name, + output_shape=op.shape.val, + value=op.value.val, + ) + + +@register_v2_op +def random_bernoulli(const_context, builder, op): + if op.shape.val is None: + builder.add_random_bernoulli_dynamic( + name=op.name, + input_names=make_input(const_context, builder, [op.shape]), + output_name=op.outputs[0].name, + prob=op.prob.val, + seed=op.seed.val, + ) + else: + builder.add_random_bernoulli_static( + name=op.name, + output_name=op.outputs[0].name, + output_shape=op.shape.val, + prob=op.prob.val, + seed=op.seed.val, + ) + + +@register_v2_op +def random_categorical(const_context, builder, op): + builder.add_categorical_distribution( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + num_samples=op.size.val, + is_logits=(op.mode.val == "logits"), + seed=op.seed.val, + ) + + +@register_v2_op +def random_normal(const_context, builder, op): + if op.shape.val is None: + builder.add_random_normal_dynamic( + name=op.name, + input_names=make_input(const_context, builder, [op.shape]), + output_name=op.outputs[0].name, + mean=op.mean.val, + stddev=op.stddev.val, + seed=op.seed.val, + ) + else: + builder.add_random_normal_static( + name=op.name, + output_name=op.outputs[0].name, + output_shape=op.shape.val, + mean=op.mean.val, + stddev=op.stddev.val, + seed=op.seed.val, + ) + + +@register_v2_op +def random_uniform(const_context, builder, op): + if op.shape.val is None: + builder.add_random_uniform_dynamic( + name=op.name, + input_names=make_input(const_context, builder, [op.shape]), + output_name=op.outputs[0].name, + minval=op.low.val, + maxval=op.high.val, + seed=op.seed.val, + ) + else: + builder.add_random_uniform_static( + name=op.name, + output_name=op.outputs[0].name, + 
output_shape=op.shape.val, + minval=op.low.val, + maxval=op.high.val, + seed=op.seed.val, + ) + + +@register_v2_op +def gru(const_context, builder, op): + make_input(const_context, builder, [op.x, op.initial_h]) + # Input shape: [b, s, I] + input_name = op.x.name + # Shape: [b, H] + initial_h = op.initial_h.name + + w = op.weight.val + b = op.bias.val if op.bias is not None else None + direction = op.direction.val + output_sequence = op.output_sequence.val + activations = [v.val for v in op.activations] + + # Add expand dims for input, in + _expand_dim(builder, input_name + "_expanded", input_name, [3, 4]) + input_name += "_expanded" + + if direction not in {"forward", "reverse"}: + raise ValueError( + "Unknown direction {} for GRU layer. Supported are forward, reverse".format( + direction + ) + ) + + # Expand initial_h + _expand_dim(builder, initial_h + "_expanded", initial_h, [2, 3, 4]) + initial_h += "_expanded" + + # Get weights here + # weight format: [I+H, 3*H] + # Split into Input and hidden weights + # w_x: [I*H, I*H, I*H] + # w_h: [H*H, H*H, H*H] + # where, format is [Z, R, O] + # Z: Update gate, R: Reset gate, O: Output gate + w_x, w_h = _split_weights(w, sections=3) + # bias format: [2, 3*H] + # bias[0]: Input-Hidden bias + # bias[1]: Hidden-Hidden bias + # Combine bias into one and split into ifoz layout + b = _split_bias(b, sections=3) + + input_size = w_x[0].shape[1] + hidden_size = w_x[0].shape[0] + + # 2 outputs + # Y : [s/1, b, h, 1, 1] + # Y_h: [ 1, b, h, 1, 1] + output_names = [_output.name + "_5d" for _output in op.outputs] + builder.add_gru( + name=op.name, + W_h=w_h, + W_x=w_x, + b=b, + hidden_size=hidden_size, + input_size=input_size, + input_names=[input_name, initial_h], + output_names=output_names, + inner_activation=activations[0], + activation=activations[1], + output_all=output_sequence, + reverse_input=(direction == "reverse"), + ) + + # Squeeze Output + # to output shape of [Seq Len or 1, Batch Size, Hidden Size] + _squeeze(builder, op.outputs[0].name, output_names[0], axes=[3, 4]) + # Squeeze Output H and Output C + # to output shape of [Batch Size, Hidden Size] + _squeeze(builder, op.outputs[1].name, output_names[1], axes=[0, 3, 4]) + + +@register_v2_op +def squeeze(const_context, builder, op): + axes = op.axes.val if op.axes is not None else None + builder.add_squeeze( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axes=axes, + squeeze_all=axes is None, + ) + + +@register_v2_op +def topk(const_context, builder, op): + builder.add_topk( + name=op.name, + input_names=make_input(const_context, builder, [op.x]), + output_names=[op.name + ":0", op.name + ":1"], + k=op.k.val, + axis=op.axis.val, + use_bottom_k=op.ascending.val, + ) + + +@register_v2_op +def l2_pool(const_context, builder, op): + _convert_pool(const_context=const_context, builder=builder, op=op, mode="l2") + + +@register_v2_op +def linear(const_context, builder, op): + out_channels, in_channels = op.weight.shape + has_bias = op.bias.val is not None + builder.add_inner_product( + name=op.name, + W=op.weight.val, + b=op.bias.val, + input_channels=in_channels, + output_channels=out_channels, + has_bias=has_bias, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def matmul(const_context, builder, op): + weight = None + rows, columns = 0, 0 + + if ( + op.y.val is not None + and op.y.rank == 2 + and len(op.y.child_ops) == 1 + and len(op.y.consuming_blocks) == 0 + ): + + weight = 
op.y.val + if op.transpose_y.val: + weight = weight.transpose((1, 0)) + + rows, columns = weight.shape + input_names = make_input(const_context, builder, [op.x]) + + if op.transpose_x.val: + perm = [i for i in range(op.x.rank)] + perm[-1], perm[-2] = perm[-2], perm[-1] + name = op.name + "_x_transpose" + builder.add_transpose( + name=name, axes=perm, input_name=input_names[0], output_name=name + ) + input_names = [name] + + else: + input_names = make_input(const_context, builder, [op.x, op.y]) + + builder.add_batched_mat_mul( + name=op.name, + input_names=input_names, + output_name=op.outputs[0].name, + transpose_a=op.transpose_x.val, + transpose_b=op.transpose_y.val, + W=weight, + weight_matrix_rows=rows, + weight_matrix_columns=columns, + ) + + +@register_v2_op +def max_pool(const_context, builder, op): + _convert_pool(const_context=const_context, builder=builder, op=op, mode="max") + + +@register_v2_op +def non_zero(const_context, builder, op): + builder.add_where_nonzero( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def lstm(const_context, builder, op): + make_input(const_context, builder, [op.x, op.initial_h, op.initial_c]) + # Input shape [b, s, I] + input_name = op.x.name + # Shape: [b, DIRECTION*H] + initial_h = op.initial_h.name + initial_c = op.initial_c.name + + w = op.weight.val + b = op.bias.val if op.bias is not None else None + direction = op.direction.val + output_sequence = op.output_sequence.val + activations = [v.val for v in op.activations] + peephole = op.peephole.val if op.peephole is not None else None + # High enough clip value to be ineffective! + clip = 500.0 if op.clip is None else op.clip.val + + # Add expand dims for input, in + _expand_dim(builder, input_name + "_expanded", input_name, [3, 4]) + input_name += "_expanded" + + if direction in {"forward", "reverse"}: + # Expand initial_h and initial_c + _expand_dim(builder, initial_h + "_expanded", initial_h, [2, 3, 4]) + initial_h += "_expanded" + # initial_h may have the same name as initial_c (e.g., same Var). 
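+        # (Editor's note: this can happen when, e.g., both states are fed
+        # from a single zero-filled Var, in which case expanding one blob
+        # twice under the same name would collide in the NN graph.)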
+ # Append a different string to avoid conflict + _expand_dim(builder, initial_c + "_expanded2", initial_c, [2, 3, 4]) + initial_c += "_expanded2" + + # Get weights here + # weight format: [I+H, 4*H] + # Split into Input and hidden weights + # w_x: [I*H, I*H, I*H, I*H] + # w_h: [H*H, H*H, H*H, H*H] + w_x, w_h = _split_weights(w, sections=4) # ifoz layout + # bias format: [2, 4*H] + # bias[0]: Input-Hidden bias + # bias[1]: Hidden-Hidden bias + b = _split_bias(b, sections=4) # ifoz layout + # peephole format: [3*H] + # where format is, [input gate, forget gate, output gate] + peephole = _split(peephole, sections=3, axis=0) + + input_size = w_x[0].shape[1] + hidden_size = w_x[0].shape[0] + + # 3 outputs + # Y : [s/1, b, h, 1, 1] + # Y_h: [ 1, b, h, 1, 1] + # Y_c: [ 1, b, h, 1, 1] + output_names = [_output.name + "_5d" for _output in op.outputs] + builder.add_unilstm( + name=op.name, + W_h=w_h, + W_x=w_x, + b=b, + hidden_size=hidden_size, + input_size=input_size, + input_names=[input_name, initial_h, initial_c], + output_names=output_names, + inner_activation=activations[0].upper(), + cell_state_update_activation=activations[1].upper(), + output_activation=activations[2].upper(), + peep=peephole, + output_all=output_sequence, + cell_clip_threshold=clip, + reverse_input=(direction == "reverse"), + ) + + # Squeeze Output + # to output shape of [Seq Len or 1, Batch Size, Hidden Size] + _squeeze(builder, op.outputs[0].name, output_names[0], axes=[3, 4]) + # Squeeze Output H and Output C + # to output shape of [Batch Size, Hidden Size] + _squeeze(builder, op.outputs[1].name, output_names[1], axes=[0, 3, 4]) + _squeeze(builder, op.outputs[2].name, output_names[2], axes=[0, 3, 4]) + + elif direction == "bidirectional": + # Expand initial_h and initial_c + _expand_dim(builder, initial_h + "_expanded", initial_h, [2, 3, 4]) + initial_h += "_expanded" + # initial_h may have the same name as initial_c (e.g., same Var) + _expand_dim(builder, initial_c + "_expanded2", initial_c, [2, 3, 4]) + initial_c += "_expanded2" + + initial_h_f = initial_h + "_forward" + initial_h_r = initial_h + "_reverse" + initial_c_f = initial_c + "_forward" + initial_c_r = initial_c + "_reverse" + + # split input_h and input_c into two parts + builder.add_split_nd( + name=op.name + "_split_h", + input_name=initial_h, + output_names=[initial_h_f, initial_h_r], + axis=1, + ) + builder.add_split_nd( + name=op.name + "_split_c", + input_name=initial_c, + output_names=[initial_c_f, initial_c_r], + axis=1, + ) + + # Get weights here + # weight format: [I+H, 2*4*H] -> [I+H, 4*H (forward):4*H (backward)] + hidden_size = w.shape[-1] // 8 + input_size = w.shape[0] - hidden_size + forward_wts_index = 4 * hidden_size + # f_w_x and r_w_x: [I*H, I*H, I*H, I*H] + # f_w_h and r_w_h: [H*H, H*H, H*H, H*H] + # where format is, [input gate, forget gate, cell gate, output gate] + f_w_x, f_w_h = _split_weights(w[:, :forward_wts_index], sections=4) + r_w_x, r_w_h = _split_weights(w[:, forward_wts_index:], sections=4) + + # bias format: [2, 2*4*H] + # bias[0]: Input-Hidden bias + # bias[1]: Hidden-Hidden bias + f_b, r_b = None, None + if b is not None: + f_b = _split_bias(b[:, :forward_wts_index], sections=4) + r_b = _split_bias(b[:, forward_wts_index:], sections=4) + + # peephole format: [2*3*H] -> [3*H (forward) : 3*H (backward)] + if peephole is None: + f_peephole, r_peephole = None, None + else: + f_peephole = _split(peephole[: 3 * hidden_size], sections=3, axis=0) + r_peephole = _split(peephole[3 * hidden_size :], sections=3, axis=0) + + 
output_names = [ + op.outputs[0].name + "_5d", # Output Y [s/1, b, 2*h, 1, 1] + op.outputs[1].name + "_5d_foward", # Output Y_h [ 1, b, h, 1, 1] + op.outputs[2].name + + "_5d_forward", # Output Y_c [ 1, b, h, 1, 1] + op.outputs[1].name + + "_5d_reverse", # Output Y_h_reverse [ 1, b, h, 1, 1] + op.outputs[2].name + "_5d_reverse", + ] # Output Y_c_reverse [ 1, b, h, 1, 1] + + builder.add_bidirlstm( + name=op.name, + W_h=f_w_h, + W_x=f_w_x, + b=f_b, + W_h_back=r_w_h, + W_x_back=r_w_x, + b_back=r_b, + hidden_size=hidden_size, + input_size=input_size, + input_names=[ + input_name, + initial_h_f, + initial_c_f, + initial_h_r, + initial_c_r, + ], + output_names=output_names, + inner_activation=activations[0].upper(), + cell_state_update_activation=activations[1].upper(), + output_activation=activations[2].upper(), + peep=f_peephole, + peep_back=r_peephole, + output_all=output_sequence, + cell_clip_threshold=clip, + ) + + # Squeeze Output + # to output shape of [Seq Len or 1, Batch Size, 2*Hidden Size] + _squeeze(builder, op.outputs[0].name, output_names[0], axes=[3, 4]) + + # Output H is of format + # 1, Batch_Size, Hidden_Size, 1, 1 + # Concat to make it + # 1, Batch_Size, 2*Hidden_Size, 1, 1 + builder.add_elementwise( + name=op.outputs[1].name + "_5d", + input_names=[output_names[1], output_names[3]], + output_name=op.outputs[1].name + "_5d", + mode="CONCAT", + ) + # Output C is of format + # 1, Batch_Size, Hidden_Size, 1, 1 + builder.add_elementwise( + name=op.outputs[2].name + "_5d", + input_names=[output_names[2], output_names[4]], + output_name=op.outputs[2].name + "_5d", + mode="CONCAT", + ) + + # Squeeze Output H and Output C + # to output shape of [Batch Size, 2*Hidden Size] + _squeeze( + builder, op.outputs[1].name, op.outputs[1].name + "_5d", axes=[0, 3, 4] + ) + _squeeze( + builder, op.outputs[2].name, op.outputs[2].name + "_5d", axes=[0, 3, 4] + ) + else: + raise ValueError( + "Unknown direction {} for LSTM layer. Supported are forward, reverse or bidirectional".format( + direction + ) + ) + + +@register_v2_op +def reshape(const_context, builder, op): + if op.shape.val is None: + builder.add_reshape_dynamic( + name=op.name, + input_names=make_input(const_context, builder, [op.x, op.shape]), + output_name=op.outputs[0].name, + ) + elif -1 in op.shape.val and len(op.shape.val) == op.x.rank: + # Support 0 in shape. + builder.add_rank_preserving_reshape( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + output_shape=op.shape.val, + ) + else: + if 0 in op.shape.val: + # Does not support 0 in shape + msg = "Use 0 in shape only if len(shape) == x.rank. Report bug." 
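+            # (Editor's note: 0 means "keep this dimension", which only the
+            # rank-preserving branch above supports; e.g. shape (0, -1) for a
+            # rank-3 input falls through to here and is rejected.)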
+ raise ValueError(msg) + output_shape = op.shape.val if len(op.shape.val) != 0 else (1,) + builder.add_reshape_static( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + output_shape=output_shape, + ) + + +@register_v2_op +def reduce_argmax(const_context, builder, op): + builder.add_argmax( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axis=op.axis.val, + keepdims=op.keep_dims.val, + ) + + +@register_v2_op +def reduce_argmin(const_context, builder, op): + builder.add_argmin( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axis=op.axis.val, + keepdims=op.keep_dims.val, + ) + + +def _reduce_axes(const_context, builder, builder_op, op): + axes = op.axes.val if op.axes is not None else op.axes + builder_op( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axes=axes, + keepdims=op.keep_dims.val, + reduce_all=axes is None, + ) + + +@register_v2_op +def reduce_l1_norm(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_l1, op) + + +@register_v2_op +def reduce_l2_norm(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_l2, op) + + +@register_v2_op +def reduce_log_sum(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_logsum, op) + + +@register_v2_op +def reduce_log_sum_exp(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_logsumexp, op) + + +@register_v2_op +def reduce_max(const_context, builder, op): + if not _try_convert_global_pool(const_context, builder, op, mode="max"): + _reduce_axes(const_context, builder, builder.add_reduce_max, op) + + +@register_v2_op +def reduce_mean(const_context, builder, op): + if not _try_convert_global_pool(const_context, builder, op, mode="average"): + _reduce_axes(const_context, builder, builder.add_reduce_mean, op) + + +@register_v2_op +def reduce_min(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_min, op) + + +@register_v2_op +def reduce_prod(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_prod, op) + + +@register_v2_op +def reduce_sum(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_sum, op) + + +@register_v2_op +def reduce_sum_square(const_context, builder, op): + _reduce_axes(const_context, builder, builder.add_reduce_sumsquare, op) + + +@register_v2_op +def reverse(const_context, builder, op): + reverse_dim = [False] * op.x.rank + if op.axes is None: + reverse_dim = [True] * op.x.rank + else: + for axis in op.axes.val: + reverse_dim[axis] = True + builder.add_reverse( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + reverse_dim=reverse_dim, + ) + + +@register_v2_op +def reverse_sequence(const_context, builder, op): + builder.add_reverse_sequence( + name=op.name, + input_names=make_input(const_context, builder, [op.x, op.lengths]), + output_name=op.outputs[0].name, + batch_axis=op.batch_axis.val, + seq_axis=op.seq_axis.val, + ) + + +@register_v2_op +def rnn(const_context, builder, op): + input_name = make_input(const_context, builder, op.x) # [b, s, I] + initial_h = make_input(const_context, builder, op.initial_h) # [b, H] + + w = op.weight.val + b = op.bias.val if op.bias is not None else None + direction = 
op.direction.val
+    output_sequence = op.output_sequence.val
+    activation = op.activation.val
+
+    # Add expand dims for input
+    _expand_dim(builder, input_name + "_expanded", input_name, [3, 4])
+    input_name += "_expanded"
+
+    if direction not in {"forward", "reverse"}:
+        raise ValueError(
+            "Unknown direction {} for RNN layer. Supported directions are forward and reverse".format(
+                direction
+            )
+        )
+
+    # Expand initial_h
+    _expand_dim(builder, initial_h + "_expanded", initial_h, [2, 3, 4])
+    initial_h += "_expanded"
+
+    # Get weights here
+    # weight format: [I+H, H]
+    # Split into input and hidden weights
+    # w_x: (H, I)
+    # w_h: (H, H)
+    w = w.transpose()
+    hidden_size = w.shape[0]
+    input_size = w.shape[-1] - hidden_size
+    w_x, w_h = w[:, :input_size], w[:, input_size:]
+    # bias format: [2, H]
+    # bias[0]: Input-Hidden bias
+    # bias[1]: Hidden-Hidden bias
+    if b is not None:
+        b = b[0] + b[1]
+
+    # 2 outputs
+    # Y  : [s/1, b, h, 1, 1]
+    # Y_h: [ 1, b, h, 1, 1]
+    output_names = [_output.name + "_5d" for _output in op.outputs]
+    builder.add_simple_rnn(
+        name=op.name,
+        W_h=w_h,
+        W_x=w_x,
+        b=b,
+        hidden_size=hidden_size,
+        input_size=input_size,
+        input_names=[input_name, initial_h],
+        output_names=output_names,
+        activation=activation,
+        output_all=output_sequence,
+        reverse_input=(direction == "reverse"),
+    )
+
+    # Squeeze Output
+    # to output shape of [Seq Len or 1, Batch Size, Hidden Size]
+    _squeeze(builder, op.outputs[0].name, output_names[0], [3, 4])
+    # Squeeze Output H
+    # to output shape of [Batch Size, Hidden Size]
+    _squeeze(builder, op.outputs[1].name, output_names[1], [0, 3, 4])
+
+
+@register_v2_op
+def select(const_context, builder, op):
+    builder.add_where_broadcastable(
+        name=op.name,
+        input_names=make_input(const_context, builder, [op.cond, op.a, op.b]),
+        output_name=op.outputs[0].name,
+    )
+
+
+@register_v2_op
+def space_to_depth(const_context, builder, op):
+    builder.add_reorganize_data(
+        name=op.name,
+        input_name=make_input(const_context, builder, op.x),
+        output_name=op.outputs[0].name,
+        mode="SPACE_TO_DEPTH",
+        block_size=op.block_size.val,
+    )
+
+
+@register_v2_op
+def transpose(const_context, builder, op):
+    builder.add_transpose(
+        name=op.name,
+        axes=op.perm.val,
+        input_name=make_input(const_context, builder, op.x),
+        output_name=op.outputs[0].name,
+    )
+
+
+@register_v2_op
+def gather(const_context, builder, op):
+    is_embedding = False
+
+    if op.x.val is not None:
+        W = op.x.val
+        if len(W.shape) == 2:
+            if op.axis.val == 0 or op.axis.val == -2:
+                if len(op.x.child_ops) == 1:
+                    # the constant feeding into the gather doesn't go to any other op
+                    is_embedding = True
+
+    if is_embedding:
+        """
+        The following:
+            %3 = gather(%1, %2, axis=0)  # %1 is a constant matrix of shape (vocab_size, embedding_size)
+        can be mapped to:
+            %2_e = expand_dims(%2, axis=-1)
+            %3 = embeddingND(%2_e, weight=%1)
+        """
+        builder.add_expand_dims(
+            name=op.name + "_expand_dims",
+            input_name=make_input(const_context, builder, op.indices),
+            output_name=op.name + "_expand_dims",
+            axes=[-1],
+        )
+
+        builder.add_embedding_nd(
+            name=op.name,
+            input_name=op.name + "_expand_dims",
+            output_name=op.name,
+            vocab_size=W.shape[0],
+            embedding_size=W.shape[1],
+            W=_np.transpose(W),
+        )
+
+    else:
+        builder.add_gather(
+            name=op.name,
+            input_names=make_input(const_context, builder, [op.x, op.indices]),
+            output_name=op.outputs[0].name,
+            axis=op.axis.val,
+        )
+
+
+@register_v2_op
+def scatter(const_context, builder, op):
+    builder.add_scatter(
+        name=op.name,
input_names=make_input( + const_context, builder, [op.data, op.indices, op.updates] + ), + output_name=op.outputs[0].name, + axis=op.axis.val, + mode=op.mode.val.upper(), + ) + + +@register_v2_op +def gather_along_axis(const_context, builder, op): + builder.add_gather_along_axis( + name=op.name, + input_names=make_input(const_context, builder, [op.x, op.indices]), + output_name=op.outputs[0].name, + axis=op.axis.val, + ) + + +@register_v2_op +def scatter_along_axis(const_context, builder, op): + builder.add_scatter_along_axis( + name=op.name, + input_names=make_input( + const_context, builder, [op.data, op.indices, op.updates] + ), + output_name=op.outputs[0].name, + axis=op.axis.val, + mode=op.mode.val.upper(), + ) + + +@register_v2_op +def gather_nd(const_context, builder, op): + builder.add_gather_nd( + name=op.name, + input_names=[op.x.name, op.indices.name], + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def scatter_nd(const_context, builder, op): + builder.add_scatter_nd( + name=op.name, + input_names=[op.data.name, op.indices.name, op.updates.name], + output_name=op.outputs[0].name, + mode=op.mode.val.upper(), + ) + + +@register_v2_op +def tile(const_context, builder, op): + inputs = [make_input(const_context, builder, op.x)] + if op.reps.val is None: + inputs.append(op.reps.name) + builder.add_tile( + name=op.name, + reps=op.reps.val, + input_name=inputs, + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def tanh(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="TANH", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def scaled_tanh(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="SCALED_TANH", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=[op.alpha.val, op.beta.val], + ) + + +@register_v2_op +def sigmoid(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="SIGMOID", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def sigmoid_hard(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="SIGMOID_HARD", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=[op.alpha.val, op.beta.val], + ) + + +@register_v2_op +def erf(const_context, builder, op): + builder.add_erf( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def thresholded_relu(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="THRESHOLDEDRELU", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=op.alpha.val, + ) + + +@register_v2_op +def elu(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="ELU", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=op.alpha.val, + ) + + +@register_v2_op +def leaky_relu(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="LEAKYRELU", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=[op.alpha.val], + ) + + +@register_v2_op +def gelu(const_context, builder, op): + builder.add_gelu( + name=op.name, + input_name=make_input(const_context, builder, op.x), + 
output_name=op.outputs[0].name, + mode=op.mode.val, + ) + + +@register_v2_op +def softplus(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="SOFTPLUS", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def softmax(const_context, builder, op): + rank = op.logit.rank + if op.axis.val == -3 or op.axis.val > 0 and op.axis.val == rank - 3: + builder.add_softmax( + name=op.name, input_name=op.logit.name, output_name=op.outputs[0].name, + ) + else: + builder.add_softmax_nd( + name=op.name, + input_name=op.logit.name, + output_name=op.outputs[0].name, + axis=op.axis.val, + ) + + +@register_v2_op +def softplus_parametric(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="PARAMETRICSOFTPLUS", + input_name=make_input(const_context, builder, op.x), + input_shape=op.x.shape, + input_rank=op.x.rank, + output_name=op.outputs[0].name, + params=[op.alpha.val, op.beta.val], + ) + + +@register_v2_op +def softsign(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="SOFTSIGN", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def linear_activation(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="LINEAR", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + params=[op.alpha.val, op.beta.val], + ) + + +@register_v2_op +def relu(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="RELU", + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def clamped_relu(const_context, builder, op): + builder.add_clamped_relu( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + alpha=op.alpha.val, + beta=op.beta.val, + ) + + +@register_v2_op +def relu6(const_context, builder, op): + builder.add_activation( + name=op.name + "__relu6_relu__", + input_name=make_input(const_context, builder, op.x), + output_name=op.name + "__relu6_relu__", + non_linearity="RELU", + ) + builder.add_activation( + name=op.name + "__relu6_neg__", + input_name=op.name + "__relu6_relu__", + output_name=op.name + "__relu6_neg__", + non_linearity="LINEAR", + params=[-1, 0], + ) + builder.add_unary( + name=op.name + "__relu6_threshold6__", + input_name=op.name + "__relu6_neg__", + output_name=op.name + "__relu6_threshold6__", + mode="threshold", + alpha=-6, + ) + builder.add_activation( + name=op.name, + input_name=op.name + "__relu6_threshold6__", + output_name=op.outputs[0].name, + non_linearity="LINEAR", + params=[-1, 0], + ) + + +@register_v2_op +def prelu(const_context, builder, op): + builder.add_activation( + name=op.name, + non_linearity="PRELU", + input_name=make_input(const_context, builder, op.x), + input_shape=op.x.shape, + input_rank=op.x.rank, + output_name=op.outputs[0].name, + params=op.alpha.val, + ) + + +@register_v2_op +def pad(const_context, builder, op): + pad = op.pad.val + mode = op.mode.val + + if len(pad.shape) != 1: + raise ValueError("Pad should be a 1D tensor.") + constant_val = op.constant_val.val + + nn_mode_mapping = {"reflect": "reflection", "replicate": "replication"} + mode = nn_mode_mapping.get(mode, mode) + + if op.x.rank > 1 and _np.all(pad[:-4] == 0): + # check and map mode + if mode == "symmetric": + mode = "reflection" + pad = pad[-4:] + left, right = 
pad[2], pad[3] + top, bottom = pad[0], pad[1] + layer = builder.add_padding( + name=op.name, + left=left, + right=right, + top=top, + bottom=bottom, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + padding_type=mode, + value=constant_val, + ) + elif mode == "constant": + builder.add_constant_pad( + name=op.name, + input_names=[op.x.name], + output_name=op.outputs[0].name, + value=constant_val, + pad_to_given_output_size_mode=False, + pad_amounts=pad, + ) + else: + raise ValueError("Unsupported mode for Pad layer! {}".format(mode)) + + +@register_v2_op +def instance_norm(const_context, builder, op): + channels = op.x.shape[1] + gamma = _np.array([1.0] * channels) if op.gamma is None else op.gamma.val + beta = _np.array([0.0] * channels) if op.beta is None else op.beta.val + builder.add_batchnorm( + name=op.name, + channels=channels, + gamma=gamma, + beta=beta, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + compute_mean_var=True, + instance_normalization=True, + epsilon=op.epsilon.val, + ) + + +@register_v2_op +def l2_norm(const_context, builder, op): + builder.add_l2_normalize( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + epsilon=op.epsilon.val, + ) + + +@register_v2_op +def layer_norm(const_context, builder, op): + input_shape_full = list(op.x.shape) + input_shape = [-1 if is_symbolic(s) else s for s in input_shape_full] + axes = None if op.axes is None else op.axes.val + normalized_shape = input_shape[-len(axes) :] + gamma = _np.ones(normalized_shape) if op.gamma is None else op.gamma.val + beta = _np.zeros(normalized_shape) if op.beta is None else op.beta.val + if ( + len(input_shape) in [2, 3] + and len(axes) == 1 + and axes[0] == len(input_shape) - 1 + and input_shape.count(-1) < 2 + ): + builder.add_reshape_static( + name=op.name + "_reshape", + input_name=make_input(const_context, builder, op.x), + output_name=op.x.name + "_reshape", + output_shape=input_shape + [1, 1], + ) + + builder.add_mvn( + name=op.x.name + "_mvn", + input_name=op.x.name + "_reshape", + output_name=op.x.name + "_mvn", + across_channels=True, + normalize_variance=True, + epsilon=op.epsilon.val, + ) + + builder.add_scale( + name=op.x.name + "_5d", + input_name=op.x.name + "_mvn", + output_name=op.x.name + "_5d", + W=gamma, + b=beta, + has_bias=True, + shape_scale=[len(gamma)], + shape_bias=[len(beta)], + ) + + builder.add_reshape_static( + name=op.name, + input_name=op.x.name + "_5d", + output_name=op.outputs[0].name, + output_shape=input_shape, + ) + else: + builder.add_layer_normalization( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + normalized_shape=normalized_shape, + gamma=gamma, + beta=beta, + eps=op.epsilon.val, + ) + + +@register_v2_op +def local_response_norm(const_context, builder, op): + builder.add_lrn( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + alpha=op.alpha.val, + beta=op.beta.val, + local_size=op.size.val, + k=op.k.val, + ) + + +@register_v2_op +def conv_transpose(const_context, builder, op): + x_name = make_input(const_context, builder, op.x) + out_name = op.outputs[0].name + + # Special handling for 1d conv transpose + is_conv_transpose_1d = op.x.rank == 3 + is_conv_transpose_2d = op.x.rank == 4 + is_conv_transpose_3d = op.x.rank == 5 + + if is_conv_transpose_1d: + x_name = op.name + "_expand_dim" + out_name = op.name + 
"_expanded" + builder.add_expand_dims( + name=x_name, input_name=op.x.name, output_name=x_name, axes=[3] + ) + + # Input names to be used + input_names = [x_name] + + # Kernel shape: [C_out, C_in, D, H, W] + weight = op.weight.val + kernel_channels = weight.shape[1] + output_channels = weight.shape[0] * op.groups.val + + if is_conv_transpose_1d: + weight = _np.expand_dims(weight, 3) + + # DeConvolution3D expects weights to have shape (C_out / groups, C_in, spatial_dims) + # DeConvolution2D/1D expects (spatial_dims, C_out/groups, C_in) + if not is_conv_transpose_3d: + weight = _np.transpose(weight, [2, 3, 1, 0]) + + # Adjust for Deconv1D case + rank_factor = 1 if is_conv_transpose_1d else 2 + strides = [1] * rank_factor + dilations = [1] * rank_factor + + if op.strides is not None: + strides = op.strides.val.tolist() + if op.dilations is not None: + dilations = op.dilations.val.tolist() + if is_conv_transpose_1d: + dilations = dilations + [1] + strides = strides + [1] + + # padding + padding_mode = "valid" if op.pad_type is None else op.pad_type.val + pad = {} + if padding_mode == "custom" or op.pad is not None: + if not is_conv_transpose_3d: + padding_mode = "valid" + pad["padding_top"] = op.pad.val[0] # Top + pad["padding_bottom"] = op.pad.val[1] # Bottom + if not is_conv_transpose_1d: + pad["padding_left"] = op.pad.val[2] # Left + pad["padding_right"] = op.pad.val[3] # Right + else: + pad["padding_front"] = op.pad.val[0] # Front + pad["padding_back"] = op.pad.val[1] # Back + pad["padding_top"] = op.pad.val[2] # Top + pad["padding_bottom"] = op.pad.val[3] # Bottom + pad["padding_left"] = op.pad.val[4] # Left + pad["padding_right"] = op.pad.val[5] # Right + + groups = op.groups.val + has_bias = op.bias is not None + # Get H and W from output shape + output_shape = None if op.output_shape is None else tuple(op.output_shape.val) + + if is_conv_transpose_3d: + builder.add_convolution3d( + name=op.name, + input_channels=kernel_channels, + output_channels=output_channels, + depth=weight.shape[-3], + height=weight.shape[-2], + width=weight.shape[-1], + W=weight, + b=op.bias.val if has_bias else None, + has_bias=has_bias, + groups=groups, + stride_depth=strides[0], + stride_height=strides[1], + stride_width=strides[2], + dilation_depth=dilations[0], + dilation_height=dilations[1], + dilation_width=dilations[2], + padding_mode=padding_mode, + is_deconv=True, + output_shape=output_shape, + input_name=input_names, + output_name=out_name, + **pad + ) + else: + builder.add_convolution( + name=out_name, + kernel_channels=kernel_channels, + output_channels=output_channels, + height=weight.shape[0], + width=weight.shape[1], + stride_height=strides[0], + stride_width=strides[1], + border_mode=padding_mode, + groups=groups, + W=weight, + b=op.bias.val if has_bias else None, + has_bias=has_bias, + is_deconv=True, + output_shape=output_shape, + input_name=input_names, + output_name=out_name, + dilation_factors=dilations, + **pad + ) + + # Squeeze added `Width` dimension for 1d case + if is_conv_transpose_1d: + builder.add_squeeze( + name=op.name, + input_name=out_name, + output_name=op.outputs[0].name, + axes=[3], + ) + + +@register_v2_op +def range_1d(const_context, builder, op): + if op.start.val is not None and op.step.val is not None: + inputs = [op.end] + elif op.start.val is None and op.step.val is not None: + inputs = [op.end, op.start] + elif op.start.val is not None and op.step.val is None: + inputs = [op.end, op.start, op.step] + else: + inputs = [op.end, op.start, op.step] + + 
builder.add_range_dynamic( + name=op.name, + output_name=op.outputs[0].name, + input_names=make_input(const_context, builder, inputs), + start=op.start.val if op.start.val is not None else 0, + step=op.step.val if op.step.val is not None else 1, + ) + + +@register_v2_op +def one_hot(const_context, builder, op): + if op.one_hot_vector_size.val is not None: + inputs = [op.indices] + else: + inputs = [op.indices, op.one_hot_vector_size] + + builder.add_one_hot( + name=op.name, + input_names=make_input(const_context, builder, inputs), + output_name=op.name, + one_hot_vector_size=op.one_hot_vector_size.val, + axis=op.axis.val, + on_value=op.on_value.val, + off_value=op.off_value.val, + ) + + +@register_v2_op +def non_maximum_suppression(const_context, builder, op): + builder.add_nms( + name=op.name, + input_names=make_input(const_context, builder, [op.boxes, op.scores]), + output_names=["{}:{}".format(op.name, i) for i in range(4)], + iou_threshold=op.iou_threshold.val, + score_threshold=op.score_threshold.val, + max_boxes=op.max_boxes.val, + per_class_suppression=op.per_class_suppression.val, + ) + + +@register_v2_op +def flatten(const_context, builder, op): + builder.add_flatten_to_2d( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axis=op.axis.val, + ) + + +@register_v2_op +def shape(const_context, builder, op): + builder.add_get_shape( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def upsample_nearest_neighbor(const_context, builder, op): + builder.add_upsample( + name=op.name, + scaling_factor_h=op.upscale_factor_height.val, + scaling_factor_w=op.upscale_factor_width.val, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + mode="NN", + ) + + +@register_v2_op +def upsample_bilinear(const_context, builder, op): + if op.align_corners.val: + builder.add_upsample( + name=op.name, + scaling_factor_h=op.scale_factor_height.val, + scaling_factor_w=op.scale_factor_width.val, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + mode="BILINEAR", + linear_upsample_mode="ALIGN_CORNERS_TRUE", + ) + else: + builder.add_upsample( + name=op.name, + scaling_factor_h=op.scale_factor_height.val, + scaling_factor_w=op.scale_factor_width.val, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + mode="BILINEAR", + linear_upsample_mode="ALIGN_CORNERS_FALSE", + ) + + +@register_v2_op +def resize_bilinear(const_context, builder, op): + grid_sampling_mode_map = {} + grid_sampling_mode_map["STRICT_ALIGN_CORNERS"] = "STRICT_ALIGN_ENDPOINTS_MODE" + grid_sampling_mode_map["ALIGN_CORNERS"] = "ALIGN_ENDPOINTS_MODE" + grid_sampling_mode_map["DEFAULT"] = "UPSAMPLE_MODE" + grid_sampling_mode_map["OFFSET_CORNERS"] = "ROI_ALIGN_MODE" + + builder.add_resize_bilinear( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + target_height=op.target_size_height.val, + target_width=op.target_size_width.val, + mode=grid_sampling_mode_map[op.sampling_mode.val], + ) + + +@register_v2_op +def cond(const_context, builder, op): + true_block = op.blocks[0] + false_block = op.blocks[1] + + branch_layer = builder.add_branch( + name=op.name, input_name=make_input(const_context, builder, op.pred), + ) + true_builder = neural_network.NeuralNetworkBuilder( + nn_spec=branch_layer.branch.ifBranch, + disable_rank5_shape_mapping=True, + 
use_float_arraytype=True,
+    )
+    convert_ops(const_context, true_builder, true_block.operations, true_block.outputs)
+
+    # Copy block output to cond op output.
+    for block_out, op_out in zip(true_block.outputs, op.outputs):
+        true_builder.add_copy(
+            name=block_out.name + "_ret_copy",
+            # No need to make_input for block_out which is guaranteed
+            # to be a node
+            input_name=block_out.name,
+            output_name=op_out.name,
+        )
+
+    false_builder = neural_network.NeuralNetworkBuilder(
+        nn_spec=branch_layer.branch.elseBranch,
+        disable_rank5_shape_mapping=True,
+        use_float_arraytype=True,
+    )
+    convert_ops(
+        const_context, false_builder, false_block.operations, false_block.outputs
+    )
+
+    for block_out, op_out in zip(false_block.outputs, op.outputs):
+        false_builder.add_copy(
+            name=block_out.name + "_ret_copy",
+            input_name=block_out.name,
+            output_name=op_out.name,
+        )
+
+
+@register_v2_op
+def while_loop(const_context, builder, op):
+    block = op.blocks[0]
+
+    # Assume that all loop vars aren't loop invariant (invariant loop vars
+    # should've been optimized away in graph passes).
+    for v_in, vx_in in zip(op.loop_vars, block.inputs):
+        assert v_in.name != vx_in.name, "Loop invariant detected in {}".format(op)
+        builder.add_copy(
+            name=vx_in.name + "_input_copy",
+            input_name=make_input(const_context, builder, v_in),
+            output_name=vx_in.name,
+        )
+
+    loop_layer = builder.add_loop(
+        name=op.name,
+        # max_iterations=0 to use condition network.
+        max_iterations=0,
+    )
+
+    # Construct while_loop condition
+    cond_builder = neural_network.NeuralNetworkBuilder(
+        nn_spec=loop_layer.loop.conditionNetwork,
+        disable_rank5_shape_mapping=True,
+        use_float_arraytype=True,
+    )
+    cond_builder.rank_dict = {k.name: builder.rank_dict[k.name] for k in block.inputs}
+    convert_ops(
+        const_context,
+        cond_builder,
+        block.operations_for_vars(block.outputs[:1]),
+        block.outputs[:1],
+    )
+
+    loop_layer.loop.conditionVar = block.outputs[0].name
+
+    # while_loop body produces [cond_var] + loop_vars
+    body_builder = neural_network.NeuralNetworkBuilder(
+        nn_spec=loop_layer.loop.bodyNetwork,
+        disable_rank5_shape_mapping=True,
+        use_float_arraytype=True,
+    )
+    body_builder.rank_dict = {k.name: builder.rank_dict[k.name] for k in block.inputs}
+    convert_ops(
+        const_context,
+        body_builder,
+        block.operations_for_vars(block.outputs[1:]),
+        block.outputs[1:],
+    )
+
+    # Also assume all outputs are different from loop inputs (i.e., no loop
+    # invariant).
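+    # Copy each computed loop var back onto the corresponding block input so
+    # that the condition network and the next iteration observe the updated
+    # values.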
+ for vx_in, vx_out in zip(block.inputs, block.outputs[1:]): + if vx_in.name == vx_out.name: + msg = "Loop invariant var {} detected in block {}" + _logging.warning(msg.format(vx_in.name, block.name)) + continue + body_builder.add_copy( + name=vx_in.name + "_ret_copy", + input_name=make_input(const_context, builder, vx_out), + output_name=vx_in.name, + ) + + +@register_v2_op +def identity(const_context, builder, op): + builder.add_copy( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def concat(const_context, builder, op): + rank = op.values[0].rank + input_names = make_input(const_context, builder, op.values) + + if op.axis.val == -3 or op.axis.val > 0 and op.axis.val == rank - 3: + builder.add_elementwise( + name=op.name, + input_names=input_names, + output_name=op.outputs[0].name, + mode="CONCAT", + ) + else: + builder.add_concat_nd( + name=op.name, + input_names=input_names, + output_name=op.outputs[0].name, + axis=op.axis.val, + ) + + +@register_v2_op +def stack(const_context, builder, op): + builder.add_stack( + name=op.name, + input_names=make_input(const_context, builder, op.values), + output_name=op.outputs[0].name, + axis=op.axis.val, + ) + + +@register_v2_op +def split(const_context, builder, op): + split_sizes = None + if op.split_sizes is not None: + if op.split_sizes.val is None: + raise ValueError("Non-const split_sizes unsupported in NN") + split_sizes = op.split_sizes.val.tolist() + builder.add_split_nd( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_names=[v.name for v in op.outputs], + axis=op.axis.val, + num_splits=len(op.outputs), + split_sizes=split_sizes, + ) + + +@register_v2_op +def argsort(const_context, builder, op): + axis = op.x.rank + op.axis.val if op.axis.val < 0 else op.axis.val + builder.add_argsort( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axis=axis, + descending=(not op.ascending.val), + ) + + +@register_v2_op +def pixel_shuffle(const_context, builder, op): + builder.add_reorganize_data( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + mode="PIXEL_SHUFFLE", + block_size=op.upscale_factor.val, + ) + + +@register_v2_op +def sliding_windows(const_context, builder, op): + builder.add_sliding_windows( + name=op.name, + input_name=make_input(const_context, builder, op.x), + output_name=op.outputs[0].name, + axis=op.axis.val, + window_size=op.size.val, + step=op.stride.val, + ) + + +@register_v2_op +def crop(const_context, builder, op): + builder.add_crop( + name=op.name, + input_names=[op.x.name], + output_name=op.name, + offset=0, + left=op.crop_width.val[0], + right=op.crop_width.val[1], + top=op.crop_height.val[0], + bottom=op.crop_height.val[1], + ) + + +@register_v2_op +def crop_resize(const_context, builder, op): + grid_sampling_mode_map = { + "STRICT_ALIGN_CORNERS": "STRICT_ALIGN_ENDPOINTS_MODE", + "ALIGN_CORNERS": "ALIGN_ENDPOINTS_MODE", + "DEFAULT": "UPSAMPLE_MODE", + "OFFSET_CORNERS": "ROI_ALIGN_MODE", + } + + mode = grid_sampling_mode_map[op.sampling_mode.val] + + input_expanded = op.name + "_x_expand" + builder.add_expand_dims( + name=input_expanded, + input_name=make_input(const_context, builder, op.x), + output_name=input_expanded, + axes=[0], + ) + builder.add_crop_resize( + name=op.name, + input_names=make_input(const_context, builder, [input_expanded, op.roi]), + output_name=op.outputs[0].name, + 
target_height=op.target_height.val,
+        target_width=op.target_width.val,
+        mode=mode,
+        normalized_roi=op.normalized_coordinates.val,
+        box_indices_mode=op.box_coordinate_mode.val,
+        spatial_scale=op.spatial_scale.val,
+    )
+
+
+@register_v2_op
+def custom_op(const_context, builder, op):
+    class_name = op.bindings.get("class_name", op.name)
+    input_order = op.bindings.get("input_order", [])
+    parameters = op.bindings.get("parameters", [])
+    weights = op.bindings.get("weights", [])
+    description = op.bindings.get("description", "")
+
+    if len(input_order) == 0:
+        raise ValueError("Inputs not provided for Custom Layer: {}".format(op.name))
+
+    # Get input names
+    input_names = [op.inputs[_name].name for _name in input_order]
+
+    # Get output names
+    output_names = [_output.name for _output in op.outputs]
+
+    # Load custom params
+    params = NeuralNetwork_pb2.CustomLayerParams()
+    params.className = class_name
+    params.description = description
+
+    # Load parameters
+    for _param in parameters:
+        param = op.inputs[_param]
+        param_val = param.val
+        if types.is_bool(param.dtype):
+            params.parameters[_param].boolValue = param_val
+        elif types.is_int(param.dtype):
+            params.parameters[_param].intValue = param_val
+        elif types.is_float(param.dtype):
+            params.parameters[_param].doubleValue = param_val
+        elif types.is_str(param.dtype):
+            params.parameters[_param].stringValue = param_val
+        else:
+            raise ValueError(
+                "Unknown parameter type for custom layer - "
+                "Op: {}, Parameter: {}, Type: {}".format(op.name, _param, param.dtype)
+            )
+
+    # Load weights
+    for _weight in weights:
+        wt = params.weights.add()
+        wt.floatValue.extend(map(float, _weight))
+
+    # Add a custom layer
+    builder.add_custom(
+        name=op.name,
+        input_names=input_names,
+        output_names=output_names,
+        custom_proto_spec=params,
+    )
+
+
+@register_v2_op
+def make_list(const_context, builder, op):
+    # op.elem_shape is technically optional but SSA passes ensure it's
+    # always there
+    elem_shape = op.elem_shape.val
+    has_static_elem_shape = all([dim > 0 for dim in elem_shape])
+
+    # Set a default initial size
+    size = op.init_length.val
+    if size is not None and has_static_elem_shape:
+        array_size = size if size > 0 else 1
+        array_shape = [array_size] + list(elem_shape)
+        add_const(
+            const_context,
+            builder,
+            op.outputs[0].name,
+            val=_np.zeros(array_shape, dtype="float"),
+        )
+    elif has_static_elem_shape:
+        if len(elem_shape) > 0:
+            node_es_name = op.name + "_element_shape"
+            add_const(
+                const_context,
+                builder,
+                node_es_name,
+                val=_np.array(elem_shape, dtype="float"),
+            )
+
+            # Concatenate list length (the input, should be a constant vector of size 1) with element shape
+            node_arr_shape_name = op.name + "_arr_shape"
+            layer = builder.add_concat_nd(
+                name=node_arr_shape_name,
+                input_names=[op.init_length.name, node_es_name],
+                output_name=node_arr_shape_name,
+                axis=0,
+            )
+        else:
+            # Scalar elements: the array shape is just [length], i.e. the
+            # init_length input itself
+            node_arr_shape_name = op.init_length.name
+        builder.add_fill_dynamic(
+            name=op.name, input_name=node_arr_shape_name, output_name=op.outputs[0].name
+        )
+    else:
+        raise ValueError("TensorArray cannot determine element shapes statically")
+
+
+def _realloc_list(const_context, builder, ls_var, index_var):
+    # If index_var >= len(ls_var), reallocate the array and copy over existing
+    # contents
+    # index_var: str or Var
+    # ls_var: Var
+
+    full_shape_name = ls_var.name + "_full_shape"
+    builder.add_get_shape(
+        name=full_shape_name,
+        input_name=ls_var.name,  # no need to make_input
+        output_name=full_shape_name,
+    )
+
+    # slice shape [length, elem_size1, ...]
to get current length + curr_len_name = ls_var.name + "_length" + builder.add_slice_static( + name=curr_len_name, + input_name=full_shape_name, + output_name=curr_len_name, + begin_ids=[0], + end_ids=[1], + begin_masks=[False], + end_masks=[False], + strides=[1], + ) + + is_growing_name = ls_var.name + "_is_growing" + builder.add_greater_than( + name=is_growing_name, + input_names=make_input(const_context, builder, [index_var, curr_len_name]), + output_name=is_growing_name, + use_greater_than_equal=True, + ) + + elem_shape_name = ls_var.name + "_elem_shape" + add_const(const_context, builder, elem_shape_name, _np.array(ls_var.elem_shape)) + + condition_name = ls_var.name + "_condition" + layer = builder.add_branch(name=condition_name, input_name=is_growing_name) + + true_builder = neural_network.NeuralNetworkBuilder( + nn_spec=layer.branch.ifBranch, + disable_rank5_shape_mapping=True, + use_float_arraytype=True, + ) + + # alloc_length_name0 = index - list_length + alloc_length_name0 = ls_var.name + "_extra_length0" + true_builder.add_subtract_broadcastable( + name=alloc_length_name0, + input_names=make_input(const_context, builder, [index_var, curr_len_name]), + output_name=alloc_length_name0, + ) + + # alloc_length_name1 = index - list_length + 1 + alloc_length_name1 = ls_var.name + "_extra_length1" + true_builder.add_elementwise( + name=alloc_length_name1, + input_names=[alloc_length_name0], + mode="ADD", + output_name=alloc_length_name1, + alpha=1, + ) + + # alloc_shape_name = [alloc_length] + elem_shape + alloc_shape_name = ls_var.name + "_alloc_shape" + true_builder.add_concat_nd( + name=alloc_shape_name, + input_names=[alloc_length_name1, elem_shape_name], + output_name=alloc_shape_name, + axis=0, + ) + + # new_alloc_name is np.zeros([alloc_length] + elem_shape) + new_alloc_name = ls_var.name + "_alloc" + true_builder.add_fill_dynamic( + name=new_alloc_name, + input_name=alloc_shape_name, + output_name=new_alloc_name, + value=0.0, + ) + + # new_list_name is np.concat([old_list, new_alloc]) + new_list_name = ls_var.name + "_new" + true_builder.add_concat_nd( + name=new_list_name, + input_names=[ls_var.name, new_alloc_name], + output_name=new_list_name, + axis=0, + ) + + # Copy new_list_name to ls_var.name + true_builder.add_copy( + name=ls_var.name + "_assign", input_name=new_list_name, output_name=ls_var.name + ) + + +@register_v2_op +def list_write(const_context, builder, op): + _realloc_list(const_context, builder, op.ls, op.index) + + # expanded_value_name is [1, op.value] + expanded_value_name = op.value.name + "_expanded" + builder.add_expand_dims( + name=expanded_value_name, + input_name=make_input(const_context, builder, op.value), + output_name=expanded_value_name, + axes=[0], + ) + + builder.add_scatter( + name=op.name, + input_names=make_input( + const_context, builder, [op.ls, op.index, expanded_value_name] + ), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def list_gather(const_context, builder, op): + builder.add_gather( + name=op.name, + input_names=make_input(const_context, builder, [op.ls, op.indices]), + output_name=op.outputs[0].name, + axis=0, + ) + + +@register_v2_op +def list_scatter(const_context, builder, op): + max_idx_name = op.indices.name + "_max" + builder.add_reduce_max( + name=max_idx_name, + axes=[0], + keepdims=False, + input_name=make_input(const_context, builder, op.indices), + output_name=max_idx_name, + ) + + _realloc_list(const_context, builder, op.ls, max_idx_name) + + builder.add_scatter( + name=op.name, + 
input_names=make_input(const_context, builder, [op.ls, op.indices, op.value]), + output_name=op.outputs[0].name, + ) + + +@register_v2_op +def list_read(const_context, builder, op): + # gathered_name has shape [1] + elem_shape + gathered_name = op.name + "_gathered" + builder.add_gather( + name=op.name, + input_names=make_input(const_context, builder, [op.ls, op.index]), + output_name=gathered_name, + axis=0, + ) + + # squeezed_name has shape elem_shape + squeezed_name = op.name + "_squeezed" + builder.add_squeeze( + name=squeezed_name, + input_name=gathered_name, + output_name=op.outputs[0].name, + axes=[0], + ) + + +@register_v2_op +def list_length(const_context, builder, op): + # list_shape_name == [list_length] + elem_shape + list_shape_name = op.ls.name + "_shape" + builder.add_get_shape( + name=list_shape_name, + input_name=make_input(const_context, builder, op.ls), + output_name=list_shape_name, + ) + + # slice to get list_length + builder.add_slice_static( + name=op.name, + input_name=list_shape_name, + output_name=op.outputs[0].name, + begin_ids=[0], + end_ids=[1], + begin_masks=[False], + end_masks=[False], + strides=[1], + ) + + +@register_v2_op +def isfinite(const_context, builder, op): + int_max = _np.iinfo(_np.int64).max + int_min = -_np.iinfo(_np.int64).max - 1 + const_name_max = op.name + "_const_name_max" + const_name_min = op.name + "_const_name_min" + if any_symbolic(op.x.shape): + shape_name = op.name + "_shape" + builder.add_get_shape( + name=shape_name, + input_name=make_input(const_context, builder, op.x), + output_name=shape_name, + ) + builder.add_fill_dynamic( + name=const_name_max, + input_name=shape_name, + output_name=const_name_max, + value=int_max, + ) + builder.add_fill_dynamic( + name=const_name_min, + input_name=shape_name, + output_name=const_name_min, + value=int_min, + ) + else: + shape = [1] if op.x.shape == () else op.x.shape + builder.add_fill_static( + name=const_name_max, + output_name=const_name_max, + output_shape=shape, + value=int_max, + ) + builder.add_fill_static( + name=const_name_min, + output_name=const_name_min, + output_shape=shape, + value=int_min, + ) + smaller_than_name = op.name + "_smaller" + greater_than_name = op.name + "_greater" + builder.add_less_than( + name=smaller_than_name, + input_names=make_input(const_context, builder, [op.x, const_name_max]), + output_name=smaller_than_name, + ) + builder.add_greater_than( + name=greater_than_name, + input_names=make_input(const_context, builder, [op.x, const_name_min]), + output_name=greater_than_name, + ) + builder.add_logical( + name=op.name, + input_names=[smaller_than_name, greater_than_name], + output_name=op.outputs[0].name, + mode="AND", + ) diff --git a/coremltools/converters/mil/backend/nn/passes/__init__.py b/coremltools/converters/mil/backend/nn/passes/__init__.py new file mode 100644 index 000000000..12e97024f --- /dev/null +++ b/coremltools/converters/mil/backend/nn/passes/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +# Import all passes in this dir +from os.path import dirname, basename, isfile, join +import glob + +excluded_files = [ + "__init__.py", + "nn_passes.py", +] +modules = glob.glob(join(dirname(__file__), "*.py")) +pass_modules = [ + basename(f)[:-3] + for f in modules + if isfile(f) + and basename(f)[:1] != "_" # Follow python convention to hide _* files. 
+    and basename(f)[:4] != "test"
+    and basename(f) not in excluded_files
+]
+__all__ = pass_modules
+
+from . import *  # import everything in __all__
diff --git a/coremltools/converters/mil/backend/nn/passes/alert_return_type_cast.py b/coremltools/converters/mil/backend/nn/passes/alert_return_type_cast.py
new file mode 100644
index 000000000..1b0eb0869
--- /dev/null
+++ b/coremltools/converters/mil/backend/nn/passes/alert_return_type_cast.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+from coremltools.converters.mil.mil import Var, types
+import logging
+
+
+@register_pass(namespace="nn_backend")
+def alert_return_type_cast(prog):
+    """
+    prog: Program
+
+    # NN always implicitly casts return types to fp32. Detect any return
+    # types that are not builtin.fp32 and alert the user about the implicit
+    # casting. This pass must be at the end. Example:
+    #
+    # Given:
+    #
+    #    main(%x: (2, 3, fp32)) {
+    #      block0() {
+    #        %shape_0: (2,i32)* = const(val=[4, 7])
+    #      } -> (%shape_0)
+    #    }
+    #
+    # (Notice that %shape_0 is i32, not fp32)
+    #
+    # Result:
+    #
+    # The same program.
+    #
+    # Alert messages about %shape_0 being implicitly cast from i32 to fp32.
+    #
+    # Comment: This pass should do proper casting as the backend supports more types.
+    """
+    for f_name, f in prog.functions.items():
+        for v in f.outputs:
+            if isinstance(v, Var) and v.dtype != types.fp32:
+                msg = (
+                    "Output var {} of type {} in function {} is "
+                    "cast to type fp32"
+                )
+                logging.warning(
+                    msg.format(v.name, types.builtin_to_string(v.dtype), f_name)
+                )
diff --git a/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py b/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py
new file mode 100644
index 000000000..a0b2a7eb5
--- /dev/null
+++ b/coremltools/converters/mil/backend/nn/passes/commingle_loop_vars.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+
+
+def commingle_loop_vars_block(block):
+    for op in list(block.operations):
+        for b in op.blocks:
+            commingle_loop_vars_block(b)
+
+        if op.op_type != "while_loop":
+            continue
+
+        block = op.blocks[0]
+
+        for v_out, vx_in in zip(op.outputs, block.inputs):
+            # Disable check as v_out is not visible in block.
+            block.replace_uses_of_var_after_op(
+                anchor_op=None,
+                old_var=vx_in,
+                new_var=v_out,
+                no_check_var_visibility=True,
+            )
+
+        # replace block inputs
+        block._block_inputs = op.outputs
+
+
+@register_pass(namespace="nn_backend")
+def commingle_loop_vars(prog):
+    """
+    prog: Program
+
+    # NN backend expects output vars as loop vars.
Example: + # + # Given: + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32)) { + # block0() { + # %loop:0: (1, 2, fp32), %loop:1: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a, %b)) + # loop_cond(%a.x, %b.x) { + # %cond_var: (bool) = some_op(x=%a.x, y=%b.x) + # } -> (%cond_var) + # loop_body(%a.x, %b.x) { + # %add_0: (1, 2, fp32) = add(x=%a.x, y=%b.x) + # } -> (%add_0, %b.x) + # } -> (%loop:0, %loop:1) + # } + # + # Result: + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32)) { + # block0() { + # %loop:0: (1, 2, fp32), %loop:1: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a, %b)) + # loop_cond(%loop:0, %loop:1) { + # %cond_var: (bool) = some_op(x=%loop:0, y=%loop:1) + # } -> (%cond_var) + # loop_body(%loop:0, %loop:1) { + # %add_0: (1, 2, fp32) = add(x=%loop:0, y=%loop:1) + # } -> (%add_0, %loop:1) + # } -> (%loop:0, %loop:1) + # } + # + # Comment: The resulting program is no longer SSA (multiple assignments on + # %loop:0). + """ + for f_name, f in prog.functions.items(): + commingle_loop_vars_block(f) diff --git a/coremltools/converters/mil/backend/nn/passes/handle_return_inputs_as_outputs.py b/coremltools/converters/mil/backend/nn/passes/handle_return_inputs_as_outputs.py new file mode 100644 index 000000000..0421c511b --- /dev/null +++ b/coremltools/converters/mil/backend/nn/passes/handle_return_inputs_as_outputs.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.passes.pass_registry import register_pass + + +def handle_return_inputs_as_outputs_func(f): + returned_inputs = [] + for v_name, v in f.inputs.items(): + if v not in f.outputs: + continue + returned_inputs.append(v) + + with f: + for v in returned_inputs: + # copy twice since NN layer cannot have input name == output name + v_tmp = mb.identity(x=v, name=v.name + "_tmp") + res = mb.identity(x=v_tmp, name=v.name) + res.op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=res.op, old_var=v, new_var=res + ) + + +@register_pass(namespace="nn_backend") +def handle_return_inputs_as_outputs(prog): + """ + prog: Program + + # NN cannot handle returning input as output. Insert an identity op for + # those cases. Example: + # + # Given: + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32)) { + # block0() { + # %mul_0_y_0: (i32)* = const(val=2) + # %mul_0: (1, 2, fp64) = mul(x=%a, y=%mul_0_y_0) + # } -> (%mul_0, %b) + # } + # + # (Notice that %b is returned from input. 
This causes an error in NN)
+    #
+    # Result:
+    #    main(%a: (1, 2, fp32),
+    #         %b: (1, 2, fp32)) {
+    #      block0() {
+    #        %mul_0_y_0: (i32)* = const(val=2)
+    #        %mul_0: (1, 2, fp64) = mul(x=%a, y=%mul_0_y_0)
+    #        %b_tmp: (1, 2, fp32) = identity(x=%b)
+    #        %b: (1, 2, fp32) = identity(x=%b_tmp)
+    #      } -> (%mul_0, %b)
+    #    }
+    #
+    # where identity is applied twice since NN layer cannot have
+    # input name == output name
+    """
+    for f_name, f in prog.functions.items():
+        handle_return_inputs_as_outputs_func(f)
diff --git a/coremltools/converters/mil/backend/nn/passes/handle_return_unused_inputs.py b/coremltools/converters/mil/backend/nn/passes/handle_return_unused_inputs.py
new file mode 100644
index 000000000..b1e04f3e4
--- /dev/null
+++ b/coremltools/converters/mil/backend/nn/passes/handle_return_unused_inputs.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+
+
+def handle_return_unused_inputs_func(f):
+    returned_unused_inputs = []
+    for v_name, v in f.inputs.items():
+        if v not in f.outputs:
+            continue
+        returned_unused_inputs.append(v)
+
+    with f:
+        for v in returned_unused_inputs:
+            # copy twice since NN layer cannot have input name == output name
+            v_tmp = mb.identity(x=v, name=v.name + "_tmp")
+            res = mb.identity(x=v_tmp, name=v.name)
+            res.op.enclosing_block.replace_uses_of_var_after_op(
+                anchor_op=res.op, old_var=v, new_var=res
+            )
+
+
+@register_pass(namespace="nn_backend")
+def handle_return_unused_inputs(prog):
+    """
+    prog: Program
+
+    # NN cannot handle returning input as output. Insert an identity op for
+    # those cases. Example:
+    #
+    # Given:
+    #    main(%a: (1, 2, fp32),
+    #         %b: (1, 2, fp32)) {
+    #      block0() {
+    #        %mul_0_y_0: (i32)* = const(val=2)
+    #        %mul_0: (1, 2, fp64) = mul(x=%a, y=%mul_0_y_0)
+    #      } -> (%mul_0, %b)
+    #    }
+    #
+    # (Notice that %b is returned from input. This causes an error in NN)
+    #
+    # Result:
+    #    main(%a: (1, 2, fp32),
+    #         %b: (1, 2, fp32)) {
+    #      block0() {
+    #        %mul_0_y_0: (i32)* = const(val=2)
+    #        %mul_0: (1, 2, fp64) = mul(x=%a, y=%mul_0_y_0)
+    #        %b_tmp: (1, 2, fp32) = identity(x=%b)
+    #        %b: (1, 2, fp32) = identity(x=%b_tmp)
+    #      } -> (%mul_0, %b)
+    #    }
+    #
+    # where identity is applied twice since NN layer cannot have
+    # input name == output name
+    """
+    for f_name, f in prog.functions.items():
+        handle_return_unused_inputs_func(f)
diff --git a/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py b/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py
new file mode 100644
index 000000000..17d867b9c
--- /dev/null
+++ b/coremltools/converters/mil/backend/nn/passes/handle_unused_inputs.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.passes.pass_registry import register_pass + + +def handle_unused_inputs_func(f): + unused_inputs = [v for v_name, v in f.inputs.items() if len(v.child_ops) == 0] + + with f: + for v in unused_inputs: + # copy twice since NN layer cannot have input name == output name + v_tmp = mb.identity(x=v, name=v.name + "_tmp") + + +@register_pass(namespace="nn_backend") +def handle_unused_inputs(prog): + """ + prog: Program + + # NN doesn't allow unused inputs. Insert an identity op to consume + # inputs (though its outputs are not used.). This pass must come after + # dead code elimination as all inserted code are "dead code". Example: + # + # Given: + # + # main(%x: (2, 3, fp32)) { + # block0() { + # %shape_0_const: (2,i32)* = const(val=[4, 7]) + # } -> (%shape_0_const) + # } + # + # (Notice that input %x is not consumed. This causes error in NN.) + # + # Result: + # + # main(%x: (2, 3, fp32)) { + # block0() { + # %unused_var: (2, 3, fp32) = identity(x=%x) + # %shape_0_const: (2,i32)* = const(val=[4, 7]) + # } -> (%shape_0_const) + # } + """ + for f_name, f in prog.functions.items(): + handle_unused_inputs_func(f) diff --git a/coremltools/converters/nnssa/coreml/graph_pass/mlmodel_passes.py b/coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py similarity index 73% rename from coremltools/converters/nnssa/coreml/graph_pass/mlmodel_passes.py rename to coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py index 0f3e1cde0..4f79517e4 100644 --- a/coremltools/converters/nnssa/coreml/graph_pass/mlmodel_passes.py +++ b/coremltools/converters/mil/backend/nn/passes/mlmodel_passes.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ @@ -6,16 +12,17 @@ def _get_nn_spec(spec): - if spec.WhichOneof('Type') == 'neuralNetwork': + if spec.WhichOneof("Type") == "neuralNetwork": nn_spec = spec.neuralNetwork - elif spec.WhichOneof('Type') == 'neuralNetworkClassifier': + elif spec.WhichOneof("Type") == "neuralNetworkClassifier": nn_spec = spec.neuralNetworkClassifier - elif spec.WhichOneof('Type') == 'neuralNetworkRegressor': + elif spec.WhichOneof("Type") == "neuralNetworkRegressor": nn_spec = spec.neuralNetworkRegressor else: - raise ValueError('Specification must contain a neural network') + raise ValueError("Specification must contain a neural network") return nn_spec + def _get_blob_out_degree(spec): """ Computes use count of every tensor/node in NN graph @@ -24,17 +31,20 @@ def _get_blob_out_degree(spec): :param nn_spec : NeuralNetworkSpecification :returns use_count_dict : str -> int, a dictionary with node name as a key and it's use count as a value """ + def _get_blob_out_degree_rec(nn_spec, out_degree): nn_layers = nn_spec.layers for layer in nn_layers: - layer_type = layer.WhichOneof('layer') + layer_type = layer.WhichOneof("layer") for inp in layer.input: out_degree[inp] = out_degree.get(inp, 0) + 1 - if layer_type == 'loop': - out_degree[layer.loop.conditionVar] = out_degree.get(layer.loop.conditionVar, 0) + 1 + if layer_type == "loop": + out_degree[layer.loop.conditionVar] = ( + out_degree.get(layer.loop.conditionVar, 0) + 1 + ) _get_blob_out_degree_rec(layer.loop.conditionNetwork, out_degree) _get_blob_out_degree_rec(layer.loop.bodyNetwork, out_degree) - elif layer_type == 'branch': + elif layer_type == "branch": _get_blob_out_degree_rec(layer.branch.ifBranch, out_degree) _get_blob_out_degree_rec(layer.branch.elseBranch, out_degree) @@ -49,13 +59,15 @@ def _get_blob_out_degree_rec(nn_spec, out_degree): use_count_dict[_output] = use_count_dict.get(_output, 0) + 1 return use_count_dict + def _is_layer(nn_layer, layer_type): """ :param nn_layer : NN layer proto message :param layer_type : str Layer type to check against :returns True if nn_layer is of type `layer_type` otherwise False """ - return nn_layer.WhichOneof('layer') == layer_type + return nn_layer.WhichOneof("layer") == layer_type + def _get_input(layer, index=0): """ @@ -67,6 +79,7 @@ def _get_input(layer, index=0): return None return layer.input[index] + def _get_output(layer, index=0): """ :param layer : NN Layer Proto message @@ -77,6 +90,7 @@ def _get_output(layer, index=0): return None return layer.output[index] + def _get_network_output(spec): """ :param spec : CoreML Specification @@ -101,30 +115,37 @@ def transform_conv_crop(spec): nn_spec = _get_nn_spec(spec) nn_layers = nn_spec.layers - for i in range(0, len(nn_layers)-2): + for i in range(0, len(nn_layers) - 2): # If Convolution output is being using as a network output or more than one layers # that's acceptable - if not _is_layer(nn_layers[i], 'convolution'): + if not _is_layer(nn_layers[i], "convolution"): continue # Output of Crop layer must not be network output or used by more than one layer - if not (_is_layer(nn_layers[i+1], 'crop') \ - and _get_input(nn_layers[i+1]) not in network_output_names \ - and out_degree[_get_output(nn_layers[i+1])] == 1): + if not ( + _is_layer(nn_layers[i + 1], "crop") + and 
_get_input(nn_layers[i + 1]) not in network_output_names + and out_degree[_get_output(nn_layers[i + 1])] == 1 + ): continue layer_to_shuffle_with = -1 # Output of Batchnorm layer must not be network output or used by more than one layer - if _is_layer(nn_layers[i+2], 'batchnorm') \ - and out_degree[_get_output(nn_layers[i+2])] == 1: - layer_to_shuffle_with = i+2 + if ( + _is_layer(nn_layers[i + 2], "batchnorm") + and out_degree[_get_output(nn_layers[i + 2])] == 1 + ): + layer_to_shuffle_with = i + 2 # Output of Activation layer must not be network output or used by more than one layer - if i+3 < len(nn_layers) and _is_layer(nn_layers[i+3], 'activation') \ - and out_degree[_get_output(nn_layers[i+3])] == 1: - layer_to_shuffle_with = i+3 + if ( + i + 3 < len(nn_layers) + and _is_layer(nn_layers[i + 3], "activation") + and out_degree[_get_output(nn_layers[i + 3])] == 1 + ): + layer_to_shuffle_with = i + 3 if layer_to_shuffle_with == -1: continue @@ -134,19 +155,20 @@ def transform_conv_crop(spec): # 1. Conv --------------> BN ---> Activation ---> Layer1 # \ / # ---> Crop -- - nn_layers[i].output[0] = nn_layers[i+1].output[0] + nn_layers[i].output[0] = nn_layers[i + 1].output[0] # 2. Conv ---> BN ---> Activation ---> Layer1 # \ / # -----------------Crop ---- - nn_layers[i+1].output[0] = nn_layers[layer_to_shuffle_with].output[0] + nn_layers[i + 1].output[0] = nn_layers[layer_to_shuffle_with].output[0] # 3. Conv ---> BN ---> Activation ---> Crop ---> Layer1 - nn_layers[layer_to_shuffle_with].output[0] = nn_layers[i+1].input[0] + nn_layers[layer_to_shuffle_with].output[0] = nn_layers[i + 1].input[0] # Add Crop layer at new position and remove from current position - crop_layer = nn_layers[i+1] + crop_layer = nn_layers[i + 1] nn_layers.remove(crop_layer) nn_layers.insert(layer_to_shuffle_with, crop_layer) + def remove_disconnected_layers(spec): """ Removes layers from model specification if it's output is not @@ -154,9 +176,9 @@ def remove_disconnected_layers(spec): """ def _remove_layers_from_spec(nn_spec, layers_to_delete): - nn_layers = nn_spec.layers - for _layer in layers_to_delete: - nn_layers.remove(_layer) + nn_layers = nn_spec.layers + for _layer in layers_to_delete: + nn_layers.remove(_layer) def _get_disconnected_layers_rec(nn_spec): """ @@ -177,33 +199,58 @@ def _decrease_input_degree(layer): nn_layers = nn_spec.layers layers_to_delete = [] for _layer in reversed(nn_layers): - layer_type = _layer.WhichOneof('layer') - if layer_type == 'loop': - condition_net_layers_to_delete = _get_disconnected_layers_rec(_layer.loop.conditionNetwork) - body_net_layers_to_delete = _get_disconnected_layers_rec(_layer.loop.bodyNetwork) - _remove_layers_from_spec(_layer.loop.conditionNetwork, condition_net_layers_to_delete) - _remove_layers_from_spec(_layer.loop.bodyNetwork, body_net_layers_to_delete) + layer_type = _layer.WhichOneof("layer") + if layer_type == "loop": + condition_net_layers_to_delete = _get_disconnected_layers_rec( + _layer.loop.conditionNetwork + ) + body_net_layers_to_delete = _get_disconnected_layers_rec( + _layer.loop.bodyNetwork + ) + _remove_layers_from_spec( + _layer.loop.conditionNetwork, condition_net_layers_to_delete + ) + _remove_layers_from_spec( + _layer.loop.bodyNetwork, body_net_layers_to_delete + ) # NOTE: Debatable? 
                # If condition network or bodyNetwork is empty, delete loop layer
-                if len(_layer.loop.conditionNetwork.layers) == 0 or len(_layer.loop.bodyNetwork.layers) == 0:
+                if (
+                    len(_layer.loop.conditionNetwork.layers) == 0
+                    or len(_layer.loop.bodyNetwork.layers) == 0
+                ):
                     layers_to_delete.append(_layer)
                     _decrease_input_degree(_layer)
                     continue
 
-            if layer_type == 'branch':
-                if_layers_to_delete = _get_disconnected_layers_rec(_layer.branch.ifBranch)
-                else_layers_to_delete = _get_disconnected_layers_rec(_layer.branch.elseBranch)
+            if layer_type == "branch":
+                if_layers_to_delete = _get_disconnected_layers_rec(
+                    _layer.branch.ifBranch
+                )
+                else_layers_to_delete = _get_disconnected_layers_rec(
+                    _layer.branch.elseBranch
+                )
 
                 total_if_layers = len(_layer.branch.ifBranch.layers)
                 total_else_layers = len(_layer.branch.elseBranch.layers)
 
-                if len(if_layers_to_delete) != total_if_layers and len(else_layers_to_delete) != total_else_layers:
+                if (
+                    len(if_layers_to_delete) != total_if_layers
+                    and len(else_layers_to_delete) != total_else_layers
+                ):
                     # If both branches are non-empty after dead-layer elimination,
                     # remove the respective dead layers
-                    _remove_layers_from_spec(_layer.branch.ifBranch, if_layers_to_delete)
-                    _remove_layers_from_spec(_layer.branch.elseBranch, else_layers_to_delete)
-                elif len(if_layers_to_delete) == total_if_layers and len(else_layers_to_delete) == total_else_layers:
+                    _remove_layers_from_spec(
+                        _layer.branch.ifBranch, if_layers_to_delete
+                    )
+                    _remove_layers_from_spec(
+                        _layer.branch.elseBranch, else_layers_to_delete
+                    )
+                elif (
+                    len(if_layers_to_delete) == total_if_layers
+                    and len(else_layers_to_delete) == total_else_layers
+                ):
                     # If both branches are empty after dead-layer elimination,
                     # remove the branch layer altogether
                     layers_to_delete.append(_layer)
@@ -239,6 +286,7 @@ def _remove_disconnected_layers_rec(nn_spec):
     # Initiate removal from the top-level neural network spec
     _remove_disconnected_layers_rec(nn_spec)
 
+
 def remove_redundant_transposes(spec):
     """
     Removes layers from the model specification that are back-to-back transposes
@@ -246,10 +294,10 @@
     """
 
     def blob_name_to_layers(nn_layers):
-        '''
+        """
         output_to_layers: {str: layer_proto_message} : {blob name: layers that it feeds into}
         input_to_parent_layers: {str: layer_proto_message} : {blob name: parent layers that feed in}
-        '''
+        """
         output_to_layers = {}
         for layer in nn_layers:
             for input in layer.input:
@@ -261,9 +309,11 @@ def blob_name_to_layers(nn_layers):
         input_to_parent_layers = {}
         for layer in nn_layers:
             for output in layer.output:
-                if not layer.WhichOneof('layer') == 'copy':
-                    assert(output not in input_to_parent_layers,
-                        "\'{}\' blob is generated by more than 1 layers".format(output))
+                if layer.WhichOneof("layer") != "copy":
+                    assert (
+                        output not in input_to_parent_layers
+                    ), "'{}' blob is generated by more than one layer".format(output)
                 input_to_parent_layers[output] = layer
         return input_to_parent_layers, output_to_layers
@@ -284,8 +334,12 @@ def _delete_layers(nn_spec, layers_to_delete):
             # Replace children's input by layer_start's input
             children = output_to_layers[end_layer.output[0]]
             for child in children:
-                idx = [i for i, input in enumerate(child.input) if input == end_layer.output[0]]
-                assert(len(idx) == 1)
+                idx = [
+                    i
+                    for i, input in enumerate(child.input)
+                    if input == end_layer.output[0]
+                ]
+                assert len(idx) == 1
                 idx = idx[0]
                 child.input[idx] = start_layer.input[0]
@@ -306,12 +360,15 @@ def _find_redundant_transposes(nn_spec):
         for layer in nn_layers:
             # Only 
start with the last element of the transpose layers sequence - if not layer.WhichOneof('layer') == 'transpose': + if not layer.WhichOneof("layer") == "transpose": + continue + if ( + layer.output[0] in output_to_layers + and len(output_to_layers[layer.output[0]]) == 1 + and output_to_layers[layer.output[0]][0].WhichOneof("layer") + == "transpose" + ): continue - if layer.output[0] in output_to_layers and \ - len(output_to_layers[layer.output[0]]) == 1 and \ - output_to_layers[layer.output[0]][0].WhichOneof('layer') == 'transpose': - continue # Get the transpose layers sequence layers = [] @@ -322,17 +379,18 @@ def _find_redundant_transposes(nn_spec): if not cursor.input[0] in input_to_parent_layers: break cursor = input_to_parent_layers[cursor.input[0]] - if cursor.WhichOneof('layer') != 'transpose': + if cursor.WhichOneof("layer") != "transpose": break if len(output_to_layers[cursor.output[0]]) != 1: break layers = layers[::-1] - if len(layers) == 0: continue + if len(layers) == 0: + continue # Optimize for the number of layers which can be merged using dynamic programming def solve_dp(layers): - ''' + """ The resulting dp[i] means the maximum length of transpose sequence resulting in identity starting at index i For example, dp[0] = 0 means there is no sequence starting at 0 results in identity @@ -345,9 +403,9 @@ def solve_dp(layers): # e.g. if dic[(1,2,0)] = 34, it means that starting from the 1st layer, # the net transpose pattern `(1,2,0)` is last seen at layer id 34. No layer after 34-th # layer will result in the net pattern `(1,2,0)` - ''' + """ dim = len(layers[0].transpose.axes) - dp = [0]*len(layers) + dp = [0] * len(layers) dic = {} axes = list(range(dim)) dic[tuple(axes)] = 0 @@ -355,9 +413,9 @@ def solve_dp(layers): axes = [axes[k] for k in layers[i].transpose.axes] key = tuple(axes) if key in dic: - dp[dic[key]] = i-dic[key]+1 - dic[key] = i+1 - for i in range(len(layers)-1,-1,-1): + dp[dic[key]] = i - dic[key] + 1 + dic[key] = i + 1 + for i in range(len(layers) - 1, -1, -1): j = i + dp[i] if j < len(layers): dp[i] = dp[i] + dp[j] @@ -365,7 +423,7 @@ def solve_dp(layers): dp = solve_dp(layers) - ''' + """ Once we know the maximum identity sequence starts at each index, we solve for the maximum total node we can remove. I think there must be lots of different solution for this, but I use DP again. @@ -377,27 +435,27 @@ def solve_dp(layers): nodes after 10, the first starting point is index 12. After construct sol_num and sol_bt by dynamic programming, we backtrack for the optimal solution using sol_bt. 
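+            Illustrative example (hypothetical axes, not taken from any test):
+            for the 3-D transpose sequence [1,0,2], [1,0,2], [2,0,1], [1,2,0],
+            solve_dp returns dp = [4, 0, 2, 0] (the identity pair starting at
+            index 2 chains onto the pair starting at index 0), so
+            sol_num[0] = 4, sol_bt[0] = 0, and backtracking deletes all four
+            layers.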
- ''' - sol_num = [0]*len(dp) - sol_bt = [None]*len(dp) + """ + sol_num = [0] * len(dp) + sol_bt = [None] * len(dp) if dp[-1] != 0: sol_num[-1] = dp[-1] - sol_bt[-1] = len(dp)-1 - for i in range(len(sol_num)-2,-1,-1): + sol_bt[-1] = len(dp) - 1 + for i in range(len(sol_num) - 2, -1, -1): if dp[i] == 0: - sol_num[i] = sol_num[i+1] - sol_bt[i] = sol_bt[i+1] + sol_num[i] = sol_num[i + 1] + sol_bt[i] = sol_bt[i + 1] else: num = dp[i] j = i + dp[i] if j < len(sol_num): num += sol_num[j] - if num > sol_num[i+1]: + if num > sol_num[i + 1]: sol_num[i] = num sol_bt[i] = i else: - sol_num[i] = sol_num[i+1] - sol_bt[i] = sol_bt[i+1] + sol_num[i] = sol_num[i + 1] + sol_bt[i] = sol_bt[i + 1] # Get layers to delete using sol_bt cursor = 0 @@ -415,4 +473,4 @@ def solve_dp(layers): layers_to_delete = _find_redundant_transposes(nn_spec) if len(layers_to_delete) > 0: _delete_layers(nn_spec, layers_to_delete) - print('{} transpose pairs deleted'.format(len(layers_to_delete))) + print("{} transpose pairs deleted".format(len(layers_to_delete))) diff --git a/coremltools/converters/mil/backend/nn/passes/nn_passes.py b/coremltools/converters/mil/backend/nn/passes/nn_passes.py new file mode 100644 index 000000000..3e9f21a9e --- /dev/null +++ b/coremltools/converters/mil/backend/nn/passes/nn_passes.py @@ -0,0 +1,26 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +import logging + + +def nn_backend_passes(prog): + passes = [ + "nn_backend::commingle_loop_vars", # after loop_invariant_elimination + "nn_backend::handle_return_inputs_as_outputs", + "common::const_elimination", + "common::dead_code_elimination", + "nn_backend::handle_unused_inputs", # must come after dce. + "nn_backend::alert_return_type_cast", # must be at the end. + ] + + prog.validate() + for p in passes: + logging.info('Performing passes for nn_backend: "{}"'.format(p)) + PASS_REGISTRY[p](prog) + # No more validation from this point on as prog is not SSA anymore. + + logging.debug("Program after nn backend passes:\n{}".format(prog)) diff --git a/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py b/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py new file mode 100644 index 000000000..a8ef295de --- /dev/null +++ b/coremltools/converters/mil/backend/nn/passes/test_mlmodel_passes.py @@ -0,0 +1,1051 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import copy +import pytest +import numpy as np +import unittest +from sys import platform + +from coremltools._deps import _IS_MACOS +import coremltools.models.datatypes as datatypes +from coremltools.models.utils import _macos_version +from coremltools.models import neural_network as neural_network +from coremltools.models import MLModel +from coremltools.models.neural_network.printer import print_network_spec +from coremltools.converters.mil.backend.nn.passes.mlmodel_passes import ( + remove_disconnected_layers, + transform_conv_crop, + remove_redundant_transposes, +) + +DEBUG = False +np.random.seed(10) + + +class MLModelPassesTest(unittest.TestCase): + def test_load_constant_remove(self): + input_features = [("data", datatypes.Array(*(3, 4)))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("relu1", "RELU", "data", "relu1") + builder.add_load_constant_nd( + "const1", "c1", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_activation("relu2", "RELU", "relu1", "out") + builder.add_load_constant_nd( + "const2", "c2", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_load_constant_nd( + "const3", "c3", constant_value=np.ones((5,)), shape=(5,) + ) + spec = builder.spec + np.testing.assert_equal(5, len(spec.neuralNetwork.layers)) + remove_disconnected_layers(spec) + np.testing.assert_equal(2, len(spec.neuralNetwork.layers)) + + def test_dead_layer_remove(self): + input_features = [("data", datatypes.Array(*(3, 4)))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("relu1", "RELU", "data", "relu1") + builder.add_load_constant_nd( + "const1", "c1", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_load_constant_nd( + "const2", "c2", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_split_nd( + "splitnd1", "const2", ["s1", "s2", "s3"], axis=0, num_splits=3 + ) + builder.add_squeeze("squeeze", "s1", "squeeze_out") + builder.add_activation("relu4", "RELU", "s2", "relu4") + builder.add_activation("relu5", "RELU", "relu4", "relu5") + builder.add_load_constant_nd( + "const3", "c3", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_activation("relu2", "RELU", "relu1", "out") + spec = builder.spec + np.testing.assert_equal(9, len(spec.neuralNetwork.layers)) + remove_disconnected_layers(spec) + np.testing.assert_equal(2, len(spec.neuralNetwork.layers)) + + @pytest.mark.xfail + def test_dead_layer_remove_branch(self): + convergence_tolerance = 1e-8 + + input_features = [("input", datatypes.Array(*(2,)))] + output_features = [("out", None)] + + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + # add condition to break from the loop, if convergence criterion is met + builder.add_less_than("cond", ["input"], "cond", alpha=convergence_tolerance) + branch_layer = builder.add_branch("branch_layer", "cond") + builder_ifbranch = neural_network.NeuralNetworkBuilder( + nn_spec=branch_layer.branch.ifBranch + ) + builder_ifbranch.add_activation("relu1", "RELU", "input", "relu1_out") + builder_ifbranch.add_activation("relu2_out", "RELU", "relu1_out", "relu2_out") + 
builder_elsebranch = neural_network.NeuralNetworkBuilder( + nn_spec=branch_layer.branch.elseBranch + ) + builder_elsebranch.add_activation("linear1", "LINEAR", "input", "linear1_out") + builder_elsebranch.add_activation( + "linear2", "LINEAR", "linear1_out", "relu2_out" + ) + builder.add_squeeze("out", "input", "out", squeeze_all=True) + + mlmodel = MLModel(builder.spec) + data = np.random.rand(2,) + data_dict = {"input": data} + if _IS_MACOS: + before_pass_out = mlmodel.predict(data_dict)["out"] + if DEBUG: + print( + "\n mlmodel description before remove disconnected layers pass: \n" + ) + print_network_spec(builder.spec, style="coding") + remove_disconnected_layers(builder.spec) + if DEBUG: + print( + "\n mlmodel description after remove disconnected layers pass: \n" + ) + print_network_spec(builder.spec, style="coding") + mlmodel = MLModel(builder.spec) + after_pass_out = mlmodel.predict(data_dict)["out"] + + np.testing.assert_almost_equal(before_pass_out, after_pass_out, decimal=2) + np.testing.assert_equal(len(builder.spec.neuralNetwork.layers), 1) + + @pytest.mark.xfail + def test_dead_layer_partial_branch(self): + convergence_tolerance = 1e-8 + + input_features = [("input", datatypes.Array(*(2,)))] + output_features = [("out", None)] + + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + # add condition to break from the loop, if convergence criterion is met + builder.add_less_than("cond", ["input"], "cond", alpha=convergence_tolerance) + branch_layer = builder.add_branch("branch_layer", "cond") + builder_ifbranch = neural_network.NeuralNetworkBuilder( + nn_spec=branch_layer.branch.ifBranch + ) + builder_ifbranch.add_activation("relu1", "RELU", "input", "relu1_out") + builder_ifbranch.add_activation("relu2_out", "RELU", "relu1_out", "relu2_out") + builder_elsebranch = neural_network.NeuralNetworkBuilder( + nn_spec=branch_layer.branch.elseBranch + ) + builder_elsebranch.add_activation("linear1", "LINEAR", "input", "linear1_out") + builder_elsebranch.add_activation( + "linear_red_1", "LINEAR", "input", "linear_red1_out" + ) + builder_elsebranch.add_activation( + "linear_red_2", "LINEAR", "linear_red1_out", "linear_red2_out" + ) + builder_elsebranch.add_activation( + "linear2", "LINEAR", "linear1_out", "relu2_out" + ) + builder.add_squeeze("out", "relu2_out", "out", squeeze_all=True) + + mlmodel = MLModel(builder.spec) + data = np.random.rand(2,) + data_dict = {"input": data} + before_pass_out = mlmodel.predict(data_dict)["out"] + if DEBUG: + print("\n mlmodel description before remove disconnected layers pass: \n") + print_network_spec(builder.spec, style="coding") + old_spec = copy.copy(builder.spec) + remove_disconnected_layers(builder.spec) + if DEBUG: + print("\n mlmodel description after remove disconnected layers pass: \n") + print_network_spec(builder.spec, style="coding") + mlmodel = MLModel(builder.spec) + after_pass_out = mlmodel.predict(data_dict)["out"] + + np.testing.assert_almost_equal(before_pass_out, after_pass_out, decimal=2) + np.testing.assert_equal( + len(old_spec.neuralNetwork.layers[1].branch.ifBranch.layers), + len(builder.spec.neuralNetwork.layers[1].branch.ifBranch.layers), + ) + np.testing.assert_equal( + len(builder.spec.neuralNetwork.layers[1].branch.elseBranch.layers), 2 + ) + + def test_conv_crop_bn_to_conv_bn_crop(self): + input_features = [("data", datatypes.Array(1, 10, 10))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder(input_features, 
output_features) + W = np.ones((1, 2, 2, 2), dtype=np.float32) + builder.add_convolution( + name="conv", + kernel_channels=1, + output_channels=2, + height=2, + width=2, + stride_height=1, + stride_width=1, + border_mode="valid", + groups=1, + W=W, + b=None, + has_bias=False, + input_name="data", + output_name="conv_out", + ) + builder.add_crop( + name="crop", + left=1, + right=1, + top=1, + bottom=1, + offset=0, + input_names=["conv_out"], + output_name="crop_out", + ) + builder.add_batchnorm( + name="bn", + channels=2, + gamma=np.ones(2,).astype(np.float32), + beta=np.ones(2,).astype(np.float32), + mean=np.ones(2,).astype(np.float32), + variance=np.ones(2,).astype(np.float32), + input_name="crop_out", + output_name="out", + ) + # Conv -> Crop -> BN + spec = builder.spec.neuralNetwork + np.testing.assert_equal("crop", spec.layers[1].WhichOneof("layer")) + np.testing.assert_equal("batchnorm", spec.layers[2].WhichOneof("layer")) + + # Predict + if _IS_MACOS: + mlmodel = MLModel(builder.spec) + data = np.random.rand(1, 10, 10) + data_dict = {"data": data} + before_pass_out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] + + # transform the pattern + transform_conv_crop(builder.spec) + # Conv -> BN -> Crop + np.testing.assert_equal("batchnorm", spec.layers[1].WhichOneof("layer")) + np.testing.assert_equal("crop", spec.layers[2].WhichOneof("layer")) + + if _IS_MACOS: + # Predict + mlmodel = MLModel(builder.spec) + after_pass_out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] + np.testing.assert_almost_equal(before_pass_out, after_pass_out, decimal=3) + + def test_conv_crop_bn_relu_to_conv_bn_relu_crop(self): + input_features = [("data", datatypes.Array(1, 10, 10))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + + W = np.ones((1, 2, 2, 2), dtype=np.float32) + builder.add_convolution( + name="conv", + kernel_channels=1, + output_channels=2, + height=2, + width=2, + stride_height=1, + stride_width=1, + border_mode="valid", + groups=1, + W=W, + b=None, + has_bias=False, + input_name="data", + output_name="conv_out", + ) + builder.add_crop( + name="crop", + left=1, + right=1, + top=1, + bottom=1, + offset=0, + input_names=["conv_out"], + output_name="crop_out", + ) + builder.add_batchnorm( + name="bn", + channels=2, + gamma=np.ones(2,).astype(np.float32), + beta=np.ones(2,).astype(np.float32), + mean=np.ones(2,).astype(np.float32), + variance=np.ones(2,).astype(np.float32), + input_name="crop_out", + output_name="bn_out", + ) + builder.add_activation( + name="relu", non_linearity="RELU", input_name="bn_out", output_name="out" + ) + # Conv -> Crop -> BN -> ReLU + spec = builder.spec.neuralNetwork + np.testing.assert_equal("crop", spec.layers[1].WhichOneof("layer")) + np.testing.assert_equal("batchnorm", spec.layers[2].WhichOneof("layer")) + np.testing.assert_equal("activation", spec.layers[3].WhichOneof("layer")) + + # Predict + if _IS_MACOS: + mlmodel = MLModel(builder.spec) + data = np.random.rand(1, 10, 10) + data_dict = {"data": data} + before_pass_out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] + + # transform the pattern + transform_conv_crop(builder.spec) + # Conv -> BN -> ReLU -> Crop + np.testing.assert_equal("batchnorm", spec.layers[1].WhichOneof("layer")) + np.testing.assert_equal("activation", spec.layers[2].WhichOneof("layer")) + np.testing.assert_equal("crop", spec.layers[3].WhichOneof("layer")) + + # Predict + mlmodel = MLModel(builder.spec) + if _IS_MACOS: + after_pass_out = 
mlmodel.predict(data_dict, useCPUOnly=True)["out"]
+            np.testing.assert_almost_equal(before_pass_out, after_pass_out, decimal=3)
+
+
+@unittest.skipIf(
+    platform != "darwin" or _macos_version() < (10, 15), "Requires macOS 10.15 or later"
+)
+class RedundantTransposesTest(unittest.TestCase):
+    def _test_builder(self, builder, input_shape, expected_layer_num=None):
+
+        data = np.random.rand(*input_shape)
+
+        # MLModel before the pass
+        mlmodel = MLModel(builder.spec)
+        output_before = mlmodel.predict({"data": data})["out"]
+        num_layers_before = len(builder.spec.neuralNetwork.layers)
+
+        remove_redundant_transposes(builder.spec)
+
+        layers = builder.spec.neuralNetwork.layers
+        if expected_layer_num is None:
+            self.assertTrue(len(layers) < num_layers_before)
+        else:
+            self.assertEqual(len(layers), expected_layer_num)
+
+        # MLModel after the pass
+        mlmodel = MLModel(builder.spec)
+        output_after = mlmodel.predict({"data": data})["out"]
+
+        np.testing.assert_almost_equal(output_before, output_after, decimal=3)
+
+    def test_output_edge_case(self):
+
+        # For now, as a safety measure, transposes that produce a network
+        # output shouldn't be merged
+        input_shape = (1, 10, 5)
+        input_features = [("data", datatypes.Array(*input_shape))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        builder.add_transpose(
+            name="first_transpose",
+            axes=[2, 0, 1],
+            input_name="data",
+            output_name="first_transpose_out",
+        )
+        builder.add_transpose(
+            name="second_transpose",
+            axes=[1, 2, 0],
+            input_name="first_transpose_out",
+            output_name="out",
+        )
+
+        self._test_builder(builder, input_shape, 2)
+
+    def test_output_edge_case_2(self):
+
+        # For now, as a safety measure, transposes that produce a network
+        # output shouldn't be merged
+        input_shape = (1, 10, 5)
+        input_features = [("data", datatypes.Array(*input_shape))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        builder.add_transpose(
+            name="transpose", axes=[1, 2, 0], input_name="data", output_name="out"
+        )
+
+        self._test_builder(builder, input_shape, 1)
+
+    def test_remove_single_identity_transpose(self):
+
+        # A single identity transpose (axes 0,1,2) should also be removed
+        input_shape = (1, 10, 5)
+        input_features = [("data", datatypes.Array(*input_shape))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        builder.add_transpose(
+            name="useless_transpose",
+            axes=[0, 1, 2],
+            input_name="data",
+            output_name="useless_transpose_out",
+        )
+        builder.add_activation(
+            name="relu",
+            non_linearity="RELU",
+            input_name="useless_transpose_out",
+            output_name="out",
+        )
+
+        self._test_builder(builder, input_shape, 1)
+
+    def test_remove_three_transpose(self):
+
+        # Three transpose layers whose composition is the identity, so all
+        # three can be removed
+        input_shape = (1, 10, 5)
+        input_features = [("data", datatypes.Array(*input_shape))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        transpose = [[2, 1, 0], [1, 0, 2], [2, 0, 1]]
+        input_name = "data"
+        for i, axes in enumerate(transpose):
+            name = "transpose_" + str(i)
+            output_name = name + "_out"
+            builder.add_transpose(
+                name=name, axes=axes, input_name=input_name, output_name=output_name
+            )
+            input_name = output_name
+
+        builder.add_activation(
+            name="relu", 
non_linearity="RELU", input_name=input_name, output_name="out" + ) + + self._test_builder(builder, input_shape, 1) + + def test_remove_thousands_identity_transpose(self): + + """ + INPUT + | + v + [t1] + | + v + [t2] + | + v + . + . + . + | + v + [t1000] + | + v + RELU + tk are all identity + Remove a sequence of 1000 identity transpose + """ + input_shape = (1, 10, 5) + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + + num_layers = 1000 + input_name = "data" + for i in range(num_layers): + output_name = "layer_" + str(i) + "_output" + name = "layer_" + str(i) + builder.add_transpose( + name=name, + axes=[0, 1, 2], + input_name=input_name, + output_name=output_name, + ) + input_name = output_name + + builder.add_activation( + name="relu", non_linearity="RELU", input_name=input_name, output_name="out" + ) + + self._test_builder(builder, input_shape, 1) + + def test_remove_thousands_identity_transpose_with_activation_between(self): + """ + INPUT + | + v + [t1] + | + v + . + . + . + [t500] + | + v + RELU_1 + | + v + . + . + . + | + v + [t1000] + | + v + RELU_2 + tk are all identity + Remove a sequence of 1000 identity transpose but with a RELU in the middle, + the final output should be + INPUT + | + v + RELU_1 + | + v + RELU_2 + + """ + input_shape = (1, 10, 5) + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + + num_layers = 1000 + input_name = "data" + for i in range(num_layers): + output_name = "layer_" + str(i) + "_output" + name = "layer_" + str(i) + builder.add_transpose( + name=name, + axes=[0, 1, 2], + input_name=input_name, + output_name=output_name, + ) + input_name = output_name + if i == num_layers / 2: + builder.add_activation( + name="relu_inter", + non_linearity="ReLU", + input_name=input_name, + output_name="relu_out", + ) + input_name = "relu_out" + builder.add_activation( + name="relu", non_linearity="RELU", input_name=input_name, output_name="out" + ) + self._test_builder(builder, input_shape, 2) + + def test_remove_thousands_random_transpose_layers(self): + """ + INPUT + | + v + [t_0] + | + v + [t_1] + | + v + . + . + . 
+          |
+          v
+       [t_999]
+          |
+          v
+        RELU
+        The t_k are randomly generated; with this particular seed, the result
+        should be
+        INPUT
+          |
+          v
+        [t_0]
+          |
+          v
+        [t_1]
+          |
+          v
+        RELU
+        """
+
+        from itertools import permutations
+        import random
+
+        random.seed(1000)
+        input_shape = (3, 10, 5)
+        input_features = [("data", datatypes.Array(*input_shape))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+
+        num_layers = 1000
+        dim = 3
+        input_name = "data"
+        debug = []
+        for i in range(num_layers):
+            axes = list(permutations(range(dim)))
+            random.shuffle(axes)
+            output_name = "layer_" + str(i) + "_output"
+            name = "layer_" + str(i)
+            debug.append(axes[0])
+            builder.add_transpose(
+                name=name, axes=axes[0], input_name=input_name, output_name=output_name
+            )
+            input_name = output_name
+        builder.add_activation(
+            name="relu", non_linearity="RELU", input_name=input_name, output_name="out"
+        )
+        self._test_builder(builder, input_shape, None)
+
+    def test_remove_thousands_random_transpose_layers_case_2(self):
+        """
+        Same test as the previous one, but with more layers and a higher
+        tensor rank.
+        """
+        from itertools import permutations
+        import random
+
+        random.seed(0)
+        input_shape = (3, 10, 5, 2, 4)
+        input_features = [("data", datatypes.Array(*input_shape))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+
+        num_layers = 5000
+        dim = 5
+        input_name = "data"
+        for i in range(num_layers):
+            axes = list(permutations(range(dim)))
+            random.shuffle(axes)
+            output_name = "layer_" + str(i) + "_output"
+            name = "layer_" + str(i)
+            builder.add_transpose(
+                name=name, axes=axes[0], input_name=input_name, output_name=output_name
+            )
+            input_name = output_name
+        builder.add_activation(
+            name="relu", non_linearity="RELU", input_name=input_name, output_name="out"
+        )
+        self._test_builder(builder, input_shape, None)
+
+    def test_branch_structure(self):
+        """
+        INPUT
+          |
+          v
+        [t_0]
+          |
+          v
+        [t_1]
+          |
+          v
+        [t_2] --.
+          |     |
+          v     v
+        [t_3]  RELU_1
+          |
+          v
+        [t_4]
+          |
+          v
+        RELU_2
+        t_0, t_1, t_2 can be merged (their composition is the identity).
+        t_3, t_4 can be merged.
+        The output should be
+        INPUT
+          |
+          .------.
+          |      |
+          v      v
+        RELU_2  RELU_1
+
+        """
+        input_shape = (1, 10, 5)
+        input_features = [("data", datatypes.Array(1, 10, 5))]
+        output_features = [("out", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        transpose = [[2, 1, 0], [2, 1, 0], [0, 1, 2], [2, 0, 1], [1, 2, 0]]
+        input_name = "data"
+        for i, axes in enumerate(transpose):
+            name = "transpose_" + str(i)
+            output_name = name + "_out"
+            builder.add_transpose(
+                name=name, axes=axes, input_name=input_name, output_name=output_name
+            )
+            input_name = output_name
+
+        builder.add_activation(
+            name="relu", non_linearity="RELU", input_name=input_name, output_name="out"
+        )
+        builder.add_activation(
+            name="dumpy",
+            non_linearity="RELU",
+            input_name="transpose_2_out",
+            output_name="dumpy",
+        )
+        self._test_builder(builder, input_shape, 2)
+
+    def test_branch_case_2(self):
+        """
+        INPUT
+          |
+          v
+        [t_0] --.
+          |     |
+          v     v
+        [t_1]  RELU_1
+          |
+          v
+        RELU_2
+        Even though t_0 and t_1 could be merged, there is a branch from t_0,
+        so we shouldn't remove anything here.
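+        All four layers (both transposes and both ReLUs) are therefore
+        expected to survive the pass.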
+ + """ + input_shape = (1, 10, 5) + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + transpose = [[2, 1, 0], [2, 1, 0]] + input_name = "data" + for i, axes in enumerate(transpose): + name = "transpose_" + str(i) + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=input_name, output_name=output_name + ) + input_name = output_name + + builder.add_activation( + name="relu", non_linearity="RELU", input_name=input_name, output_name="out" + ) + builder.add_activation( + name="dumpy", + non_linearity="RELU", + input_name="transpose_0_out", + output_name="dumpy", + ) + self._test_builder(builder, input_shape, 4) + + def test_fork_structure_case_3(self): + """ + INPUT + | + v + [t_0] + | + v + [t_1]--. + | | + | v + | RELU_1 + | + v + [t_2]--. + | | + | v + | RELU_2 + [t_3] + | + v + [t_4]--. + | | + | v + | RELU_3 + v + RELU_4 + + Even though t_0, t_1 can be merged, t_2 is identity, t_3, t_4 can be merge, + The final output should be + INPUT + | + .------------.----------. + | | | | + v v v v + RELU_1 RELU_2 RELU_3 RELU_4 + + """ + input_shape = (1, 10, 5) + input_features = [("data", datatypes.Array(1, 10, 5))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + transpose = [[2, 1, 0], [2, 1, 0], [0, 1, 2], [2, 1, 0], [2, 1, 0]] + input_name = "data" + for i, axes in enumerate(transpose): + name = "transpose_" + str(i) + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=input_name, output_name=output_name + ) + input_name = output_name + + builder.add_activation( + name="relu", non_linearity="RELU", input_name=input_name, output_name="out" + ) + builder.add_activation( + name="dumpy_1", + non_linearity="RELU", + input_name="transpose_1_out", + output_name="dumpy_1", + ) + builder.add_activation( + name="dumpy_2", + non_linearity="RELU", + input_name="transpose_2_out", + output_name="dumpy_2", + ) + builder.add_activation( + name="dumpy_4", + non_linearity="RELU", + input_name="transpose_4_out", + output_name="dumpy_4", + ) + + self._test_builder(builder, input_shape, 4) + + def test_fork(self): + """ + INPUT + | + .------.------. + | | + v v + [t_1] [t_3] + | | + v v + [t_2] [t_4] + | | + v v + RELU_1 RELU_2 + + t_1,t_2 can be merged and t_3,t_4 can be merged. + The result output would be + + INPUT + | + .------.------. 
+ | | + v v + RELU_1 RELU_2 + + """ + input_shape = (1, 10, 5) + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + transpose = [[2, 1, 0], [2, 1, 0]] + input_name = "data" + for i, axes in enumerate(transpose): + name = "transpose_" + str(i) + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=input_name, output_name=output_name + ) + input_name = output_name + + builder.add_activation( + name="relu", non_linearity="RELU", input_name=input_name, output_name="out" + ) + + input_name = "data" + for i, axes in enumerate(transpose): + name = "transpose_branch_2_" + str(i) + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=input_name, output_name=output_name + ) + input_name = output_name + + builder.add_activation( + name="relu_branch_2", + non_linearity="RELU", + input_name=input_name, + output_name="out_branch_2", + ) + self._test_builder(builder, input_shape, 2) + + def test_fork_and_add(self): + """ + INPUT + | + .------.------. + | | + v v + [t_1] [t_3] + | | + v v + [t_2] [t_4] + | | + .-----. .-----. + | | + v v + Add + + t_1,t_2 can be merged and t_3,t_4 can be merged. + The result output would be + + INPUT + | + .------.------. + | | + .-----. .-----. + | | + v v + Add + + """ + input_shape = (1, 10, 5) + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + transpose = [[2, 1, 0], [2, 1, 0]] + input_name = "data" + for i, axes in enumerate(transpose): + name = "transpose_" + str(i) + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=input_name, output_name=output_name + ) + input_name = output_name + + input_1 = input_name + + input_name = "data" + for i, axes in enumerate(transpose): + name = "transpose_branch_2_" + str(i) + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=input_name, output_name=output_name + ) + input_name = output_name + + input_2 = input_name + + builder.add_add_broadcastable( + name="add", input_names=[input_1, input_2], output_name="out" + ) + self._test_builder(builder, input_shape, 1) + + def test_transpose(self): + def _build_and_test_network(input_size, transpose_layers, expected_layers): + """ + Helper function for testing transpose removal. + + Args: + input_size: Size of the input network tensor. + transpose_layers: Array of transpose axes definitions. + expected_layers: Array of indices into transpose_layers indicating + which of the transpose layers should be present after the + graph pass. + """ + input_features = [("data", datatypes.Array(*input_size))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features + ) + spec = builder.spec.neuralNetwork.layers + + last_layer = "data" + for idx, axes in enumerate(transpose_layers): + name = "t{}".format(idx) + if idx == len(transpose_layers) - 1: + output_name = "out" + else: + output_name = name + "_out" + builder.add_transpose( + name=name, axes=axes, input_name=last_layer, output_name=output_name + ) + last_layer = output_name + + spec = builder.spec.neuralNetwork + # Check the network before the graph pass. 
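+            # (Every layer added above is a transpose, so each element of
+            # spec.layers should report "transpose" before the pass runs.)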
+ for idx in range(len(transpose_layers)): + np.testing.assert_equal( + "transpose", spec.layers[idx].WhichOneof("layer") + ) + # Run the removal pass. + remove_redundant_transposes(builder.spec) + # Verify only the expected layers remain. + np.testing.assert_equal(len(spec.layers), len(expected_layers)) + for output_layer_idx, input_layer_idx in enumerate(expected_layers): + np.testing.assert_equal( + "transpose", spec.layers[output_layer_idx].WhichOneof("layer") + ) + np.testing.assert_array_equal( + transpose_layers[input_layer_idx], + spec.layers[output_layer_idx].transpose.axes, + ) + + _build_and_test_network( + input_size=[1, 10, 10], + # These transposes are not inverses. + transpose_layers=[[2, 0, 1], [2, 0, 1]], + expected_layers=[0, 1], + ) + + _build_and_test_network( + input_size=[1, 1, 10, 10, 3], + # First two are the identity, then an extra. + transpose_layers=[[2, 4, 1, 0, 3], [3, 2, 0, 4, 1], [1, 0, 2, 3, 4]], + expected_layers=[2], + ) + + # A slightly more complicated test case where there are two transposes + # in topological order, but are actually in parallel in the graph. + builder = neural_network.NeuralNetworkBuilder( + [("data", datatypes.Array(2, 4, 8))], [("out", None)] + ) + last_layer = "data" + builder.add_transpose( + name="t1", axes=[0, 2, 1], input_name="data", output_name="t1" + ) + builder.add_transpose( + name="t2", axes=[0, 2, 1], input_name="data", output_name="t2" + ) + builder.add_stack(name="stack", input_names=["t1", "t2"], output_name="out") + spec = builder.spec.neuralNetwork + # Run the removal pass. + remove_redundant_transposes(builder.spec) + # Verify nothing was removed. + np.testing.assert_equal(len(spec.layers), 3) + + +if __name__ == "__main__": + RUN_ALL_TESTS = True + if RUN_ALL_TESTS: + unittest.main() + else: + suite = unittest.TestSuite() + suite.addTest(MLModelPassesTest("test_load_constant_remove")) + unittest.TextTestRunner().run(suite) diff --git a/coremltools/converters/mil/backend/nn/passes/test_passes.py b/coremltools/converters/mil/backend/nn/passes/test_passes.py new file mode 100644 index 000000000..e08d2b542 --- /dev/null +++ b/coremltools/converters/mil/backend/nn/passes/test_passes.py @@ -0,0 +1,81 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import copy
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
+from coremltools.converters.mil.testing_utils import assert_model_is_valid
+from coremltools.converters.mil.testing_utils import assert_same_output_names
+
+
+def test_commingle_loop_vars():
+    def body(a, b):
+        # b is a loop invariant
+        return mb.add(x=a, y=b), b
+
+    def cond(a, b):
+        a_mean = mb.reduce_mean(x=a, axes=[0, 1])
+        b_mean = mb.reduce_mean(x=b, axes=[0, 1])
+        return mb.less(x=a_mean, y=b_mean)
+
+    @mb.program(
+        input_specs=[mb.TensorSpec(shape=(1, 2)), mb.TensorSpec(shape=(1, 2)),]
+    )
+    def prog(a, b):
+        return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b))
+
+    while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0]
+    assert while_op.blocks[0].inputs[0].name == "a.x"
+    assert while_op.blocks[0].inputs[1].name == "b.x"
+
+    prev_prog = copy.deepcopy(prog)
+    PASS_REGISTRY["nn_backend::commingle_loop_vars"](prog)
+    assert_same_output_names(prev_prog, prog)
+
+    while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0]
+    assert while_op.blocks[0].inputs[0].name == while_op.outputs[0].name
+    assert while_op.blocks[0].inputs[1].name == while_op.outputs[1].name
+
+    prog.validate()
+
+    # The program is no longer SSA and thus cannot be converted.
+
+
+def test_handle_return_inputs_as_outputs():
+    @mb.program(
+        input_specs=[mb.TensorSpec(shape=(1, 2)), mb.TensorSpec(shape=(1, 2)),]
+    )
+    def prog(a, b):
+        return mb.mul(x=a, y=2), b
+
+    prev_main_output_names = [o.name for o in prog["main"].outputs]
+    assert prog["main"].outputs[1].op is None  # output comes from input
+
+    prev_prog = copy.deepcopy(prog)
+    PASS_REGISTRY["nn_backend::handle_return_inputs_as_outputs"](prog)
+    assert_same_output_names(prev_prog, prog)
+
+    assert prog["main"].outputs[1].op is not None  # output comes from an op
+    assert prog["main"].outputs[1].op.op_type == "identity"
+
+    assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)})
+
+
+def test_handle_unused_inputs():
+    @mb.program(
+        input_specs=[mb.TensorSpec(shape=(1, 2)),]
+    )
+    def prog(unused_input):
+        return mb.const(val=[3, 2])
+
+    prev_prog = copy.deepcopy(prog)
+    PASS_REGISTRY["nn_backend::handle_unused_inputs"](prog)
+    assert_same_output_names(prev_prog, prog)
+
+    id_op = prog.find_ops(op_type="identity", exactly_one=True)[0]
+    # Assert that the input var is consumed by an identity op.
+    assert id_op in prog["main"].inputs["unused_input"].child_ops
+
+    assert_model_is_valid(prog, {"unused_input": (1, 2)})
diff --git a/coremltools/converters/mil/converter.py b/coremltools/converters/mil/converter.py
new file mode 100644
index 000000000..16f210709
--- /dev/null
+++ b/coremltools/converters/mil/converter.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters._profile_utils import _profile +from .mil.passes.common_pass import common_pass + + +class ConverterRegistry: + frontends = {} + backends = {} + + @staticmethod + def frontend(converter): + ConverterRegistry.frontends[converter.name] = converter + return converter + + @staticmethod + def backend(converter): + ConverterRegistry.backends[converter.name] = converter + return converter + + +@ConverterRegistry.frontend +class MILFrontend: + name = "mil" + + def __call__(self, model, *args, **kwargs): + return model + + +@ConverterRegistry.frontend +class TensorFlowFrontend: + name = "tensorflow" + + def __call__(self, *args, **kwargs): + from .frontend.tensorflow.load import TF1Loader + + tf1_loader = TF1Loader(*args, **kwargs) + return tf1_loader.load() + + +@ConverterRegistry.frontend +class TensorFlow2Frontend: + name = "tensorflow2" + + def __call__(self, *args, **kwargs): + from .frontend.tensorflow2.load import TF2Loader + + tf2_loader = TF2Loader(*args, **kwargs) + return tf2_loader.load() + + +@ConverterRegistry.frontend +class TorchFrontend: + name = "torch" + + def __call__(self, *args, **kwargs): + from .frontend.torch import load + + return load(*args, **kwargs) + + +@ConverterRegistry.backend +class NNProtoBackend: + name = "nn_proto" + + def __call__(self, *args, **kwargs): + from .backend.nn import load + + return load(*args, **kwargs) + + +@ConverterRegistry.frontend +class CustomFrontend: + name = "custom" + + def __call__(self, *args, **kwargs): + from coremltools.converters.mil.mil.passes.common_pass import common_pass + + return common_pass(*args, **kwargs) + + +@_profile +def _convert( + model, + convert_from="TensorFlow", + convert_to="nn_proto", + converter_registry=ConverterRegistry, + **kwargs +): + """ + Convert from an external representation. + + Args: + model (Any): The model to convert. + convert_from (str): The name of the input converter. + convert_to (str): The name of the output converter. + converter_registry: Converter registries. + Returns: + The converted model. + """ + frontend_converter_type = converter_registry.frontends.get(convert_from.lower()) + if not frontend_converter_type: + msg = 'Frontend converter "{}" not implemented, must be one of: {}' + raise NotImplementedError( + msg.format(convert_from, list(converter_registry.frontends.keys())) + ) + frontend_converter = frontend_converter_type() + + backend_converter_type = converter_registry.backends.get(convert_to.lower()) + if not backend_converter_type: + msg = 'Backend converter "{}" not implemented, must be one of: {}' + raise NotImplementedError( + msg.format(convert_to, list(converter_registry.backends.keys())) + ) + backend_converter = backend_converter_type() + + prog = frontend_converter(model, **kwargs) + common_pass(prog) + out = backend_converter(prog, **kwargs) + + return out diff --git a/coremltools/converters/mil/frontend/__init__.py b/coremltools/converters/mil/frontend/__init__.py new file mode 100644 index 000000000..81edbd196 --- /dev/null +++ b/coremltools/converters/mil/frontend/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from . import tensorflow +from . import tensorflow2 +from . 
import torch diff --git a/coremltools/converters/mil/frontend/tensorflow/__init__.py b/coremltools/converters/mil/frontend/tensorflow/__init__.py new file mode 100644 index 000000000..e4a6f0580 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools._deps import _HAS_TF_1 + +# suppress TensorFlow stdout prints +import os +import logging + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # FATAL +logging.getLogger("tensorflow").setLevel(logging.FATAL) + +register_tf_op = None + +if _HAS_TF_1: + from .ops import * # register all + from .dialect_ops import * # register tf extension ops + from .tf_op_registry import register_tf_op diff --git a/coremltools/converters/mil/frontend/tensorflow/basic_graph_ops.py b/coremltools/converters/mil/frontend/tensorflow/basic_graph_ops.py new file mode 100644 index 000000000..85274ec66 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/basic_graph_ops.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import six + + +def connect_edge(g, source, dest): + if isinstance(source, six.string_types): + source = g[source] + if isinstance(dest, six.string_types): + dest = g[dest] + source.outputs.append(dest.name) + dest.inputs.append(source.name) + + +def connect_edge_at_index(g, source, dest, idx): + if isinstance(source, six.string_types): + source = g[source] + if isinstance(dest, six.string_types): + dest = g[dest] + source.outputs.insert(idx, dest.name) + dest.inputs.insert(idx, source.name) + + +def replace_source(g, source, dest, new_source): + if isinstance(source, six.string_types): + source = g[source] + if isinstance(dest, six.string_types): + dest = g[dest] + if isinstance(new_source, six.string_types): + new_source = g[new_source] + dest_inputs = [] + for inp in dest.inputs: + if inp == source.name: + dest_inputs.append(new_source.name) + g[new_source.name].outputs.append(dest.name) + else: + dest_inputs.append(inp) + dest.inputs = dest_inputs + source.outputs = [i for i in g[source.name].outputs if i != dest.name] + + +def replace_control_source(g, source, dest, new_source): + if isinstance(source, six.string_types): + source = g[source] + if isinstance(dest, six.string_types): + dest = g[dest] + if isinstance(new_source, six.string_types): + new_source = g[new_source] + dest_inputs = [] + for inp in dest.control_inputs: + if inp == source.name: + dest_inputs.append(new_source.name) + g[new_source.name].control_outputs.append(dest.name) + else: + dest_inputs.append(inp) + dest.control_inputs = dest_inputs + source.control_outputs = [i for i in g[source.name].outputs if i != dest.name] + + +def replace_dest(g, source, dest, new_dest): + if isinstance(source, six.string_types): + source = g[source] + if isinstance(dest, six.string_types): + dest = g[dest] + if isinstance(new_dest, six.string_types): + new_dest = g[new_dest] + for idx, d in enumerate(source.outputs): + if d == dest.name: + source.outputs[idx] = new_dest.name + 
new_dest.inputs = new_dest.inputs[:] + [source.name]
+
+    dest.inputs = [i for i in dest.inputs if i != source.name]
+
+
+def replace_control_dest(g, source, dest, new_dest):
+    if isinstance(source, six.string_types):
+        source = g[source]
+    if isinstance(dest, six.string_types):
+        dest = g[dest]
+    if isinstance(new_dest, six.string_types):
+        new_dest = g[new_dest]
+    for idx, d in enumerate(source.control_outputs):
+        if d == dest.name:
+            source.control_outputs[idx] = new_dest.name
+            new_dest.control_inputs = new_dest.control_inputs[:] + [source.name]
+
+    dest.control_inputs = [i for i in dest.control_inputs if i != source.name]
+
+
+def connect_dests(g, source, dests):
+    for i in dests:
+        connect_edge(g, source, i)
+
+
+def connect_sources(g, sources, dest):
+    for i in sources:
+        connect_edge(g, i, dest)
+
+
+def disconnect_edge(g, source, dest):
+    if isinstance(source, six.string_types):
+        source = g[source]
+    if isinstance(dest, six.string_types):
+        dest = g[dest]
+    source.outputs = [i for i in source.outputs if i != dest.name]
+
+    dest.inputs = [i for i in dest.inputs if i != source.name]
+
+
+def disconnect_control_edge(g, source, dest):
+    if isinstance(source, six.string_types):
+        source = g[source]
+    if isinstance(dest, six.string_types):
+        dest = g[dest]
+    source.control_outputs = [i for i in source.control_outputs if i != dest.name]
+
+    dest.control_inputs = [i for i in dest.control_inputs if i != source.name]
+
+
+def disconnect_vertex_outs(g, source):
+    if isinstance(source, six.string_types):
+        source = g[source]
+    for out in source.outputs:
+        g[out].inputs = [i for i in g[out].inputs if i != source.name]
+    source.outputs = []
+
+
+def disconnect_vertex_ins(g, dest):
+    if isinstance(dest, six.string_types):
+        dest = g[dest]
+    for inp in dest.inputs:
+        if isinstance(inp, six.string_types):
+            innode = g[inp]
+        else:
+            innode = inp
+        innode.outputs = [i for i in innode.outputs if i != dest.name]
+    dest.inputs = []
+
+
+def disconnect_vertex_control_ins(g, dest):
+    if isinstance(dest, six.string_types):
+        dest = g[dest]
+    for inp in dest.control_inputs:
+        if isinstance(inp, six.string_types):
+            innode = g[inp]
+        else:
+            innode = inp
+        innode.control_outputs = [i for i in innode.control_outputs if i != dest.name]
+    dest.control_inputs = []
+
+
+def disconnect_vertex_control_outs(g, source):
+    if isinstance(source, six.string_types):
+        source = g[source]
+    for out in source.control_outputs:
+        g[out].control_inputs = [i for i in g[out].control_inputs if i != source.name]
+    source.control_outputs = []
+
+
+def delete_node(g, node):
+    if not isinstance(node, six.string_types):
+        node = node.name
+    disconnect_vertex_ins(g, node)
+    disconnect_vertex_outs(g, node)
+    disconnect_vertex_control_ins(g, node)
+    disconnect_vertex_control_outs(g, node)
+    del g[node]
+
+
+def replace_node(g, original_node, new_node):
+    if isinstance(new_node, six.string_types):
+        new_node = g[new_node]
+    if not isinstance(original_node, six.string_types):
+        original_node = original_node.name
+
+    for o in list(g[original_node].control_outputs):
+        replace_control_source(g, original_node, o, new_node)
+    for o in list(g[original_node].outputs):
+        replace_source(g, original_node, o, new_node)
+    for i in list(g[original_node].control_inputs):
+        replace_control_dest(g, i, original_node, new_node)
+    for i in list(g[original_node].inputs):
+        replace_dest(g, i, original_node, new_node)
+
+
+def fill_outputs(gd):
+    """
+    Fills the output lists of a graph of ParsedNode
+
+    Takes a graph in "dict{str, ParsedNode}" form, and 
returns a new graph. + """ + # fill outputs + for k, v in gd.items(): + for i in v.inputs: + gd[i].outputs.append(v.name) + for i in v.control_inputs: + gd[i].control_outputs.append(v.name) + get_tuple_ops = ["Split", "SplitV", "LSTMBlock"] + for k, v in gd.items(): + if v.op in get_tuple_ops: + outputs = [[out, int(gd[out].attr["index"])] for out in v.outputs] + outputs.sort(key=lambda x: x[1]) + gd[k].outputs = [out for [out, _] in outputs] + + return gd + + +def check_connections(gd): + """ + Given a graph, checks that all + - inputs/outputs are symmetric + - control_inputs/control_outputs are symmetric + - The graph does not reference vertices outside of the graph + + Takes a graph in "dict{str, ParsedNode}" form. Does not return, + asserts false on failure. + """ + # check that inputs and outputs line up + for k, v in gd.items(): + for i in v.inputs: + if isinstance(i, six.string_types): + assert k in gd[i].outputs + else: + assert k in gd[i.name].outputs + for i in v.outputs: + inputs = [ + inp if isinstance(inp, six.string_types) else inp.name + for inp in gd[i].inputs + ] + assert k in inputs + for i in v.control_inputs: + if isinstance(i, six.string_types): + assert k in gd[i].control_outputs + else: + assert k in gd[i.name].control_outputs + for i in v.control_outputs: + control_inputs = [ + inp if isinstance(inp, six.string_types) else inp.name + for inp in gd[i].control_inputs + ] + assert k in control_inputs + + +def const_determined_nodes(gd, assume_variable_nodes=None): + """ + Given a graph, extract all nodes that only depends on const nodes. + + # TODO: extract nodes that depends on the "const part" of placeholders. + """ + if assume_variable_nodes is None: + assume_variable_nodes = [] + vis = {} + + def visit(node): + # make sure node is a ParsedNode + if isinstance(node, six.string_types): + node = gd[node] + if node.name in vis: + return + + if "Const" in node.op: + vis[node.name] = True + elif "Variable" in node.op: + vis[node.name] = False + elif "Placeholder" in node.op: + vis[node.name] = False + # TF1 uses TensorArray* while TF2 uses TensorList* ops + elif "TensorArray" in node.op or "TensorList" in node.op: + vis[node.name] = False + elif "function" in node.op: + vis[node.name] = False + elif "global" in node.op: + vis[node.name] = False + elif node.name in assume_variable_nodes: + vis[node.name] = False + else: + ret = True + vis[node.name] = False + for innode in node.inputs: + if isinstance(innode, six.string_types): + inname = innode + else: + inname = innode.name + if inname not in vis: + visit(innode) + if not vis[inname]: + ret = False + break + vis[node.name] = ret + + for k, v in gd.items(): + if k in vis: + continue + visit(k) + + ret = [] + for k, v in vis.items(): + if v: + ret.append(k) + return ret + + +def topsort(graph): + if len(graph) == 0: + return [] + inedge_count = {k: len(v.inputs) + len(v.control_inputs) for k, v in graph.items()} + ret = [] + curboundary = [k for k, v in inedge_count.items() if v == 0] + nextboundary = [] + if len(curboundary) == 0: + raise ValueError("Graph is not a DAG!") + + while len(curboundary) > 0: + ret.extend(curboundary) + for b in curboundary: + for o in graph[b].outputs + graph[b].control_outputs: + inedge_count[o] -= 1 + if inedge_count[o] == 0: + nextboundary.append(o) + curboundary = nextboundary + nextboundary = [] + if len(ret) != len(graph): + raise ValueError("Graph is not a DAG!") + return ret + + +def simple_topsort(inputs): + if len(inputs) == 0: + return [] + outputs = {k: [] for k in inputs} + for k 
in inputs: + for o in inputs[k]: + outputs[o].append(k) + + inedge_count = {k: len(v) for k, v in inputs.items()} + ret = [] + curboundary = [k for k, v in inedge_count.items() if v == 0] + nextboundary = [] + if len(curboundary) == 0: + raise ValueError("Graph is not a DAG!") + + while len(curboundary) > 0: + ret.extend(curboundary) + for b in curboundary: + for o in outputs[b]: + inedge_count[o] -= 1 + if inedge_count[o] == 0: + nextboundary.append(o) + curboundary = nextboundary + nextboundary = [] + if len(ret) != len(inputs): + raise ValueError("Graph is not a DAG!") + return ret diff --git a/coremltools/converters/mil/frontend/tensorflow/convert_utils.py b/coremltools/converters/mil/frontend/tensorflow/convert_utils.py new file mode 100644 index 000000000..5e4da7e07 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/convert_utils.py @@ -0,0 +1,201 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import logging +from .basic_graph_ops import topsort +from coremltools.converters.mil.mil.types.symbolic import is_symbolic, any_variadic +from coremltools.converters.mil.mil import types +from .tf_op_registry import _TF_OPS_REGISTRY +from coremltools.converters.mil.mil.var import ListVar +from collections import defaultdict +from tqdm import tqdm as _tqdm + + +def compatible_shapes(tf_shape, inf_shape): + def compare_elem(dt, ds): + if dt is None or dt < 0: + return True + elif dt == ds: + return True + else: + return False + + if tf_shape is None or any_variadic(inf_shape): + return True + else: + return all(compare_elem(dt, ds) for dt, ds in zip(tf_shape, inf_shape)) + + +def check_output_shapes(x, node): + """ + x: list[Var] or tuple[Var] + node: ParsedTFNode + """ + if isinstance(x, ListVar): + # No check on list. + return + if not isinstance(x, (list, tuple)): + x = [x] + tf_shapes = node.attr.get("_output_shapes", None) + if tf_shapes is None: + return + inf_shapes = [] + for y in x: + if y is None: + msg = "TF convert returns None type in TF node {}" + raise TypeError(msg.format(node.name)) + if types.is_tensor(y.sym_type): + inf_shapes.append(list(y.shape)) + elif types.is_scalar(y.sym_type): + inf_shapes.append([]) + else: + msg = "Output type {} not understood" + raise ValueError(msg.format(y)) + + for t, s in zip(tf_shapes, inf_shapes): + if not compatible_shapes(t, s): + msg = ( + "Op {} ({}) type inference ({}) and TF output shape " + "({}) mismatch" + ) + raise ValueError(msg.format(node.name, node.op, s, t)) + + +def connect_global_initializer(graph): + # In TF1, variable initialization (from frozen graph) is done by a + # DAG in main function that is disconnected from the rest of the main + # function. For example: + # + # Initialization DAG (disconnected from Main DAG): + # Const -> set_global(variable='v1') + # + # Main DAG: + # Placeholder --- + # | + # get_global(variable='v1') ----> some_output + # + # (Note that in this example there's no loop or other function.) + # + # If the variable does not cross block boundary, we can always represent + # `get_global` by the input to `set_global`, which may or may not be + # Const, following the control dependency. + # + # Note that this is incorrect if global variable crosses, say, + # while_loop block boundary, which needs a more complex resource inference + # to support and is not supported in this function. 
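+    # Schematically (hypothetical node names, graph in dict-of-ParsedTFNode
+    # form):
+    #
+    #   Const('c') --> set_global('sg', variable='v1')
+    #   get_global('gg', variable='v1') --> ...consumers...
+    #
+    # After this function runs, 'sg' is in gg.control_inputs and 'gg' is in
+    # sg.control_outputs, so topological sorting schedules 'sg' first.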
+    #
+    # Due to the lack of control dependency between the two DAGs, we could be
+    # converting `set_global` after `get_global`, which makes it impossible to
+    # perform eager type inference, as type information (e.g., tensor shape)
+    # is only provided by `set_global` (whether setting it to a const or a
+    # non-const).
+    #
+    # Here we remedy the simpler case: when `set_global` takes in a Const,
+    # we assume it's initialization and thus must
+    # run before get_global, i.e. all get_global(variable='v1') must be a
+    # control_output of set_global(variable='v1') where set_global's input is
+    # Const (with control_inputs and control_outputs set symmetrically). Note
+    # that multiple `get_global(variable='v1')` might have dependencies among
+    # themselves, but they should all take the constant
+    # `set_global(variable='v1')` as control dependency.
+
+    # Phase 1: Collect get_global nodes for each variable.
+    # variable name to list[ParsedTFNode]
+    var_to_get_global_nodes = defaultdict(list)
+    for node in graph.values():
+        if node.op == "get_global":
+            variable_name = node.attr["variable"]
+            var_to_get_global_nodes[variable_name].append(node)
+
+    # Phase 2: Find set_global with compile time values
+    for node_name, node in graph.items():
+        if node.op != "set_global":
+            continue
+        input_name = node.inputs[0]
+        input_node = graph[input_name]
+        if input_node.op != "Const":
+            continue
+        variable_name = node.attr["variable"]
+        for get_node in var_to_get_global_nodes[variable_name]:
+            logging.info(
+                "add {} as control input of {}".format(node_name, get_node.name)
+            )
+            get_node.control_inputs.append(node_name)
+            node.control_outputs.append(get_node.name)
+
+
+def convert_graph(context, graph, outputs=None):
+    """
+    Construct Core ML ops corresponding to `graph`.
+
+    Inputs:
+
+    - context (TranscriptionContext)
+
+    - graph (dict of str -> ParsedTFNode): op name --> ParsedTFNode
+
+    - outputs (list[str]): List of output names. If outputs is None, the last
+      node of the graph (after topsort) must have op type 'return'.
+
+    Returns:
+
+    list[Var]: the output Vars of the constructed Block.
+    """
+    connect_global_initializer(graph)
+    nodes = topsort(graph)
+
+    if outputs is None:
+        # infer outputs from return
+        last_node = graph[nodes[-1]]
+        if last_node.op != "return":
+            msg = "Expected the last node in graph to be 'return'; got {}"
+            raise ValueError(msg.format(last_node.op))
+        second_last_node = graph[last_node.inputs[0]]
+        if second_last_node.op == "make_tuple":
+            outputs = second_last_node.inputs
+        else:
+            # single output function
+            outputs = second_last_node.name
+
+    # Translate the non-placeholder ops.
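+    # Each iteration below dispatches on node.op: it looks up the builder
+    # function registered for that op in _TF_OPS_REGISTRY and lets it emit
+    # the corresponding MIL ops. For instance (illustrative sketch, not from
+    # the original change), a TF "Relu" node would reach a function
+    # registered via @register_tf_op that does roughly:
+    #
+    #     x = mb.relu(x=context[node.inputs[0]], name=node.name)
+    #     context.add(node.name, x)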
+    num_nodes = len(nodes)
+    for i, node_name in enumerate(
+        _tqdm(nodes, desc="Converting Frontend ==> MIL Ops", unit=" ops")
+    ):
+        node = graph[node_name]
+        if node.op == "return":
+            continue
+        logging.info(
+            "[{}/{}] Converting {} op '{}'".format(i + 1, num_nodes, node.op, node.name)
+        )
+
+        if node.op == "NoOp":
+            continue
+        _add_op = _TF_OPS_REGISTRY.get(node.op, None)
+        if _add_op is None:
+            msg = "Conversion for TF op '{0}' not implemented.\n \n{1}".format(
+                node.op, node.original_node
+            )
+            raise NotImplementedError(msg)
+        _add_op(context, node)
+
+        if len(node.outputs) > 0:
+            # set_global / get_global / NoOp have no direct consumers / outputs
+            x = context[node.name]
+            check_output_shapes(x, node)
+
+    output_is_list = isinstance(outputs, (tuple, list))
+    if not output_is_list:
+        outputs = [outputs]
+
+    output_vars = []
+    for output in outputs:
+        x = context[output.split(":")[0]]
+        if isinstance(x, (tuple, list)):
+            idx = int(output.split(":")[1])
+            output_vars.append(x[idx])
+        else:
+            output_vars.append(x)
+
+    return output_vars if output_is_list else output_vars[0]
diff --git a/coremltools/converters/mil/frontend/tensorflow/converter.py b/coremltools/converters/mil/frontend/tensorflow/converter.py
new file mode 100644
index 000000000..2743f364d
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow/converter.py
@@ -0,0 +1,395 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import six
+import logging
+from coremltools.converters.mil.input_types import (
+    InputType,
+    TensorType,
+    ImageType,
+    RangeDim,
+    _get_shaping_class,
+)
+from coremltools.converters.mil.input_types import Shape as InputShape
+from coremltools.converters.mil.mil.var import Var
+from coremltools.converters.mil.mil import get_new_symbol
+from coremltools.converters.mil.mil.types.symbolic import is_symbolic
+
+from coremltools.converters.mil.mil import types
+from .basic_graph_ops import topsort, simple_topsort
+
+from .convert_utils import convert_graph
+
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil import Program
+from coremltools.converters.mil.mil import Function
+from .ssa_passes.tf_passes import tensorflow_passes
+from coremltools.converters._profile_utils import _profile
+
+
+# TranscriptionContext maintains a map of tf_node.name --> ssa_var available
+# to the current TF --> tfssa transcription.
+class TranscriptionContext:
+    def __init__(self, name=None):
+        self.name = name if name is not None else ""
+        self.context = {}
+        self.graphs = {}
+
+        # TF loops are represented as functions, so nested loops become
+        # stacked functions. Stacked functions are translated to nested
+        # blocks in Program, like
+        #
+        # while_loop(loop_vars=(%a, %b))
+        #   cond_block1(%a.x, %b.x) {
+        #     ...some ops
+        #   } -> (%bool_var1)
+        #   body_block1(%a.x, %b.x) {
+        #     %ret_axx = while_loop(loop_vars=(%a.x,))
+        #       cond_block2(%a.x.x) {
+        #         ...some ops
+        #       } -> (%bool_var2)
+        #       body_block2(%a.x.x) {
+        #         ...some ops
+        #       } -> (%new_a.x.x)
+        #     } -> (%ret_axx)
+        #     ....some ops using %ret_axx
+        #   } -> (%ret_ax, %ret_bx)
+        #
+        # During the translation of cond_block2, we'd have func_input_stack
+        #
+        # (%a.x.x,)
+        # (%a.x, %b.x)
+        #
+        # where [%a.x.x] would be unstacked once cond_block2 is done.
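+        #
+        # Illustrative use of the stack (editorial sketch, not part of this
+        # change), following the example above:
+        #
+        #     ctx.stack_func_inputs((a_x, b_x))   # entering while_loop 1 blocks
+        #     ctx.stack_func_inputs((a_x_x,))     # entering while_loop 2 blocks
+        #     ctx.get_func_inputs()               # -> (a_x_x,)
+        #     ctx.unstack_func_inputs()           # done with cond/body_block2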
+ self.func_input_stack = [] # list of tuple[Var] + + def add(self, tf_name, ssa_vars, is_new_var=True): + """ + ssa_vars: list[Var] / tuple[Var] (multiple outputs) or + Var (single_output) + is_new_var: True if ssa_vars are newly created for tf_name. + """ + if tf_name in self.context: + logging.warning("TF var %s is added again.", tf_name) + return + if is_new_var and isinstance(ssa_vars, Var) and tf_name != ssa_vars.name: + msg = ( + "MIL op's name ({}) does not match TensorFlow's node name ({})." + " Warning: Node added to context must have the same name as the name passed to context." + ) + raise ValueError(msg.format(tf_name, ssa_vars.name)) + self.context[tf_name] = ssa_vars + + def add_graph(self, graph_name, graph): + self.graphs[graph_name] = graph + + def get_graph(self, graph_name): + if graph_name not in self.graphs: + msg = "Graph '{}' not found in: {}" + raise KeyError(msg.format(graph_name, list(self.graphs.keys()))) + return self.graphs[graph_name] + + def stack_func_inputs(self, inputs): + self.func_input_stack.append(inputs) + + def unstack_func_inputs(self): + if len(self.func_input_stack) == 0: + raise ValueError("No func input available") + self.func_input_stack.pop() + + def get_func_inputs(self): + if len(self.func_input_stack) == 0: + raise ValueError("No func input available") + return self.func_input_stack[-1] + + def __getitem__(self, tf_name): + if tf_name not in self.context: + msg = "TF var {} not found in context {}" + raise KeyError(msg.format(tf_name, self.name)) + return self.context[tf_name] + + def __contains__(self, tf_name): + return tf_name in self.context + + +class TFConverter: + def __init__(self, tfssa, inputs=None, outputs=None, **kwargs): + """ + tfssa: TensorFlow IR. + inputs: list of TensorType or ImageType, optional, defaults to None. + outputs: list of str or str, optional, defaults to None. + A list of names of the output nodes or a str for single output name. + If None, the converter will try to extract the output information from + TensorFlow model. + """ + self.tfssa = tfssa + self.global_type = {} + self.inputs = None + + main_func = tfssa.functions["main"] + graph = main_func.graph + + # Filter the inputs to only Placeholder names + tf_placeholder_names = [n for n in graph if graph[n].op == "Placeholder"] + placeholder_names = [] + if inputs is not None: + # Check inputs format + if not isinstance(inputs, (list, tuple)): + raise ValueError( + "Type of inputs should be list or tuple, got {} instead.".format( + type(inputs) + ) + ) + if not all([isinstance(i, InputType) for i in inputs]): + raise ValueError( + "Type of inputs should be list or tuple of TensorType or ImageType, got {} instead.".format( + [type(i) for i in inputs] + ) + ) + + # Special case: if there's only 1 input and 1 placeholder, we match them. + if len(tf_placeholder_names) == 1 and len(inputs) == 1: + if inputs[0].name is None: + inputs[0].name = tf_placeholder_names[0] + + # We fill in shapes for user-specified input that doesn't have shape + for inp in inputs: + # Check inputs existence + if inp.name is None: + raise ValueError( + "Unable to infer input's name or input name was not provided" + ) + if inp.name not in tf_placeholder_names: + raise ValueError( + "Input ({}) provided is not found in given tensorflow graph. 
Placeholders in graph are: {}".format(
+                            inp.name, tf_placeholder_names
+                        )
+                    )
+                if inp.shape is None:
+                    if graph[inp.name].attr.get("_output_shapes", None) is not None:
+                        shape = graph[inp.name].attr["_output_shapes"][0]
+                        if shape is None:
+                            # Scalar is given as None
+                            shape = []
+                    elif graph[inp.name].attr.get("shape", None) is not None:
+                        shape = graph[inp.name].attr["shape"]
+                    else:
+                        raise ValueError(
+                            "Can't extract shape from attribute of ({})".format(
+                                inp.name
+                            )
+                        )
+                    inp.shape = _get_shaping_class(shape)
+
+            # Extract placeholders that users didn't specify.
+            user_input_names = [inp.name for inp in inputs]
+            for name in tf_placeholder_names:
+                if name not in user_input_names:
+                    placeholder_names.append(name)
+        else:
+            inputs = []
+            placeholder_names = tf_placeholder_names
+
+        placeholder_inputs = {}
+        for inp in main_func.inputs:
+            if inp not in placeholder_names:
+                continue
+            if graph[inp].attr.get("_output_shapes", None) is not None:
+                placeholder_inputs.update({inp: graph[inp].attr["_output_shapes"][0]})
+            elif graph[inp].attr.get("shape", None) is not None:
+                placeholder_inputs.update({inp: graph[inp].attr["shape"]})
+            else:
+                raise ValueError("Can't find input shape for ({})".format(inp))
+
+        if len(placeholder_inputs) > 0:
+            logging.info(
+                "Adding inputs not specified by the user: '{}'".format(placeholder_inputs)
+            )
+
+        for k, v in placeholder_inputs.items():
+            inputs.append(TensorType(name=k, shape=v))
+        for idx, inp in enumerate(inputs):
+            # We set the default image format in TF as NHWC, since NHWC is the
+            # TF default unless a GPU device is specified.
+            if isinstance(inp, ImageType) and inputs[idx].channel_first is None:
+                inputs[idx].channel_first = False
+        self.inputs = tuple(inputs)
+
+        for inputtype in self.inputs:
+            if not isinstance(inputtype.shape, InputShape):
+                continue
+            if any([isinstance(s, RangeDim) for s in inputtype.shape.shape]):
+                continue
+            node = graph[inputtype.name]
+            shape = [-1 if is_symbolic(s) else s for s in inputtype.shape.shape]
+            node.attr["_output_shapes"] = [shape]  # list of length 1
+
+        # infer outputs if not provided
+        self._validate_outputs(tfssa, outputs)
+        outputs = main_func.outputs if outputs is None else outputs
+        outputs = outputs if isinstance(outputs, (tuple, list)) else [outputs]
+        outputs = [x if isinstance(x, six.string_types) else x.name for x in outputs]
+        self.outputs = outputs
+
+        # We would like a stack so that we run conversion sequentially.
+        self.graph_stack = self._get_stack(tfssa, root="main")
+        self.context = TranscriptionContext()
+        self.tensorflow_passes = tensorflow_passes
+
+    def _get_stack(self, tfssa, root="main"):
+        # We're trying to get an order of how to loop through the graphs.
+        # This is NOT necessarily a DAG.
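+        # (Illustrative sketch, not from the original change) for a main
+        # graph containing a single `while` node with
+        # attr {"body_function": "body0", "cond_function": "cond0"},
+        # dep would be {"main": [], "body0": ["main"], "cond0": ["main"]},
+        # and simple_topsort(dep) could return ["main", "body0", "cond0"]:
+        # main is converted first, then the function bodies it references.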
+        dep = {x: [] for x in tfssa.functions}
+        for fname in tfssa.functions:
+            for node in tfssa.functions[fname].graph.values():
+                func_x, func_y = None, None
+
+                if node.op == "while":
+                    func_x = node.attr["body_function"]
+                    func_y = node.attr["cond_function"]
+
+                if func_x and fname not in dep[func_x]:
+                    dep[func_x].append(fname)
+                if func_y and fname not in dep[func_y]:
+                    dep[func_y].append(fname)
+
+        assert len(dep[root]) == 0
+        graph_stack = simple_topsort(dep)
+
+        return graph_stack
+
+    @staticmethod
+    def _get_tensor_name(tensor):
+        ret = None
+        if isinstance(tensor, six.string_types):
+            ret = tensor
+        else:
+            ret = tensor.name
+        return ret.split(":")[0]
+
+    @staticmethod
+    def _create_placeholder(node):
+        node.parse_from_attr()
+        shape = []
+        dtype = node.attr["dtype"]
+        if types.is_tensor(node.datatype):
+            shape = node.datatype.get_shape()
+            shape = tuple(get_new_symbol() if s is None or s < 0 else s for s in shape)
+        return mb.placeholder(shape, dtype=dtype)
+
+    def _validate_outputs(self, tfssa, outputs):
+        if outputs is None:
+            return
+        outputs = outputs if isinstance(outputs, (tuple, list)) else [outputs]
+        output_nodes = []
+        for f in tfssa.functions.values():
+            output_nodes += list(f.outputs)
+        all_nodes = []
+        for f in tfssa.functions.values():
+            all_nodes += list(f.graph.keys())
+        for n in outputs:
+            if self._get_tensor_name(n) not in output_nodes + all_nodes:
+                raise KeyError('Output node name "{}" does not exist.'.format(n))
+
+    def check_placeholder_output(self, prog):
+        """
+        Handle the cases where a placeholder is an output.
+        There is a case where the program is like
+            main(%Placeholder: (5,fp32)) {
+                block3() {
+                } -> (%Placeholder)
+            }
+        but self.outputs = ["Placeholder:0"].
+        We need to change the block output to Placeholder:0 by inserting an
+        identity op.
+        """
+        block = prog["main"]
+        input_name = [x.name for x in list(block.inputs.values())]
+        output_name = [x.name for x in block.outputs]
+        placeholder_output_name = [
+            x for x in output_name if x in input_name and x not in self.outputs
+        ]
+        with block:
+            new_outputs = [
+                x for x in block.outputs if x.name not in placeholder_output_name
+            ]
+            for name in placeholder_output_name:
+                x = block.inputs[name]
+                x = mb.identity(x=x, name=name + ":0")
+                new_outputs.append(x)
+            block.set_outputs(new_outputs)
+
+    def convert_main_graph(self, prog, graph):
+        func_inputs = {}
+        for input_type in self.inputs:
+            node = graph[input_type.name]
+            func_inputs[input_type.name] = TFConverter._create_placeholder(node)
+        prog.set_main_input_types(self.inputs)
+
+        with Function(func_inputs) as ssa_func:
+            # Get the input Vars
+            for name in func_inputs.keys():
+                self.context.add(name, ssa_func.inputs[name])
+            outputs = convert_graph(self.context, graph, self.outputs)
+            ssa_func.set_outputs(outputs)
+            prog.add_function("main", ssa_func)
+
+        # Rename outputs to TF's name. This is needed when the last op doesn't
+        # generate a new Var (e.g., get_tuple, Identity etc.), and thus the
+        # last Var would have a different name than the last TF op's name.
+        #
+        # Example:
+        #
+        # TF code:
+        #     x = tf.placeholder(tf.float32, shape=(1,))
+        #     y = tf.placeholder(tf.float32, shape=(1,))
+        #     c = lambda i, j: \
+        #         tf.less(tf.math.reduce_mean(i), tf.math.reduce_mean(j))
+        #     b = lambda i, j: (tf.add(i, 1), j)
+        #     res = tf.while_loop(c, b, [x, y])
+        #
+        # Resulting nodes (excluding the nodes in while loop cond & body):
+        #
+        # node name: Placeholder op type: Placeholder inputs: []
+        # node name: Placeholder_1 op type: Placeholder inputs: []
+        # node name: make_input_0 op type: make_tuple inputs: ['Placeholder',
+        # 'Placeholder_1']
+        # node name: while_0 op type: while inputs: ['make_input_0']
+        # node name: while/Exit op type: get_tuple inputs: ['while_0']
+        # node name: while/Exit_1 op type: get_tuple inputs: ['while_0']
+        #
+        # Observe that return node `while/Exit` is an output from get_tuple,
+        # which in our translation simply unpacks a python tuple of Vars
+        # ('while_0:0', 'while_0:1') returned from the while_0 SSA op. We need
+        # to rename `while_0:0` to `while/Exit` in order for users to find the
+        # output.
+        # Note: only rename the output if the output is not a Placeholder.
+
+        input_names = [x.name for x in self.inputs]
+        for v_o, out_name in zip(prog["main"].outputs, self.outputs):
+            if v_o.name != out_name and v_o.name not in input_names:
+                logging.info(
+                    "Renaming output var: '{}' -> '{}'".format(v_o.name, out_name)
+                )
+                v_o.name = out_name
+        self.check_placeholder_output(prog)
+
+    @_profile
+    def convert(self):
+        prog = Program()
+        if len(self.graph_stack) == 0:
+            raise ValueError("At least one TF function must be present")
+        if self.graph_stack[0] != "main":
+            msg = "TF root graph must be named 'main'. Got {}"
+            raise ValueError(msg.format(self.graph_stack[0]))
+        graph = self.tfssa.functions["main"].graph
+        for g_name in self.graph_stack[1:]:
+            self.context.add_graph(g_name, self.tfssa.functions[g_name].graph)
+        self.convert_main_graph(prog, graph)
+
+        # Apply TF frontend passes on Program. These passes are different
+        # from passes applied to tfssa.
+        self.tensorflow_passes(prog)
+
+        return prog
diff --git a/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py b/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py
new file mode 100644
index 000000000..00363c84c
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow/dialect_ops.py
@@ -0,0 +1,164 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil import Operation
+from coremltools.converters.mil.mil.input_type import *
+from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry
+
+register_op = SSAOpRegistry.register_op
+
+
+# This file contains the TF dialect of SSA. Briefly, these ops are only
+# understandable in the TF frontend and not acceptable in the standard op set.
+# No backend supports any of the ops here. These ops exist to facilitate
+# frontend SSA passes, but must be replaced with standard ops during SSA
+# passes.
+
+# All TF dialect ops must start with the 'tf_' prefix.
+#
+# tf_make_list allows elem_shape to be unspecified; the core make_list op
+# does not allow that.
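+# (Illustrative, editorial sketch) a frontend op implementation can emit a
+# dialect op through the builder just like a core op, e.g.:
+#
+#     ls = mb.tf_make_list(init_length=num_elements, dtype="fp32")
+#
+# with a TF SSA pass later rewriting tf_make_list into the core make_list op
+# once elem_shape can be determined.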
+@register_op(doc_str="TODO", namespace="tf") +class tf_make_list(Operation): + input_spec = InputSpec( + init_length=IntInputType(optional=True, default=1), + dynamic_length=BoolInputType(optional=True, default=True), + elem_shape=TensorInputType(const=True, optional=True), + dtype=StringInputType(const=True, optional=True, default="fp32"), + ) + + def __init__(self, **kwargs): + super(tf_make_list, self).__init__(**kwargs) + + def type_inference(self): + init_length = self.init_length.val + if self.elem_shape is None or self.elem_shape.sym_val is None: + return types.list( + types.unknown, + init_length=init_length, + dynamic_length=self.dynamic_length.val, + ) + builtin_dtype = types.string_to_builtin(self.dtype.val) + if builtin_dtype is None: + raise ValueError("Unsupported dtype {}".format(self.dtype.val)) + elem_type = types.tensor(builtin_dtype, self.elem_shape.sym_val) + return types.list( + elem_type, init_length=init_length, dynamic_length=self.dynamic_length.val + ) + + +class TfLSTMBase(Operation): + """ + Common LSTM inputs for BlockLSTMCell and BlockLSTM. + """ + + input_spec = InputSpec( + c_prev=TensorInputType(), # [batch, hidden_dim] + h_prev=TensorInputType(), # [batch, hidden_dim] + # weight: [input_dim + hidden_dim, 4*hidden_dim] (icfo layout) + weight=TensorInputType(const=True), + forget_bias=FloatInputType(const=True, optional=True, default=1.0), + # cell_clip == None implies not using cell clip + cell_clip=FloatInputType(const=True, optional=True), + # If use_peephole == False, weight_peep_* is ignored + use_peephole=BoolInputType(const=True, optional=True, default=False), + weight_peep_i=TensorInputType(const=True, optional=True), # [hidden_dim,] + weight_peep_f=TensorInputType(const=True, optional=True), # [hidden_dim,] + weight_peep_o=TensorInputType(const=True, optional=True), # [hidden_dim,] + bias=TensorInputType(const=True), # [4*hidden_dim] (icfo layout) + ) + + def _check_peephole_weights(self): + # Check weight_peep_* + if self.use_peephole.val: + if ( + self.weight_peep_i is None + or self.weight_peep_f is None + or self.weight_peep_o is None + ): + raise ValueError( + "weight_peep_* cannot be None when use_peephole is True" + ) + + +@register_op( + doc_str=""" + xh = [x, h_prev] + [i, ci, f, o] = xh * w + b + f = f + forget_bias + if not use_peephole: + wci = wcf = wco = 0 + i = sigmoid(cs_prev .* wci + i) + f = sigmoid(cs_prev .* wcf + f) + ci = tanh(ci) + cs = ci .* i + cs_prev .* f + cs = clip(cs, cell_clip) + o = sigmoid(cs * wco + o) + co = tanh(cs) + h = co .* o + """, + namespace="tf", +) +class tf_lstm_block_cell(TfLSTMBase): + input_spec = ( + InputSpec(x=TensorInputType(),) + TfLSTMBase.input_spec # [batch, input_dim] + ) + + def __init__(self, **kwargs): + super(tf_lstm_block_cell, self).__init__(**kwargs) + + def type_inference(self): + self._check_peephole_weights() + # all return shapes are [batch, hidden_dim] + ret_shape = self.c_prev.shape + dtype = self.x.dtype + # See + # https://www.tensorflow.org/api_docs/python/tf/raw_ops/LSTMBlockCell + # All returned shapes are [batch, hidden_dim] + return ( + types.tensor(dtype, ret_shape), # i + types.tensor(dtype, ret_shape), # cs + types.tensor(dtype, ret_shape), # f + types.tensor(dtype, ret_shape), # o + types.tensor(dtype, ret_shape), # ci + types.tensor(dtype, ret_shape), # co + types.tensor(dtype, ret_shape), + ) # h + + +@register_op( + doc_str=""" + Apply LSTM to an input sequence + """, + namespace="tf", +) +class tf_lstm_block(TfLSTMBase): + input_spec = ( + InputSpec( + 
seq_len=IntInputType(), # int + x=TensorInputType(), # [padded_len, batch, input_dim] + ) + + TfLSTMBase.input_spec + ) + + def __init__(self, **kwargs): + super(tf_lstm_block, self).__init__(**kwargs) + + def type_inference(self): + self._check_peephole_weights() + padded_len = self.x.shape[0] + ret_shape = [padded_len] + list(self.c_prev.shape) + dtype = self.x.dtype + # All returned shapes are [padded_len, b, hidden_dim] + return ( + types.tensor(dtype, ret_shape), # i + types.tensor(dtype, ret_shape), # cs + types.tensor(dtype, ret_shape), # f + types.tensor(dtype, ret_shape), # o + types.tensor(dtype, ret_shape), # ci + types.tensor(dtype, ret_shape), # co + types.tensor(dtype, ret_shape), + ) # h diff --git a/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py b/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py new file mode 100644 index 000000000..a0c772060 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/dot_visitor.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +from coremltools.converters.mil.mil import types + + +class DotVisitor(object): + """ + Generates a dot description of a graph in dictionary form. + """ + + def __init__(self, annotation=None): + self.result = [] + self.visited_memo = {} + self.highlights = {} + self.alternate_labeller = None + self.annotation = annotation + + def labeller(self, labeller): + self.alternate_labeller = labeller + return self + + def highlight_nodes(self, nodeset, color="yellow"): + for i in nodeset: + self.highlights[i] = color + return self + + def visit(self, graph, node, nodename_prefix=""): + if node.name in self.visited_memo: + return self + + # For printing datatype, breaks type + if node.attr.get("symbolic_datatype", None) is not None: + dtype = str(types.get_type_info(node.attr["symbolic_datatype"])) + elif node.datatype is not None: + dtype = str(types.get_type_info(node.datatype)) + else: + dtype = "Unknown" + + label = "" + if self.alternate_labeller is not None: + label = self.alternate_labeller(node) + else: + if len(node.outputs) == 0: + label = "\\n{" + node.name + "}" + if "Placeholder" in node.op: + label = "\\n{" + node.name + "}" + if node.op == "while": + label = ( + "\\n{body: " + + node.attr["body_function"] + + " cond:" + + node.attr["cond_function"] + + "}" + ) + if node.op == "function": + label = "\\n{body: " + node.attr["function_name"] + "}" + if node.op == "function_entry": + label = "\\n{" + node.name + "}" + label = node.op + ":" + dtype + label + + if node.name in self.highlights: + self.result.append( + '"' + + nodename_prefix + + node.name + + '"' + + '[label="' + + label + + '",fillcolor=%s,style=filled,fontcolor=%s]' + % ( + self.highlights[node.name], + "violetred" if node.attr.get(self.annotation, False) else "black", + ) + ) + else: + self.result.append( + '"' + + nodename_prefix + + node.name + + '"' + + '[label="' + + label + + '",fontcolor=%s]' + % ("violetred" if node.attr.get(self.annotation, False) else "black") + ) + + for i in node.inputs: + input_name = i + edge = ( + '"' + + nodename_prefix + + input_name + + '"' + + " -> " + + '"' + + nodename_prefix + + node.name + + '"' + ) + innode = graph[input_name] + 
self.result.append(edge) + + for i in node.control_inputs: + input_name = i + edge = ( + '"' + + nodename_prefix + + input_name + + '"' + + " -> " + + '"' + + nodename_prefix + + node.name + + '"' + ) + innode = graph[input_name] + edge = edge + " [style=dotted]" + self.result.append(edge) + + self.visited_memo[node.name] = 1 + + for i in node.inputs: + input_name = i + if input_name[0] == "^": + input_name = input_name[1:] + assert input_name in graph + self.visit(graph, graph[input_name], nodename_prefix) + return self + + def visit_all(self, graph, nodename_prefix=""): + for i in graph: + self.visit(graph, graph[i], nodename_prefix) + return self + + def get_result(self, graphtype="digraph", graph_name="g"): + return ( + graphtype + + " " + + graph_name + + " {\n\t" + + "\n\t".join(str(i) for i in self.result) + + ';\n\tlabel="' + + graph_name[8:] + + '";\n\tfontsize=96;\n}' + ) + + def __str__(self): + return self.get_result() diff --git a/coremltools/converters/mil/frontend/tensorflow/load.py b/coremltools/converters/mil/frontend/tensorflow/load.py new file mode 100644 index 000000000..8bf128aa1 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/load.py @@ -0,0 +1,287 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ + +import logging +import os +import gc + +import six + +import tensorflow as tf + +from tempfile import mktemp +from .basic_graph_ops import fill_outputs +from .converter import TFConverter +from .tf_graph_pass import * # pylint: disable=unused-wildcard-import,wildcard-import +from .tfssa import NetworkEnsemble, SSAFunction +from .parsed_tf_node import ParsedTFNode +from coremltools.converters._profile_utils import _profile +from tqdm import tqdm as _tqdm + + +class TFLoader: + """Abstract class for TensorFlow model loader.""" + + def __init__(self, model, debug=False, **kwargs): + """ + TensorFlow model loader. + + Parameters + ---------- + model: TensorFlow model + Model generated using TensorFlow. + debug: bool, optional, defaults to False + If true, display verbose logging and visualizations. + kwargs: dict(str, Any), optional, defaults to None + Dictionary of additional arguments. + """ + self.model = model + self.debug = debug + self.kwargs = kwargs + self._graph_def = None + self._tf_ssa = None + + @_profile + def load(self): + """Load TensorFlow model into MIL program.""" + + logging.info("Loading TensorFlow model '{}'".format(self.model)) + outputs = self.kwargs.get("outputs", None) + self._graph_def = self._graph_def_from_model(outputs) + + if self._graph_def is not None and len(self._graph_def.node) == 0: + msg = "tf.Graph should have at least 1 node, Got empty graph." 
+ raise ValueError(msg) + + self._tf_ssa = self._tf_ssa_from_graph_def() + + del self._graph_def + gc.collect() + + if self.debug: + import graphviz + + dot_string = self._tf_ssa.get_dot_string( + annotation=True, name_and_op_style=True, highlight_debug_nodes=[] + ) + graphviz.Source(dot_string).view( + filename="/tmp/ssa_before_tf_passes", cleanup=True + ) + + program = self._program_from_tf_ssa() + logging.debug("program:\n{}".format(program)) + return program + + # @abstractmethod + def _graph_def_from_model(self, outputs=None): + """Load TensorFlow model into GraphDef. Overwrite for different TF versions.""" + pass + + # @abstractmethod + def _tf_ssa_from_graph_def(self, fn_name="main"): + """Load GraphDef and parse into NetworkEnsemble (TFSSA).""" + pass + + # @abstractmethod + def _program_from_tf_ssa(self): + """Load NetworkEnsemble (TFSSA) and parse into MIL program.""" + pass + + @staticmethod + def extract_sub_graph(graph_def, outputs=None): + """Extract sub-graph based on user-provided outputs.""" + if outputs is None or len(outputs) == 0: + return graph_def + msg = "Extracting sub-graph based on outputs '{}' from the full model" + logging.debug(msg.format(outputs)) + outputs = outputs if isinstance(outputs, list) else [outputs] + outputs = [i.split(":")[0] for i in outputs] + return tf.compat.v1.graph_util.extract_sub_graph(graph_def, outputs) + + +class TF1Loader(TFLoader): + def __init__(self, model, debug=False, **kwargs): + """ + TensorFlow 1.x model loader. + + Parameters + ---------- + model: Model created with TensorFlow 1.x + One of the following model format: + - TensorFlow tf.Graph object or frozen graph (.pb) file path + - TensorFlow tf.keras.Model object or HDF5 (.h5) file path + - TensorFlow SavedModel directory path + debug: bool, optional. Defaults to False. + This flag should generally be False except for debugging purposes + for diagnosing conversion errors. Setting this flag to True will + cause graph pass errors to be ignored, forcefully returning a + NetworkEnsemble object. + kwargs: dict(str, Any), optional + Dictionary of additional arguments. 
+ """ + TFLoader.__init__(self, model, debug, **kwargs) + + def _graph_def_from_model(self, outputs=None): + """Overwrites TFLoader._graph_def_from_model()""" + msg = "Expected model format: [tf.Graph | .pb | SavedModel | tf.keras.Model | .h5], got {}" + if isinstance(self.model, tf.Graph) and hasattr(self.model, "as_graph_def"): + graph_def = self.model.as_graph_def(add_shapes=True) + return self.extract_sub_graph(graph_def, outputs) + elif isinstance(self.model, tf.keras.Model): + graph_def = self._from_tf_keras_model(self.model) + return self.extract_sub_graph(graph_def, outputs) + elif isinstance(self.model, six.string_types): + if not os.path.exists(str(self.model)): + raise ValueError('Input model "{}" does not exist'.format(self.model)) + elif os.path.isfile(str(self.model)) and self.model.endswith(".pb"): + with tf.io.gfile.GFile(self.model, "rb") as f: + gd = tf.compat.v1.GraphDef() + gd.ParseFromString(f.read()) + with tf.Graph().as_default() as graph: + tf.graph_util.import_graph_def(gd, name="") + graph_def = graph.as_graph_def(add_shapes=True) + return self.extract_sub_graph(graph_def, outputs) + elif os.path.isfile(str(self.model)) and self.model.endswith(".h5"): + graph_def = self._from_tf_keras_model(self.model) + return self.extract_sub_graph(graph_def, outputs) + elif os.path.isdir(str(self.model)): + graph_def = self._from_saved_model(self.model) + return self.extract_sub_graph(graph_def, outputs) + else: + raise NotImplementedError(msg.format(self.model)) + else: + raise NotImplementedError(msg.format(self.model)) + + def _tf_ssa_from_graph_def(self, fn_name="main"): + """Overwrites TFLoader._tf_ssa_from_graph_def()""" + graph_dict = {} + for node in self._graph_def.node: + graph_dict[node.name] = ParsedTFNode(node) + + tensor_array_resource_removal(graph_dict) + graph = insert_get_tuple(graph_dict) + graph = fill_outputs(graph) + delete_disconnected_nodes(graph) + + tf_ssa = NetworkEnsemble() + tf_ssa.functions[fn_name] = SSAFunction(graph) + return tf_ssa + + def _program_from_tf_ssa(self): + """Overwrites TFLoader._mil_program_from_tf_ssa()""" + # Applying frontend passes on TFSSA. Note that these are different from + # passes applied to MIL in TF frontend. 
+ tf_passes = [ + delete_asserts, + functionalize_loops, + constant_propagation, + cond_to_where, + remove_variable_nodes, + fuse_dilation_conv, + ] + + if self.debug: + for tf_pass in _tqdm( + tf_passes, desc="Running TensorFlow Graph Passes", unit=" passes" + ): + try: + tf_pass(self._tf_ssa) + except Exception as e: + logging.exception('Exception in pass "{}": {}'.format(tf_pass, e)) + logging.info("Ignoring exception and continuing to next pass") + else: + for tf_pass in _tqdm( + tf_passes, desc="Running TensorFlow Graph Passes", unit=" passes" + ): + tf_pass(self._tf_ssa) + + if self.debug: + import graphviz + + dot_string = self._tf_ssa.get_dot_string( + annotation=True, name_and_op_style=True, highlight_debug_nodes=[] + ) + graphviz.Source(dot_string).view( + filename="/tmp/ssa_after_tf_passes", cleanup=True + ) + + converter = TFConverter(self._tf_ssa, **self.kwargs) + return converter.convert() + + @staticmethod + def _from_saved_model(saved_model_dir): + from tensorflow.python.tools import freeze_graph + + # must import here as tf.contrib is only available on TF 1.x + from tensorflow.contrib.saved_model.python.saved_model import reader + + saved_model_tags = reader.get_saved_model_tag_sets(saved_model_dir)[0] + if not saved_model_tags: + msg = "Unsupported SavedModel directory format: no tag_sets available" + raise NotImplementedError(msg) + + # get model outputs + output_node_names = [] + with tf.compat.v1.Session() as sess: + metagraph = tf.saved_model.loader.load( + sess, saved_model_tags, saved_model_dir + ) + for sd in metagraph.signature_def.values(): + output_node_names += [o.name.split(":")[0] for o in sd.outputs.values()] + + # get frozen graph + output_graph = mktemp() + tf.compat.v1.reset_default_graph() + freeze_graph.freeze_graph( + input_graph=None, + input_saver=None, + input_binary=None, + input_checkpoint=None, + output_node_names=",".join(output_node_names), + restore_op_name=None, + filename_tensor_name=None, + output_graph=output_graph, + clear_devices=True, + initializer_nodes="", + variable_names_whitelist="", + variable_names_blacklist="", + input_meta_graph=None, + input_saved_model_dir=saved_model_dir, + saved_model_tags=",".join(saved_model_tags), + ) + + graph_def = tf.compat.v1.GraphDef() + with open(output_graph, "rb") as f: + graph_def.ParseFromString(f.read()) + graph_def = tf.compat.v1.graph_util.remove_training_nodes(graph_def) + with tf.Graph().as_default() as graph: + tf.graph_util.import_graph_def(graph_def, name="") + return graph.as_graph_def(add_shapes=True) + + @staticmethod + def _from_tf_keras_model(keras_model): + from tensorflow.python.keras.saving import saving_utils + from tensorflow.python.framework.convert_to_constants import ( + convert_variables_to_constants_v2, + ) + + if not isinstance(keras_model, tf.keras.Model): + keras_model = tf.keras.models.load_model(keras_model, None) + + tf.keras.backend.clear_session() + tf.keras.backend.set_learning_phase(False) + fn = saving_utils.trace_model_call(keras_model) + cf = fn.get_concrete_function() + try: + frozen_fn = convert_variables_to_constants_v2(cf) + return frozen_fn.graph.as_graph_def(add_shapes=True) + except Exception: + raise NotImplementedError("Unhandled tf.keras model format") diff --git a/coremltools/converters/mil/frontend/tensorflow/naming_utils.py b/coremltools/converters/mil/frontend/tensorflow/naming_utils.py new file mode 100644 index 000000000..10c754b72 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/naming_utils.py @@ -0,0 +1,42 @@ +# -*- 
coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+import six
+
+_varname_charset = set(
+    [chr(i) for i in range(ord("A"), ord("Z") + 1)]
+    + [chr(i) for i in range(ord("a"), ord("z") + 1)]
+    + [chr(i) for i in range(ord("0"), ord("9") + 1)]
+    + ["_"]
+)
+
+
+def escape_name(name):
+    ret = "".join([i if i in _varname_charset else "_" for i in name])
+    if ret.endswith("_"):
+        return ret
+    else:
+        return ret + "_"
+
+
+def escape_fn_name(name):
+    ret = escape_name(name)
+    if ret.startswith("f_"):
+        return ret
+    else:
+        return "f_" + ret
+
+
+def normalize_names(names):
+    if isinstance(names, six.string_types):
+        return names.replace(":", "__").replace("/", "__")
+    return [i.replace(":", "__").replace("/", "__") for i in names]
diff --git a/coremltools/converters/mil/frontend/tensorflow/ops.py b/coremltools/converters/mil/frontend/tensorflow/ops.py
new file mode 100644
index 000000000..380fe3dc1
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow/ops.py
@@ -0,0 +1,2628 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import logging as _logging
+from six import string_types as _string_types
+import numpy as _np
+
+from coremltools.converters.mil.mil.ops import get_const_mode
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.ops.defs._utils import broadcast_shapes
+from .convert_utils import convert_graph
+from .tf_op_registry import register_tf_op
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil.types.symbolic import is_symbolic
+
+
+def _is_scalar(type_):
+    if type_ is None:
+        return False
+    result = types.is_int(type_) or types.is_float(type_) or types.is_bool(type_)
+    if types.is_tensor(type_) and (len(type_.get_shape()) == 0):
+        result = True
+    return result
+
+
+def _transpose_NHWC_to_NCHW(x):
+    return mb.transpose(x=x, perm=[0, 3, 1, 2])
+
+
+def _transpose_NCHW_to_NHWC(x, node_name):
+    return mb.transpose(x=x, perm=[0, 2, 3, 1], name=node_name)
+
+
+def _transpose_NDHWC_to_NCDHW(x):
+    return mb.transpose(x=x, perm=[0, 4, 1, 2, 3])
+
+
+def _transpose_NCDHW_to_NDHWC(x, node_name):
+    return mb.transpose(x=x, perm=[0, 2, 3, 4, 1], name=node_name)
+
+
+def _check_axes_type(x):
+    if x is None or x.val is None:
+        return None
+    if isinstance(x.val, _np.int32):
+        return _np.array([x.val])
+    return x.val
+
+
+def _value_at(x, idx):
+    """
+    input x: 1D tensor (vector).
+    returns the value at index idx, i.e., x[idx].
+ """ + assert x.rank == 1 + return mb.slice_by_index(x=x, begin=[idx], end=[0], squeeze_mask=[True]) + + +@register_tf_op(tf_alias=["BiasAdd", "AddV2"]) +def Add(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.add(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Abs(context, node): + x = context[node.inputs[0]] + x = mb.abs(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Acos(context, node): + x = context[node.inputs[0]] + x = mb.acos(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def All(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_prod(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Any(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_sum(x=x, axes=axes, keep_dims=keep_dims) + x = mb.greater(x=x, y=0.0, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ArgMax(context, node): + x = context[node.inputs[0]] + axis = context[node.inputs[1]] + x = mb.reduce_argmax(x=x, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ArgMin(context, node): + x = context[node.inputs[0]] + axis = context[node.inputs[1]] + x = mb.reduce_argmin(x=x, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Asin(context, node): + x = context[node.inputs[0]] + x = mb.asin(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Atan(context, node): + x = context[node.inputs[0]] + x = mb.atan(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Atanh(context, node): + x = context[node.inputs[0]] + x = mb.atanh(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def AvgPool(context, node): + x = context[node.inputs[0]] + in_shape = x.sym_type.get_shape() + d_rank = len(in_shape) - 2 + data_format = node.attr.get("data_format", "NHWC") + ksize = node.attr.get("ksize", None) + kernel_sizes = _pool_pads_or_strides(ksize, data_format, d_rank) + strides = node.attr.get("strides", None) + if strides is not None: + strides = _pool_pads_or_strides(strides, data_format, d_rank) + pad_type = node.attr["padding"].lower() + if data_format == "NHWC": + x = _transpose_NHWC_to_NCHW(x) + x = mb.avg_pool( + x=x, + kernel_sizes=kernel_sizes, + strides=strides, + pad_type=pad_type, + exclude_padding_from_average=True, + ) + x = _transpose_NCHW_to_NHWC(x, node.name) + else: + x = mb.avg_pool( + x=x, + kernel_sizes=kernel_sizes, + strides=strides, + pad_type=pad_type, + exclude_padding_from_average=True, + name=node.name, + ) + context.add(node.name, x) + + +@register_tf_op +def AvgPool3D(context, node): + x = context[node.inputs[0]] + d_rank = x.rank - 2 + data_format = node.attr.get("data_format", "NDHWC") + ksize = node.attr.get("ksize", None) + kernel_sizes = _pool_pads_or_strides(ksize, data_format, d_rank) + strides = node.attr.get("strides", None) + if strides is not None: + strides = _pool_pads_or_strides(strides, data_format, d_rank) + pad_type = node.attr["padding"].lower() + if data_format == "NDHWC": + x = _transpose_NDHWC_to_NCDHW(x) + x = mb.avg_pool( + x=x, + kernel_sizes=kernel_sizes, + strides=strides, + pad_type=pad_type, + exclude_padding_from_average=True, + ) + x = 
_transpose_NCDHW_to_NDHWC(x, node.name) + else: + x = mb.avg_pool( + x=x, + kernel_sizes=kernel_sizes, + strides=strides, + pad_type=pad_type, + exclude_padding_from_average=True, + name=node.name, + ) + + context.add(node.name, x) + + +@register_tf_op +def AddN(context, node): + values = [context[name] for name in node.inputs] + x = mb.addn(values=values, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def BatchToSpaceND(context, node): + x = context[node.inputs[0]] + block_shape = context[node.inputs[1]].val + crops = context[node.inputs[2]].val + + if x.rank != 3 and x.rank != 4: + raise NotImplementedError("rank of input must be 3 or 4!") + + if block_shape is None or crops is None: + raise NotImplementedError( + "Not support dynamic block_shape and paddings for BatchToSpaceND!" + ) + + if len(block_shape.flatten()) > 2: + raise NotImplementedError("rank of spatial shape > 2 is not yet supported") + + if x.rank == 3 or (x.rank == 4 and len(block_shape) == 1): + + input_shape = mb.shape(x=x) + rank = x.rank + spatial_rank = len(block_shape) + + # reshape input to [block_shape] + [batch_size/prod(block_shape)] + x.shape[1:] + batch_size = _value_at(input_shape, 0) + block_shape_prod = _np.prod(block_shape) + resize_batch_size = mb.real_div(x=batch_size, y=block_shape_prod) + resize_batch_size = [mb.cast(x=resize_batch_size, dtype="int32")] + remain_dims = [_value_at(input_shape, i) for i in range(1, rank)] + block_dims = [dim for dim in block_shape] + reshape_values = block_dims + resize_batch_size + remain_dims + reshape_shape = mb.concat(values=reshape_values, axis=0) + reshaped = mb.reshape(x=x, shape=reshape_shape) + + # permute the tensor to shape [batch / prod(block_shape)] + + # [input_shape[1], block_shape[0], ..., input_shape[M], block_shape[M-1]] + + # [input_shape[M+1], ..., input_shape[N-1]] + block_shape_dims = list(range(spatial_rank)) + batch_dim = [spatial_rank] + input_shape_dims = list(range(spatial_rank + 1, reshaped.rank)) + perm = [batch_dim[0]] + for i in range(spatial_rank): + perm += [input_shape_dims[i], block_shape_dims[i]] + perm += input_shape_dims[spatial_rank:] + permuted = mb.transpose(x=reshaped, perm=perm) + + # reshape tensor to shape [batch / prod(block_shape)] + + # [input_shape[1] * block_shape[0], ..., input_shape[M] * block_shape[M-1]] + + # [input_shape[M+1], ..., input_shape[N-1]] + spatial_dims = [] + for i in range(spatial_rank): + spatial_dims.append( + mb.mul(x=_value_at(input_shape, i + 1), y=block_shape[i]) + ) + remain_dims = [_value_at(input_shape, i) for i in range(spatial_rank + 1, rank)] + reshape_values = resize_batch_size + spatial_dims + remain_dims + reshape_shape = mb.concat(values=reshape_values, axis=0) + reshape_permuted = mb.reshape(x=permuted, shape=reshape_shape) + + # crop the tensor using stride slice + begin = [0] + for i in range(spatial_rank): + begin.append(crops[i][0]) + for i in range(spatial_rank + 1, rank): + begin.append(0) + end = [resize_batch_size[0]] + for i in range(spatial_rank): + end.append(mb.sub(x=spatial_dims[i], y=crops[i][1])) + for i in range(spatial_rank + 1, rank): + end += remain_dims + end = mb.concat(values=end, axis=0) + x = mb.slice_by_index(x=reshape_permuted, begin=begin, end=end, name=node.name) + else: + if len(block_shape.flatten()) != 2: + raise NotImplementedError( + "rank of spatial shape != 2 is not yet supported for 4d input." 
+ ) + if block_shape[0] != block_shape[1]: + raise NotImplementedError("non-equal block shape is not yet supported") + + needs_cropping = any(crops.flatten()) + + x = mb.transpose(x=x, perm=[3, 0, 1, 2]) + + x = mb.depth_to_space(x=x, block_size=block_shape[0]) + if needs_cropping: + x = mb.crop( + x=x, + crop_height=[crops[0][0], crops[0][1]], + crop_width=[crops[1][0], crops[1][1]], + ) + + x = mb.transpose(x=x, perm=[1, 2, 3, 0], name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Ceil(context, node): + x = context[node.inputs[0]] + x = mb.ceil(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Const(context, node): + if node.value is None: + raise ValueError("Const node '{}' cannot have no value".format(node.name)) + mode = get_const_mode(node.value.val) + x = mb.const(val=node.value.val, mode=mode, name=node.name) + context.add(node.name, x) + + +def _conv2d3d_strides_or_dilations(name, value, data_format, default_value=1): + """Compute strides or dilation values for 2D and 3D convolutions.""" + if value is None: + value = default_value + if not isinstance(value, (int, list)): + raise ValueError("{} must be an int or list".format(name)) + + # Parse number of spatial dimensions from `data_format`, assuming N (batch) and C + # (input channels) are present + n_dims = len(data_format) - 2 + + if isinstance(value, int): + return [value] * n_dims + + if len(value) == 1: + return value * n_dims + if len(value) == n_dims: + return value + if len(value) != n_dims + 2: + raise ValueError( + "{} must have length 1, {}, or {}".format(name, n_dims, n_dims + 2) + ) + + if data_format == "NHWC": + # Only support stride/dilation along N, C == 1 + if not (value[0] == value[3] == 1): + raise ValueError( + "{} along N and C other than 1 not implemented".format(name) + ) + return value[1:3] + elif data_format == "NCHW" or data_format == "NCDHW": + if not (value[0] == value[1] == 1): + raise ValueError( + "{} along N and C other than 1 not implemented".format(name) + ) + return value[2:] + # "NDHWC" + if not (value[0] == value[4] == 1): + raise ValueError("{} along N and C other than 1 not implemented".format(name)) + return value[1:4] + + +@register_tf_op +def Cos(context, node): + x = context[node.inputs[0]] + x = mb.cos(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Cosh(context, node): + x = context[node.inputs[0]] + x = mb.cosh(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Einsum(context, node): + equation = node.attr["equation"] + if equation == "bnqd,bnkd->bnqk": + a = context[node.inputs[0]] + b = context[node.inputs[1]] + x = mb.matmul(x=a, y=b, transpose_x=False, transpose_y=True, name=node.name) + context.add(node.name, x) + elif equation == "abc,cd->abd": + a = context[node.inputs[0]] + b = context[node.inputs[1]] + x = mb.matmul(x=a, y=b, transpose_x=False, transpose_y=False, name=node.name) + context.add(node.name, x) + elif equation == "abc,cde->abde": + a = context[node.inputs[0]] + b = context[node.inputs[1]] + x_1 = mb.reshape(x=a, shape=[a.shape[0] * a.shape[1], a.shape[2]]) + x_2 = mb.reshape(x=b, shape=[b.shape[0], b.shape[1] * b.shape[2]]) + x = mb.matmul(x=x_1, y=x_2, transpose_x=False, transpose_y=False) + x = mb.reshape( + x=x, shape=[a.shape[0], a.shape[1], b.shape[1], b.shape[2]], name=node.name + ) + context.add(node.name, x) + elif equation == "BTNH,BFNH->BNFT": + a = context[node.inputs[0]] + b = context[node.inputs[1]] + a = mb.transpose(x=a, perm=[0, 2, 1, 3]) + b = 
mb.transpose(x=b, perm=[0, 2, 1, 3])
+        x = mb.matmul(x=b, y=a, transpose_x=False, transpose_y=True, name=node.name)
+        context.add(node.name, x)
+    elif equation == "BNFT,BTNH->BFNH":
+        a = context[node.inputs[0]]
+        b = context[node.inputs[1]]
+        b = mb.transpose(x=b, perm=[0, 2, 1, 3])
+        x = mb.matmul(x=a, y=b, transpose_x=False, transpose_y=False)
+        x = mb.transpose(x=x, perm=[0, 2, 1, 3], name=node.name)
+        context.add(node.name, x)
+    elif equation == "abcd,cde->abe":
+        a = context[node.inputs[0]]
+        b = context[node.inputs[1]]
+        x_1 = mb.reshape(x=a, shape=[a.shape[0], a.shape[1], a.shape[2] * a.shape[3]])
+        x_2 = mb.reshape(x=b, shape=[b.shape[0] * b.shape[1], b.shape[2]])
+        x = mb.matmul(
+            x=x_1, y=x_2, transpose_x=False, transpose_y=False, name=node.name
+        )
+        context.add(node.name, x)
+    else:
+        raise NotImplementedError(
+            "Einsum unsupported equation format: {}".format(node.attr["equation"])
+        )
+
+
+@register_tf_op
+def Equal(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.equal(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def ExtractImagePatches(context, node):
+    x = context[node.inputs[0]]
+    sizes = node.attr.get("ksizes")
+    strides = node.attr.get("strides")
+    rates = node.attr.get("rates")
+    padding = node.attr.get("padding")
+    if x.rank != 4:
+        raise ValueError("input for ExtractImagePatches should be a 4D tensor.")
+    if not all([rate == 1 for rate in rates]):
+        raise NotImplementedError(
+            "only rates with all 1s are implemented for ExtractImagePatches."
+        )
+    if len(sizes) != 4 or sizes[0] != 1 or sizes[3] != 1:
+        raise ValueError(
+            "ExtractImagePatches only supports sizes (4D tensor) with 1s for batch and channel dimensions."
+        )
+    if len(strides) != 4 or strides[0] != 1 or strides[3] != 1:
+        raise ValueError(
+            "ExtractImagePatches only supports strides (4D tensor) with 1s for batch and channel dimensions."
+ ) + if not padding in ["VALID", "SAME"]: + raise ValueError("non-supported padding for ExtractImagePatches.") + h, w = x.shape[1], x.shape[2] + + # padding for SAME mode + if padding == "SAME": + delta_h = h % strides[1] if h % strides[1] != 0 else strides[1] + delta_w = w % strides[2] if w % strides[2] != 0 else strides[2] + last_h = h - delta_h + 1 + last_w = w - delta_w + 1 + pad_h = max(0, last_h + sizes[1] - 1 - h) + pad_w = max(0, last_w + sizes[2] - 1 - w) + pad_h = [pad_h // 2, pad_h // 2 if pad_h % 2 == 0 else pad_h // 2 + 1] + pad_w = [pad_w // 2, pad_w // 2 if pad_w % 2 == 0 else pad_w // 2 + 1] + pad = _np.array([[0, 0], pad_h, pad_w, [0, 0]]).astype(_np.int32) + pad = pad.reshape(-1) + if not all(pad == 0): + x = mb.pad(x=x, pad=pad, mode="constant", constant_val=0.0) + h, w = x.shape[1], x.shape[2] + + # compute boxes + batch = x.shape[0] + boxes = [] + h_index = list(range(0, h - sizes[1] + 1, strides[1])) + w_index = list(range(0, w - sizes[2] + 1, strides[2])) + for hi in h_index: + for wi in w_index: + boxes.append((hi, wi, hi + sizes[1] - 1, wi + sizes[2] - 1)) + + boxes = _np.array(boxes) + box_indices = _np.arange(batch) + box_indices = _np.tile(box_indices, (len(boxes), 1)) + box_indices = _np.transpose(box_indices) + box_indices = box_indices.reshape(-1, 1) + boxes = _np.tile(boxes, (batch, 1)) + boxes = _np.concatenate([box_indices, boxes], axis=1) + boxes = boxes.reshape(boxes.shape[0], 1, boxes.shape[1], 1, 1) + + # use crop_and_resize + x = _transpose_NHWC_to_NCHW(x) + x = mb.crop_resize( + x=x, + roi=boxes, + target_height=sizes[1], + target_width=sizes[2], + normalized_coordinates=False, + spatial_scale=1.0, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode="ALIGN_CORNERS", + ) + x = mb.squeeze(x=x, axes=[1]) + x = _transpose_NCHW_to_NHWC(x, node_name=node.name + "_transpose_to_nhwc") + x = mb.reshape(x=x, shape=(batch, len(h_index), len(w_index), -1), name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Exp(context, node): + x = context[node.inputs[0]] + x = mb.exp(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Floor(context, node): + x = context[node.inputs[0]] + x = mb.floor(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def FloorDiv(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.floor_div(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Greater(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.greater(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def GreaterEqual(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.greater_equal(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Less(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.less(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def LessEqual(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.less_equal(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Log(context, node): + x = context[node.inputs[0]] + x = mb.log(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def LogicalAnd(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.logical_and(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def LogicalNot(context, node): + x = 
context[node.inputs[0]]
+    x = mb.logical_not(x=x, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def LogicalOr(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.logical_or(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def LogicalXor(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.logical_xor(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def LRN(context, node):
+    x = context[node.inputs[0]]
+    depth_radius = node.attr.get("depth_radius")
+    size = (depth_radius * 2) + 1
+    alpha = node.attr.get("alpha") * size
+    beta = node.attr.get("beta")
+    bias = node.attr.get("bias")
+    x = _transpose_NHWC_to_NCHW(x)
+    x = mb.local_response_norm(x=x, size=size, alpha=alpha, beta=beta, k=bias)
+    x = _transpose_NCHW_to_NHWC(x, node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Maximum(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.maximum(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Minimum(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.minimum(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def FloorMod(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    floor = mb.floor_div(x=x, y=y, name=node.name + "_floor_div")
+    floor_multiply = mb.mul(x=floor, y=y, name=node.name + "_multiply")
+    x = mb.sub(x=x, y=floor_multiply, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Mul(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.mul(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Neg(context, node):
+    x = context[node.inputs[0]]
+    x = mb.mul(x=x, y=-1, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def NotEqual(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.not_equal(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Pow(context, node):
+    x = context[node.inputs[0]]
+    y = context[node.inputs[1]]
+    x = mb.pow(x=x, y=y, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def DepthwiseConv2dNative(context, node):
+    # [kH, kW, C_in, multiplier]
+    W_hwim = context[node.inputs[1]]  # m = multiplier
+    # [kH, kW, 1, C_in * multiplier]
+    shape_hw1o = list(W_hwim.shape[:2]) + [1, W_hwim.shape[2] * W_hwim.shape[3]]
+    W_hw1o = mb.reshape(x=W_hwim, shape=shape_hw1o)
+    # [C_in * multiplier, 1, kH, kW]. Note that C_in * multiplier = C_out in
+    # MIL. C_in / groups = 1 in depthwise conv.
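+    #
+    # (Illustrative) concrete shapes for the two steps above and below,
+    # assuming kH = kW = 3, C_in = 8, multiplier = 2:
+    #     W_hwim: (3, 3, 8, 2) -> W_hw1o: (3, 3, 1, 16) -> W_o1hw: (16, 1, 3, 3)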
+ W_o1hw = mb.transpose(x=W_hw1o, perm=[3, 2, 0, 1]) + data_format = node.attr.get("data_format", "NHWC") + HW_dilations = _conv2d3d_strides_or_dilations( + "dilations", node.attr.get("dilations"), data_format + ) + HW_strides = _conv2d3d_strides_or_dilations( + "strides", node.attr.get("strides"), data_format + ) + + pad_type = node.attr.get("padding") + if pad_type not in ["VALID", "SAME"]: + raise ValueError("Invalid padding type for tf.nn.depthwise_conv2d") + + pad_type = pad_type.lower() + x = context[node.inputs[0]] + C_in = x.shape[-1] + if data_format == "NHWC": + x = _transpose_NHWC_to_NCHW(x) + # Only the last op should have the same name as node.name + conv_name = node.name + "x" if data_format == "NHWC" else node.name + x = mb.conv( + x=x, + weight=W_o1hw, + pad_type=pad_type, + strides=HW_strides, + dilations=HW_dilations, + groups=C_in, + name=conv_name, + ) + if data_format == "NHWC": + x = _transpose_NCHW_to_NHWC(x, node.name) + context.add(node.name, x) + + +@register_tf_op +def Conv2D(context, node): + W_hwio = context[node.inputs[1]] + W_oihw = mb.transpose(x=W_hwio, perm=[3, 2, 0, 1]) + data_format = node.attr.get("data_format", "NHWC") + HW_dilations = _conv2d3d_strides_or_dilations( + "dilations", node.attr.get("dilations"), data_format + ) + HW_strides = _conv2d3d_strides_or_dilations( + "strides", node.attr.get("strides"), data_format + ) + + pad_type = node.attr.get("padding") + pad_type = pad_type.lower() + pad_type = "custom" if pad_type == "explicit" else pad_type + assert pad_type in {"same", "valid", "custom"} + x = context[node.inputs[0]] + if data_format == "NHWC": + x = _transpose_NHWC_to_NCHW(x) + if pad_type == "custom": + pad_val = node.attr["explicit_paddings"] + pad_val = pad_val[2:-2] + elif data_format == "NCHW" and pad_type == "custom": + pad_val = node.attr["explicit_paddings"] + pad_val = pad_val[4:] + # Only the last op should have the same name as node.name + conv_name = node.name + "x" if data_format == "NHWC" else node.name + if pad_type == "custom": + x = mb.conv( + x=x, + weight=W_oihw, + pad_type=pad_type, + strides=HW_strides, + dilations=HW_dilations, + name=conv_name, + pad=pad_val, + ) + else: + x = mb.conv( + x=x, + weight=W_oihw, + pad_type=pad_type, + strides=HW_strides, + dilations=HW_dilations, + name=conv_name, + ) + if data_format == "NHWC": + x = _transpose_NCHW_to_NHWC(x, node.name) + context.add(node.name, x) + + +@register_tf_op +def Conv3D(context, node): + W_dhwio = context[node.inputs[1]] + W_oidhw = mb.transpose(x=W_dhwio, perm=[4, 3, 0, 1, 2]) + data_format = node.attr.get("data_format", "NDHWC") + DHW_dilations = _conv2d3d_strides_or_dilations( + "dilations", node.attr.get("dilations"), data_format + ) + DHW_strides = _conv2d3d_strides_or_dilations( + "strides", node.attr.get("strides"), data_format + ) + + pad_type = node.attr.get("padding") + if not isinstance(pad_type, _string_types): + pad_type = "custom" + raise NotImplementedError("Custom padding not implemented for TF") + pad_type = pad_type.lower() + x = context[node.inputs[0]] + if data_format == "NDHWC": + # Convert input to NCDHW + x = _transpose_NDHWC_to_NCDHW(x) + # Only the last op should have the same name as node.name + conv_name = node.name + "x" if data_format == "NDHWC" else node.name + x = mb.conv( + x=x, + weight=W_oidhw, + pad_type=pad_type, + strides=DHW_strides, + dilations=DHW_dilations, + name=conv_name, + ) + if data_format == "NDHWC": + # Convert input back to NDHWC (from NCDHW) + x = _transpose_NCDHW_to_NDHWC(x, node.name) + 
context.add(node.name, x) + + +@register_tf_op +def Conv3DBackpropInputV2(context, node): + # Output shape: [N, D_out, H_out, W_out, C_out] + output_shape = context[node.inputs[0]].val + # Weight shape: [D, H, W, C_out, C_in] + W_dhwoi = context[node.inputs[1]] + W_oidhw = mb.transpose(x=W_dhwoi, perm=[3, 4, 0, 1, 2]) + # Input shape: [N, D_in, H_in, W_in, C_in] + x = context[node.inputs[2]] + + data_format = node.attr.get("data_format", "NDHWC") + DHW_dilations = _conv2d3d_strides_or_dilations( + "dilations", node.attr.get("dilations"), data_format + ) + DHW_strides = _conv2d3d_strides_or_dilations( + "strides", node.attr.get("strides"), data_format + ) + pad_type = node.attr.get("padding", None) + + if pad_type is None: + raise ValueError("Padding type not specified for op: {}".format(node.name)) + + if not isinstance(pad_type, _string_types): + pad_type = "custom" + raise NotImplementedError("Custom padding not implemented for TF") + pad_type = pad_type.lower() + + if data_format == "NDHWC": + # Convert input to NCDHW + x = _transpose_NDHWC_to_NCDHW(x) + if output_shape is not None: + output_shape = [output_shape[1], output_shape[2], output_shape[3]] + else: + if output_shape is not None: + output_shape = [output_shape[2], output_shape[3], output_shape[4]] + + # Only the last op should have the same name as node.name + conv_name = node.name + "_x" if data_format == "NDHWC" else node.name + # Pass output shape provided above + # TODO: rdar://63968613 ([deconv3d] Deconv_3d top_shapes_for_bottom_shapes does not sets output channel if output shape is provided) + x = mb.conv_transpose( + x=x, + weight=W_oidhw, + pad_type=pad_type, + strides=DHW_strides, + output_shape=None, + dilations=DHW_dilations, + name=conv_name, + ) + if data_format == "NDHWC": + # Convert input back to NDHWC (from NCDHW) + x = _transpose_NCDHW_to_NDHWC(x, node.name) + context.add(node.name, x) + + +@register_tf_op +def DepthToSpace(context, node): + x = context[node.inputs[0]] + block_size = node.attr.get("block_size") + data_format = node.attr.get("data_format", "NHWC") + if data_format == "NHWC": + x = _transpose_NHWC_to_NCHW(x) + x = mb.depth_to_space(x=x, block_size=block_size) + x = _transpose_NCHW_to_NHWC(x, node.name) + else: + x = mb.depth_to_space(x=x, block_size=block_size, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def EuclideanNorm(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_l2_norm(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ExpandDims(context, node): + x = context[node.inputs[0]] + axis = context[node.inputs[1]] + if axis.op.op_type == "const" and (axis.val is not None and axis.val.size == 1): + axis = axis.val[0] if axis.shape == (1,) else axis.val + else: + raise ValueError("Expand Dims: Invalid value for parameter axis") + x = mb.expand_dims(x=x, axes=[axis], name=node.name) + context.add(node.name, x) + + +@register_tf_op(tf_alias=["FusedBatchNormV2", "FusedBatchNormV3"]) +def FusedBatchNorm(context, node): + # Get attributes + data_format = node.attr.get("data_format", "NHWC") + epsilon = node.attr.get("epsilon", None) + + # Get inputs + x = context[node.inputs[0]] + scale = context[node.inputs[1]] + offset = context[node.inputs[2]] + mean = context[node.inputs[3]] + variance = context[node.inputs[4]] + if data_format == "NHWC": + # TF's FusedBatchNorm is only for 4D inputs + x = 
_transpose_NHWC_to_NCHW(x) + x = mb.batch_norm( + x=x, mean=mean, variance=variance, gamma=scale, beta=offset, epsilon=epsilon + ) + x = _transpose_NCHW_to_NHWC(x, node.name + ":0") + else: + x = mb.batch_norm( + x=x, + mean=mean, + variance=variance, + gamma=scale, + beta=offset, + epsilon=epsilon, + name=node.name + ":0", + ) + # Inference only batch norm does not have meaningful outputs for + # batch_mean, batch_variance etc. + context.add(node.name, [x, mean, variance]) + + +@register_tf_op +def Fill(context, node): + shape = context[node.inputs[0]] + value = context[node.inputs[1]] + x = mb.fill(shape=shape, value=value, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def RealDiv(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.real_div(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Rsqrt(context, node): + x = context[node.inputs[0]] + x = mb.rsqrt(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Sub(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.sub(x=x, y=y, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def StopGradient(context, node): + Identity(context, node) + + +@register_tf_op +def Identity(context, node): + x = context[node.inputs[0]] + # In many cases we can skip and just make downstream ops reference the + # pre-identity op. However, when identity is an output or pre-identity + # is a placeholder, an identity op, or mb.mul(x, 1.0) is required. + if len(node.outputs) != 0 or x.op is not None: + context.add(node.name, x, is_new_var=False) + else: + x = mb.mul(x=x, y=1.0, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Print(context, node): + Identity(context, node) + + +@register_tf_op +def Placeholder(context, node): + # no-op as we add Placeholder separately. 
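+    # (No MIL op is emitted here; per the note above, the converter creates
+    # function inputs for placeholders separately, so this handler is empty.)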
+ pass + + +def _pool_pads_or_strides(tf_spec, data_format, d_rank): + if tf_spec is None: + d_spec = [1] * d_rank + elif not isinstance(tf_spec, list): + d_spec = [tf_spec] * d_rank + elif len(tf_spec) == 2: + d_spec = tf_spec + elif len(tf_spec) == 4: + if data_format == "NHWC": + d_spec = tf_spec[1:3] + else: + d_spec = tf_spec[2:] + elif len(tf_spec) == 5: + if data_format == "NDHWC": + d_spec = tf_spec[1:4] + else: + # NCDHW + d_spec = tf_spec[2:] + else: + raise ValueError("Unsupported tf_spec: %s" % tf_spec) + return d_spec + + +@register_tf_op(tf_alias=["BatchMatMul", "BatchMatMulV2"]) +def MatMul(context, node): + a = context[node.inputs[0]] + b = context[node.inputs[1]] + transpose_a = node.attr.get("adj_x", False) or node.attr.get("transpose_a", False) + transpose_b = node.attr.get("adj_y", False) or node.attr.get("transpose_b", False) + x = mb.matmul( + x=a, y=b, transpose_x=transpose_a, transpose_y=transpose_b, name=node.name + ) + context.add(node.name, x) + + +@register_tf_op +def MaxPool(context, node): + x = context[node.inputs[0]] + in_shape = x.sym_type.get_shape() + d_rank = len(in_shape) - 2 + data_format = node.attr.get("data_format", "NHWC") + ksize = node.attr.get("ksize", None) + kernel_sizes = _pool_pads_or_strides(ksize, data_format, d_rank) + strides = node.attr.get("strides", None) + if strides is not None: + strides = _pool_pads_or_strides(strides, data_format, d_rank) + pad_type = node.attr["padding"].lower() + if data_format == "NHWC": + x = _transpose_NHWC_to_NCHW(x) + x = mb.max_pool( + x=x, kernel_sizes=kernel_sizes, strides=strides, pad_type=pad_type + ) + x = _transpose_NCHW_to_NHWC(x, node.name) + else: + x = mb.max_pool( + x=x, + kernel_sizes=kernel_sizes, + strides=strides, + pad_type=pad_type, + name=node.name, + ) + context.add(node.name, x) + + +@register_tf_op +def MaxPool3D(context, node): + x = context[node.inputs[0]] + d_rank = x.rank - 2 + data_format = node.attr.get("data_format", "NDHWC") + ksize = node.attr.get("ksize", None) + kernel_sizes = _pool_pads_or_strides(ksize, data_format, d_rank) + strides = node.attr.get("strides", None) + if strides is not None: + strides = _pool_pads_or_strides(strides, data_format, d_rank) + pad_type = node.attr["padding"].lower() + if data_format == "NDHWC": + x = _transpose_NDHWC_to_NCDHW(x) + x = mb.max_pool( + x=x, kernel_sizes=kernel_sizes, strides=strides, pad_type=pad_type + ) + x = _transpose_NCDHW_to_NDHWC(x, node.name) + else: + x = mb.max_pool( + x=x, + kernel_sizes=kernel_sizes, + strides=strides, + pad_type=pad_type, + name=node.name, + ) + + context.add(node.name, x) + + +@register_tf_op +def MatrixBandPart(context, node): + x = context[node.inputs[0]] + lower = context[node.inputs[1]] + upper = context[node.inputs[2]] + x = mb.band_part(x=x, lower=lower, upper=upper, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Max(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_max(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Min(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_min(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Prod(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = 
node.attr.get("keep_dims", False) + x = mb.reduce_prod(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Cast(context, node): + type_map = { + types.float: "fp32", + types.double: "fp64", + types.int32: "int32", + types.int64: "int64", + } + if node.attr["DstT"] not in type_map.keys(): + raise NotImplementedError( + "Cast: Provided destination type {} not " + "supported.".format(types.get_type_info(node.attr["DstT"])) + ) + x = context[node.inputs[0]] + dtype = type_map[node.attr["DstT"]] + x = mb.cast(x=x, dtype=dtype, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Round(context, node): + x = context[node.inputs[0]] + x = mb.round(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Sign(context, node): + x = context[node.inputs[0]] + x = mb.sign(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Sin(context, node): + x = context[node.inputs[0]] + x = mb.sin(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Sinh(context, node): + x = context[node.inputs[0]] + x = mb.sinh(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Slice(context, node): + x = context[node.inputs[0]] + begin = context[node.inputs[1]] + size = context[node.inputs[2]] + res = mb.slice_by_size(x=x, begin=begin, size=size, name=node.name) + context.add(node.name, res) + + +@register_tf_op +def Sqrt(context, node): + x = context[node.inputs[0]] + x = mb.sqrt(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Square(context, node): + x = context[node.inputs[0]] + x = mb.square(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def StridedSlice(context, node): + x = context[node.inputs[0]] + begin = context[node.inputs[1]] + end = context[node.inputs[2]] + stride = context[node.inputs[3]] + + def bitmask_to_array(bit): + arr = [] + while bit > 0: + if bit & 1: + arr.append(True) + else: + arr.append(False) + bit >>= 1 + return arr + + begin_mask = bitmask_to_array(node.attr.get("begin_mask", 0)) + end_mask = bitmask_to_array(node.attr.get("end_mask", 0)) + squeeze_mask = bitmask_to_array(node.attr.get("shrink_axis_mask", 0)) + ellipsis_mask = bitmask_to_array(node.attr.get("ellipsis_mask", 0)) + new_axis_mask = bitmask_to_array(node.attr.get("new_axis_mask", 0)) + + def _pad_mask( + x, + begin, + end, + stride, + begin_mask, + end_mask, + squeeze_mask, + ellipsis_mask, + new_axis_mask, + ): + # This function pad the masks, stride, begin and end to the same rank as the input tensor. + if begin.rank != 1: + raise ValueError( + "begin should be 1-D tensor, got {}-D tensor instead".format(begin.rank) + ) + if end.rank != 1: + raise ValueError( + "end should be 1-D tensor, got {}-D tensor instead".format(end.rank) + ) + + # check if inputs can be determined + begin_cache = begin + end_cache = end + begin = [] if begin.val is None else begin.val.tolist() + end = [] if end.val is None else end.val.tolist() + stride = [] if stride is None else stride.val.tolist() + + # pad masks function + new_dims = sum(i == True for i in new_axis_mask) + if new_dims > 0: + x_rank = x.rank + new_dims + else: + x_rank = x.rank + + def pad_array(arr, max_rank, idx, default_value): + """ + This function pads the arr to x_rank with default_value. + idx is the index where ellipis_mask = True. + max_rank is the maximum rank of the masks, stride, begin and end. 
+ """ + mask = arr[:] + mask += [default_value] * (x_rank - len(mask)) + new_mask = [] + + for i in range(max_rank): + num = 1 if i != idx else x_rank - max_rank + 1 + new_mask += [mask[i]] * num + return new_mask + + mask_list = [ + begin_mask, + end_mask, + squeeze_mask, + ellipsis_mask, + new_axis_mask, + stride, + begin, + end, + ] + max_rank = max([len(arr) for arr in mask_list]) + + # If ellipsis_mask is given, the last element of it would be True + # Otherwise, we simply pad each mask by appending default value + if ellipsis_mask != []: + rank = max_rank + idx = len(ellipsis_mask) - 1 + else: + rank = x_rank + idx = -1 + + begin_mask = pad_array(begin_mask, rank, idx, False) + end_mask = pad_array(end_mask, rank, idx, False) + squeeze_mask = pad_array(squeeze_mask, rank, idx, False) + ellipsis_mask = pad_array(ellipsis_mask, rank, idx, False) + new_axis_mask = pad_array(new_axis_mask, rank, idx, False) + stride = pad_array(stride, rank, idx, 1) + + # pad begin and end if they are determined during compile time + if begin != []: + begin = pad_array(begin, rank, idx, 0) + if end != []: + end = pad_array(end, rank, idx, 0) + + # make sure begin_mask, end_mask, and stride are consistent with ellipsis mask + # begin_mask and end_mask should be True, and stride should be 1. + for i, mask in enumerate(ellipsis_mask): + if mask: + begin_mask[i] = True + end_mask[i] = True + stride[i] = 1 + + # make sure begin_mask, end_mask, and stride are consistent with new axis mask + # begin_mask and end_mask should be True, and stride should be 1. + for i, mask in enumerate(new_axis_mask): + if mask: + begin_mask[i] = True + end_mask[i] = True + stride[i] = 1 + + # convert begin and end back to cache value if they are run-time determined + if begin == []: + begin = begin_cache + + if end == []: + end = end_cache + + # check which mask is adding by our default value + # This happens when the given index is less than the tensor rank, + # for instance, indexing a 3D tensor A with A[:1, :1] is equivalent to + # A[:1, :1, :]. 
In this case we should append True to begin_mask and end_mask + if ellipsis_mask == [False] * x_rank: + for i in range(max_rank, x_rank): + begin_mask[i] = True + end_mask[i] = True + + return begin, end, stride, begin_mask, end_mask, squeeze_mask, new_axis_mask + + begin, end, stride, begin_mask, end_mask, squeeze_mask, new_axis_mask = _pad_mask( + x, + begin, + end, + stride, + begin_mask, + end_mask, + squeeze_mask, + ellipsis_mask, + new_axis_mask, + ) + + if sum(i == True for i in new_axis_mask) > 0: + axes = [i for i, val in enumerate(new_axis_mask) if val == True] + x = mb.expand_dims(x=x, axes=axes, name=node.name + "_new_axes") + + x = mb.slice_by_index( + x=x, + name=node.name, + begin=begin, + end=end, + stride=stride, + begin_mask=begin_mask, + end_mask=end_mask, + squeeze_mask=squeeze_mask, + ) + + context.add(node.name, x) + + +@register_tf_op +def Sum(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_sum(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Tan(context, node): + x = context[node.inputs[0]] + x = mb.tan(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def get_tuple(context, node): + x = context[node.inputs[0]] + if not isinstance(x, (list, tuple)): + raise ValueError( + "Op '{}' should return multiple output.".format(node.inputs[0]) + ) + idx = node.attr["index"] + if idx >= len(x): + msg = "Index {} out of range, op '{}' only has {} outputs: {}" + raise IndexError(msg.format(idx, node.inputs[0], len(x), [v.name for v in x])) + context.add(node.name, x[idx], is_new_var=False) + + +@register_tf_op +def Mean(context, node): + x = context[node.inputs[0]] + axes = _check_axes_type(context[node.inputs[1]]) + keep_dims = node.attr.get("keep_dims", False) + x = mb.reduce_mean(x=x, axes=axes, keep_dims=keep_dims, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def MirrorPad(context, node): + x = context[node.inputs[0]] + pad = context[node.inputs[1]] + constant_val = node.attr.get("constant_val", 0.0) + + if pad is None: + raise ValueError("TF `paddings` in Pad op must be const.") + + mode = node.attr.get("mode", "reflect").lower() + in_rank = len(x.sym_type.get_shape()) + + if in_rank > 5 or in_rank < 2: + raise ValueError( + "Unsupported Pad configuration with input rank {}!".format(str(in_rank)) + ) + + if pad.val.shape != (in_rank, 2): + raise ValueError("Padding must have length as input tensor rank.") + + pad = pad.val + + # get axix which is non zero + non_zero_axis = [] + for i in range(len(pad)): + if not all(pad[i] == 0): + non_zero_axis.append(i) + + if len(non_zero_axis) > 2: + raise ValueError("Unsupported configuration for Pad layer!") + + # make padding a 2 x 2 tensor if len(non_zero_axis) < 2 + if len(non_zero_axis) == 0: + non_zero_axis = [0, 1] + + if len(non_zero_axis) == 1: + if non_zero_axis[0] != len(pad) - 1: + non_zero_axis.append(len(pad) - 1) + else: + non_zero_axis = [0, non_zero_axis[0]] + + # transpose the input such that the padding dim is the last two + perm = [i for i in range(in_rank) if i not in non_zero_axis] + non_zero_axis + x = mb.transpose(x=x, perm=perm, name=node.name + "_transpose_1") + pad = pad[non_zero_axis, :] + pad = pad.reshape(-1) + x = mb.pad( + x=x, pad=pad, name=node.name + "_pad", constant_val=constant_val, mode=mode + ) + inverse_perm = [-1] * len(perm) + for i, index in enumerate(perm): + inverse_perm[index] = i 
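+    # Illustrative (hypothetical) example: with perm = [0, 2, 3, 1], the loop
+    # above yields inverse_perm = [0, 3, 1, 2], so the transpose below
+    # restores the original axis order after padding.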
+ x = mb.transpose(x=x, perm=inverse_perm, name=node.name) + + context.add(node.name, x) + + +@register_tf_op +def Pad(context, node): + x = context[node.inputs[0]] + pad = context[node.inputs[1]] + if pad.val is None: + raise ValueError("TF `paddings` in Pad op must be const.") + + mode = node.attr.get("mode", "constant").lower() + constant_val = node.attr.get("constant_val", 0.0) + in_rank = len(x.sym_type.get_shape()) + + if in_rank > 5: + raise ValueError("Unsupported Pad configuration!") + + pad = pad.val.reshape(-1) + x = mb.pad(x=x, pad=pad, name=node.name, mode=mode, constant_val=constant_val) + context.add(node.name, x) + + +@register_tf_op +def PadV2(context, node): + # compared to tf.raw_ops.Pad, tf.raw_ops.PadV2 allow constant values rather than 0. + x = context[node.inputs[0]] + pad = context[node.inputs[1]] + constant_val = context[node.inputs[2]] + + if pad.val is None or constant_val.val is None: + raise NotImplementedError( + "TF `paddings`, `constant_values` in PadV2 op must be const." + ) + if constant_val.shape != (): + raise NotImplementedError( + "TF `constant_values` in PadV2 op must be const scalar." + ) + in_rank = x.rank + if in_rank > 5: + raise ValueError("Unsupported Pad configuration!") + + pad = pad.val.reshape(-1) + constant_val = constant_val.val + if constant_val == -_np.inf: + INT_MIN = -_np.iinfo(_np.int64).max - 1 + constant_val = _np.float(INT_MIN) + + if constant_val == _np.inf: + INT_MAX = _np.iinfo(_np.int64).max + constant_val = _np.float(INT_MAX) + + x = mb.pad(x=x, pad=pad, name=node.name, mode="constant", constant_val=constant_val) + context.add(node.name, x) + + +@register_tf_op +def Relu(context, node): + x = context[node.inputs[0]] + x = mb.relu(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Reciprocal(context, node): + x = context[node.inputs[0]] + x = mb.inverse(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Relu6(context, node): + x = context[node.inputs[0]] + x = mb.relu6(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Reshape(context, node): + x = context[node.inputs[0]] + new_shape = context[node.inputs[1]] + x = mb.reshape(x=x, shape=new_shape, name=node.name) + context.add(node.name, x) + + +@register_tf_op(tf_alias=["ReverseV2"]) +def Reverse(context, node): + x = context[node.inputs[0]] + axes = context[node.inputs[1]] + x = mb.reverse(x=x, axes=axes, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ReverseSequence(context, node): + x = context[node.inputs[0]] + lengths = context[node.inputs[1]] + seq_axis = node.attr.get("seq_dim") + batch_axis = node.attr.get("batch_dim") + x = mb.reverse_sequence( + x=x, lengths=lengths, seq_axis=seq_axis, batch_axis=batch_axis, name=node.name + ) + context.add(node.name, x) + + +@register_tf_op +def Transpose(context, node): + x = context[node.inputs[0]] + perm = context[node.inputs[1]] + x = mb.transpose(x=x, perm=perm, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Squeeze(context, node): + x = context[node.inputs[0]] + axes = node.attr.get("squeeze_dims", []) + if axes == []: + axes = None + x = mb.squeeze(x=x, axes=axes, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Multinomial(context, node): + x = context[node.inputs[0]] + size = context[node.inputs[1]] + x = mb.random_categorical(x=x, size=size, name=node.name) + context.add(node.name, x) + + +@register_tf_op(tf_alias=["Elu"]) +def ELU(context, node): + x = context[node.inputs[0]] + x = 
mb.elu(x=x, alpha=1.0, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op(tf_alias=["Erf"])
+def ERF(context, node):
+    x = context[node.inputs[0]]
+    x = mb.erf(x=x, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op(tf_alias=["LeakyRelu"])
+def LeakyReLU(context, node):
+    x = context[node.inputs[0]]
+    alpha = node.attr["alpha"]
+    x = mb.leaky_relu(x=x, alpha=alpha, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Selu(context, node):
+    # selu(x) = scale * elu(x, alpha), using the fixed SELU constants.
+    x = context[node.inputs[0]]
+    x = mb.elu(x=x, alpha=1.6732632423543772)
+    x = mb.mul(x=x, y=1.0507009873554805, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op(tf_alias=["SelectV2"])
+def Select(context, node):
+    cond = context[node.inputs[0]]
+    a = context[node.inputs[1]]
+    b = context[node.inputs[2]]
+
+    # broadcast a rank-1 cond to match the rank of the inputs
+    rank_cond = cond.rank
+    rank_a = a.rank
+    if rank_cond == 1 and rank_a > 1:
+        axes = [-i - 1 for i in range(rank_a - rank_cond)]
+        cond = mb.expand_dims(x=cond, axes=axes)
+
+    x = mb.select(cond=cond, a=a, b=b, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Sigmoid(context, node):
+    x = context[node.inputs[0]]
+    x = mb.sigmoid(x=x, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Softplus(context, node):
+    x = context[node.inputs[0]]
+    x = mb.softplus(x=x, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Softsign(context, node):
+    x = context[node.inputs[0]]
+    x = mb.softsign(x=x, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def Softmax(context, node):
+    logit = context[node.inputs[0]]
+    axis = node.attr.get("axis")
+    x = mb.softmax(logit=logit, axis=axis, name=node.name)
+    context.add(node.name, x)
+
+
+@register_tf_op
+def SpaceToBatchND(context, node):
+    x = context[node.inputs[0]]
+    block_shape = context[node.inputs[1]].val
+    paddings = context[node.inputs[2]].val
+
+    if x.rank != 3 and x.rank != 4:
+        raise NotImplementedError("rank of input must be 3 or 4!")
+
+    if block_shape is None or paddings is None:
+        raise NotImplementedError(
+            "Dynamic block_shape and paddings are not yet supported for SpaceToBatchND!"
+ ) + + if len(block_shape.flatten()) > 2: + raise NotImplementedError("rank of spatial shape > 2 is not yet supported") + + # use sequence of ops to implement spacetobatch for cases: + # (1) x.rank == 3 + # (2) x.rank == 4 and len(block_shape) == 1 + if x.rank == 3 or (x.rank == 4 and len(block_shape) == 1): + + rank = x.rank + spatial_rank = len(block_shape) + + # expand padding to have shape [x.rank, 2] + paddings = _np.concatenate( + [[[0, 0]], paddings, _np.zeros(shape=(3, 2), dtype=_np.int32)], axis=0 + ) + paddings = paddings[: x.rank, :] + needs_paddings = any(paddings.flatten()) + if needs_paddings: + padded = mb.pad(x=x, pad=paddings.flatten(), mode="constant") + else: + padded = x + padded_shape = mb.shape(x=padded) + + # padded_shape = [batch_size] + [spatial_dims] + [remaining_dims] + batch_size = [_value_at(padded_shape, 0)] + spatial_dims = [_value_at(padded_shape, i) for i in range(1, spatial_rank + 1)] + remaining_dims = [ + _value_at(padded_shape, i) for i in range(spatial_rank + 1, rank) + ] + + # padded_shape = [batch_size] + [s0, s1, ..., sm] + [remaining_dims] + # reshape_shape = [batch_size] + + # [s0/block_shape[0],block_shape[0],...,sm/block_shape[m],block_shape[m]] + + # [remaining_dims] + values = [] + for i in range(spatial_rank): + dim = mb.real_div(x=spatial_dims[i], y=block_shape[i]) + values.append(mb.cast(x=dim, dtype="int32")) + values.append(block_shape[i]) + values = batch_size + values + remaining_dims + reshape_shape = mb.concat(values=values, axis=0) + reshaped_padded = mb.reshape(x=padded, shape=reshape_shape) + + # permute the shape to : [block_shape] + [batch_size] + + # [s0/block_shape[0],...,sm/block_shape[m]] + + # [remaining_dims] + batch_axis = [0] + block_shape_axis = [2 + 2 * i for i in range(spatial_rank)] + spatial_axis = [1 + 2 * i for i in range(spatial_rank)] + remaining_axis = list(range(block_shape_axis[-1] + 1, len(values))) + perm = block_shape_axis + batch_axis + spatial_axis + remaining_axis + permuted_reshaped_padded = mb.transpose(x=reshaped_padded, perm=perm) + + # reshape the tensor to [prod(block_shape)*batch_size] + + # [s0/block_shape[0],...,sm/block_shape[m],block_shape[m]] + + # [remaining_dims] + prod_block_shape = _np.prod(block_shape.flatten()) + resize_batch_size = [mb.mul(x=values[0], y=prod_block_shape)] + resize_spatial_dims = [values[1 + 2 * i] for i in range(spatial_rank)] + final_reshape_values = resize_batch_size + resize_spatial_dims + remaining_dims + final_shape = mb.concat(values=final_reshape_values, axis=0) + x = mb.reshape(x=permuted_reshaped_padded, shape=final_shape, name=node.name) + else: + + if block_shape[0] != block_shape[1]: + raise NotImplementedError( + "non-equal block shape is not yet supported for 4d input." 
+ ) + needs_paddings = any(paddings.flatten()) + + x = mb.transpose(x=x, perm=[3, 0, 1, 2]) + + if needs_paddings: + x = mb.pad(x=x, pad=paddings.flatten(), mode="constant") + + x = mb.space_to_depth(x=x, block_size=block_shape[0]) + x = mb.transpose(x=x, perm=[1, 2, 3, 0], name=node.name) + + context.add(node.name, x) + + +@register_tf_op +def SpaceToDepth(context, node): + x = context[node.inputs[0]] + block_size = node.attr.get("block_size") + data_format = node.attr.get("data_format", "NHWC") + if data_format == "NHWC": + x = _transpose_NHWC_to_NCHW(x) + x = mb.space_to_depth(x=x, block_size=block_size) + x = _transpose_NCHW_to_NHWC(x, node.name) + else: + x = mb.space_to_depth(x=x, block_size=block_size, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Tanh(context, node): + x = context[node.inputs[0]] + x = mb.tanh(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op(tf_alias=["TopKV2"]) +def TopK(context, node): + x = context[node.inputs[0]] + k = context[node.inputs[1]] + x = mb.topk(x=x, k=k.val, axis=-1, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Cumsum(context, node): + x = context[node.inputs[0]] + axis = context[node.inputs[1]] + exclusive = node.attr.get("exclusive", False) + reverse = node.attr.get("reverse", False) + x = mb.cumsum(x=x, axis=axis, exclusive=exclusive, reverse=reverse, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Gather(context, node): + x = context[node.inputs[0]] + indices = context[node.inputs[1]] + axis = 0 + x = mb.gather(x=x, indices=indices, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def GatherV2(context, node): + x = context[node.inputs[0]] + indices = context[node.inputs[1]] + axis = context[node.inputs[2]] + x = mb.gather(x=x, indices=indices, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def GatherNd(context, node): + x = context[node.inputs[0]] + indices = context[node.inputs[1]] + x = mb.gather_nd(x=x, indices=indices, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Tile(context, node): + x = context[node.inputs[0]] + reps = context[node.inputs[1]] + x = mb.tile(x=x, reps=reps, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Where(context, node): + x = context[node.inputs[0]] + x = mb.non_zero(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def SquaredDifference(context, node): + x = context[node.inputs[0]] + y = context[node.inputs[1]] + x = mb.sub(x=x, y=y) + x = mb.square(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Conv2DBackpropInput(context, node): + # Output shape: [N, H_out, W_out, C_out] + output_shape = context[node.inputs[0]].val + # Weight shape: [H, W, C_out, C_in] + W_hwoi = context[node.inputs[1]] + W_oihw = mb.transpose(x=W_hwoi, perm=[2, 3, 0, 1]) + # Input shape: [N, H_in, W_in, C_in] + x = context[node.inputs[2]] + + data_format = node.attr.get("data_format", "NHWC") + HW_dilations = _conv2d3d_strides_or_dilations( + "dilations", node.attr.get("dilations"), data_format + ) + HW_strides = _conv2d3d_strides_or_dilations( + "strides", node.attr.get("strides"), data_format + ) + pad_type = node.attr.get("padding") + + if not isinstance(pad_type, _string_types): + pad_type = "custom" + raise NotImplementedError("Custom padding not implemented for TF") + + pad_type = pad_type.lower() + # CoreML expects input to be in NCHW format + # Transpose input to NCHW format + if data_format == 
"NHWC": + x = _transpose_NHWC_to_NCHW(x) + if output_shape is not None: + output_shape = [output_shape[1], output_shape[2]] + else: + if output_shape is not None: + output_shape = [output_shape[2], output_shape[3]] + + # Only the last op should have the same name as node.name + conv_name = node.name + "x" if data_format == "NHWC" else node.name + # Pass output shape provided above + x = mb.conv_transpose( + x=x, + weight=W_oihw, + pad_type=pad_type, + output_shape=output_shape, + strides=HW_strides, + dilations=HW_dilations, + name=conv_name, + ) + + # Convert NCHW output back to NHWC format + if data_format == "NHWC": + x = _transpose_NCHW_to_NHWC(x, node.name) + context.add(node.name, x) + + +@register_tf_op +def Range(context, node): + start = context[node.inputs[0]] + end = context[node.inputs[1]] + step = context[node.inputs[2]] + x = mb.range_1d(start=start, end=end, step=step, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def RandomUniform(context, node): + shape = context[node.inputs[0]] + seed = node.attr["seed"] + x = mb.random_uniform(shape=shape, seed=seed, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def RandomStandardNormal(context, node): + shape = context[node.inputs[0]] + seed = node.attr["seed"] + x = mb.random_normal(shape=shape, seed=seed, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def OneHot(context, node): + indices = context[node.inputs[0]] + depth = context[node.inputs[1]] + on_value = context[node.inputs[2]] + off_value = context[node.inputs[3]] + axis = node.attr.get("axis", -1) + x = mb.one_hot( + indices=indices, + one_hot_vector_size=depth, + axis=axis, + on_value=on_value, + off_value=off_value, + name=node.name, + ) + context.add(node.name, x) + + +@register_tf_op(tf_alias=["NonMaxSuppressionV3"]) +def NonMaxSuppression(context, node): + boxes = context[node.inputs[0]] + scores = context[node.inputs[1]] + max_boxes = context[node.inputs[2]] + iou_threshold = context[node.inputs[3]] + score_threshold = context[node.inputs[4]] + if score_threshold.val == float("-inf"): + # TensorFlow's default value for score_threshold, Core ML does not + # have float('-inf') support, converted to minimum float32 instead + score_threshold = -3.4e38 + boxes = mb.expand_dims(x=boxes, axes=[0]) + scores = mb.expand_dims(x=scores, axes=[0, -1]) + _, _, x, _ = mb.non_maximum_suppression( + boxes=boxes, + scores=scores, + max_boxes=max_boxes, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + ) + x = mb.squeeze(x=x, axes=[0], name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Shape(context, node): + x = context[node.inputs[0]] + x = mb.shape(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ResizeNearestNeighbor(context, node): + # "ResizeNearestNeighbor" op in TF is always in the channel last mode + # instead of upsample factor, it uses output size, which is the second input + x = context[node.inputs[0]] + + input_shape = x.shape # (N,Hin,Win,C) + if len(input_shape) != 4: + raise ValueError('"ResizeNearestNeighbor" op: input rank is not 4') + Hin, Win = input_shape[1:3] + + if context[node.inputs[1]].val is None: + raise ValueError( + '"ResizeNearestNeighbor" op: the second input, which is the output size, must be known statically' + ) + + if len(context[node.inputs[1]].val) != 2: + raise ValueError( + '"ResizeNearestNeighbor" op: the second input, which is the output size, must have 2 elements' + ) + + Hout, Wout = context[node.inputs[1]].val + + if not ( + 
isinstance(Hout, (_np.int32, _np.int64)) + and isinstance(Wout, (_np.int32, _np.int64)) + ): + raise ValueError( + '"ResizeNearestNeighbor" op: the second input, which is the output size, must have elements of type int32' + ) + + if Hout < Hin and Wout < Win: + ResizeBilinear(context, node) + return + + if Hout % Hin > 0 or Wout % Win > 0: + raise ValueError( + '"ResizeNearestNeighbor" op: fractional upsampling factors not supported' + ) + + scaling_factor_h = int(Hout / Hin) + scaling_factor_w = int(Wout / Win) + + # first transpose to from channel last to channel first format for coreml + x = _transpose_NHWC_to_NCHW(x) + # add the upsample layer + x = mb.upsample_nearest_neighbor( + x=x, + upscale_factor_height=scaling_factor_h, + upscale_factor_width=scaling_factor_w, + name=node.name + "_channel_first_upsample", + ) + # transpose again + x = _transpose_NCHW_to_NHWC(x, node.name) + + context.add(node.name, x) + + +@register_tf_op +def ResizeBilinear(context, node): + # "ResizeBilinear" op in TF is always in the channel last mode + # second input is the output size + + x = context[node.inputs[0]] + input_shape = x.shape # (N,Hin,Win,C) + if len(input_shape) != 4: + raise ValueError('"ResizeBilinear" op: input rank is not 4') + Hin, Win = input_shape[1:3] + + if context[node.inputs[1]].val is None: + raise ValueError( + '"ResizeBilinear" op: the second input, which is the output size, must be known statically' + ) + + if len(context[node.inputs[1]].val) != 2: + raise ValueError( + '"ResizeBilinear" op: the second input, which is the output size, must have 2 elements' + ) + + Hout, Wout = context[node.inputs[1]].val + + if not (isinstance(Hout, _np.int32) and isinstance(Wout, _np.int32)): + raise ValueError( + '"ResizeBilinear" op: the second input, which is the output size, must have elements of type int32' + ) + + align_corners = node.attr.get("align_corners", False) + half_pixel_centers = node.attr.get("half_pixel_centers", False) + + # first transpose to from channel last to channel first format for coreml + x = _transpose_NHWC_to_NCHW(x) + + # add either the resize_bilinear layer or the upsample layer + + # [align_corners = True, half_pixel_centers = False] + if align_corners and not half_pixel_centers: + x = mb.resize_bilinear( + x=x, + target_size_height=Hout, + target_size_width=Wout, + sampling_mode="STRICT_ALIGN_CORNERS", + name=node.name + "_channel_first_resize_bilinear", + ) + + # [align_corners = False, half_pixel_centers = False] + elif not align_corners and not half_pixel_centers: + x = mb.resize_bilinear( + x=x, + target_size_height=Hout, + target_size_width=Wout, + sampling_mode="DEFAULT", + name=node.name + "_channel_first_resize_bilinear", + ) + + # [align_corners = False, half_pixel_centers = True] + elif not align_corners and half_pixel_centers: + x = mb.upsample_bilinear( + x=x, + scale_factor_height=(float(Hout) + 1e-2) / float(Hin), + scale_factor_width=(float(Wout) + 1e-2) / float(Win), + align_corners=False, + name=node.name + "_channel_first_upsample_bilinear", + ) + + else: + # we should not come here since TF does not support align_corners=True and half_pixel_centers=True + raise ValueError( + '"ResizeBilinear" op: "align_corners" and "half_pixel_centers" are both True and this mode is not supported' + ) + + # transpose again + x = _transpose_NCHW_to_NHWC(x, node.name) + context.add(node.name, x) + + +@register_tf_op +def make_tuple(context, node): + res = tuple([context[in_name] for in_name in node.inputs]) + context.add(node.name, res) + + +@register_tf_op 
+def function_entry(context, node): + if context.get_func_inputs() is None: + msg = ( + "function_entry requires function inputs stored in " + + "context.curr_func_inputs" + ) + raise ValueError(msg) + context.add(node.name, context.get_func_inputs()) + + +@register_tf_op(tf_alias=["while"]) +def While(context, node): + # TF while will never have break statement, because break can always be + # transformed into while and condition. Example: + # + # while pred: + # a = op1(...) + # if a == 0: + # break + # b = op2(...) + # + # is equivalent to + # + # while pred and not break_a: + # a = op1(...) + # break_a = a == 0 + # if not break_a: + # b = op2(...) + + # node.inputs[0] == 'make_tuple_X' (always a make_tuple) + loop_vars = context[node.inputs[0]] # python tuple of Vars + cond_graph = context.get_graph(node.attr["cond_function"]) + body_graph = context.get_graph(node.attr["body_function"]) + + def cond(*loop_vars): + context.stack_func_inputs(loop_vars) + + # convert_graph uses context to convert cond_graph. During conversion + # it constructs operations (mb.some_op). Note that cond(*loop_vars) is + # only evaluated inside while_loop's type_inference(), not here. In + # other words, we use python's deferred function evaluation to defer + # the SSA block construction until inside while_loop Operation. + res = convert_graph(context, cond_graph) + # Done with translating the function + context.unstack_func_inputs() + return res + + def body(*loop_vars): + context.stack_func_inputs(loop_vars) + res = convert_graph(context, body_graph) + # Done with translating the function + context.unstack_func_inputs() + return res + + x = mb.while_loop(_cond=cond, _body=body, loop_vars=loop_vars, name=node.name) + # wraps x as tuple for get_tuple that always follow the while node. + if not isinstance(x, (tuple, list)): + x = (x,) + context.add(node.name, x) + + +@register_tf_op +def iff(context, node): + pred = context[node.inputs[0]] + + # this is always a tensor, as TF uses one iff op for each returned value. + # + # Example TF program: + # + # x = tf.placeholder(tf.float32, shape=(1,)) + # y = tf.placeholder(tf.float32, shape=(1,)) + # z = tf.multiply(x, y) + # pred = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + # def true_fn(): return tf.add(x, z), x + # def false_fn(): return tf.square(y), z + # res = tf.cond(pred, true_fn, false_fn) + # + # There will be 2 iffs: + # + # iff('cond/pred_id', 'cond/Add', 'cond/Square') + # iff('cond/pred_id', 'cond/Add/Switch', 'cond/Switch_1') + # + # where + # 'cond/pred_id': pred + # 'cond/Add': tf.add(x, z) + # 'cond/Square': tf.square(y) + # 'cond/Add/Switch': x + # 'cond/Switch_1': z + # + # And both branches are executed, and one of the results will be + # discarded at iff nodes. + # + # Note that the above program would translate to two cond ops, each with + # two blocks. 
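+    # A sketch of the lowering performed below: each iff becomes a single
+    # mb.cond whose true/false blocks simply return the corresponding
+    # already-computed branch value through an identity op.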
+ true_output_var = context[node.inputs[1]] + false_output_var = context[node.inputs[2]] + + def true_fn(): + return mb.identity(x=true_output_var) + + def false_fn(): + return mb.identity(x=false_output_var) + + x = mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Concat(context, node): + values = [context[input] for input in node.inputs[1:]] + axis = context[node.inputs[0]] + x = mb.concat(values=values, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ConcatV2(context, node): + values = [context[input] for input in node.inputs[:-1]] + axis = context[node.inputs[-1]] + x = mb.concat(values=values, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Pack(context, node): + values = [context[name] for name in node.inputs] + axis = node.attr["axis"] + if axis < 0: + # TF axis = -1 creates new dim at the end + axis += values[0].rank + 1 + if len(values) == 1: + # for example: + # y = tf.raw_ops.Pack(values=[2], axis=0). + # or y = tf.raw_ops.Pack(values=[tf.constant([1,2])], axis=0) + input_type = values[0].sym_type + if _is_scalar(input_type): + x = mb.mul(x=_np.array([1], dtype=_np.int32), y=values[0], name=node.name) + else: + x = mb.expand_dims(x=values[0], axes=[axis], name=node.name) + else: + if all([_is_scalar(input.sym_type) for input in values]): + x = mb.concat(values=values, axis=axis, name=node.name) + else: + x = mb.stack(values=values, axis=axis, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Unpack(context, node): + x = context[node.inputs[0]] + axis = int(node.attr["axis"]) + num_splits = node.attr.get("num", None) + if num_splits is None: + num_splits = x.shape[axis] + y = mb.split(x=x, num_splits=num_splits, axis=axis, name=node.name + "_unsqueezed") + output_vars = [] + for i in range(num_splits): + output_vars.append( + mb.squeeze(x=y[i], axes=[axis], name=node.name + ":{}".format(i)) + ) + + context.add(node.name, output_vars) + + +@register_tf_op +def SplitV(context, node): + x = context[node.inputs[0]] + split_sizes = context[node.inputs[1]] + axis = context[node.inputs[2]] + if "num_split" not in node.attr: + raise ValueError("num_splits not found in TF op {}".format(node.name)) + num_splits = node.attr["num_split"] + if num_splits == 1: + Identity(context, node) + else: + x = mb.split( + x=x, + num_splits=num_splits, + split_sizes=split_sizes, + axis=axis, + name=node.name, + ) + context.add(node.name, x) + + +@register_tf_op +def ScatterNd(context, node): + indices = context[node.inputs[0]] + updates = context[node.inputs[1]] + shape = context[node.inputs[2]] + x = mb.fill(shape=shape, value=0) + x = mb.scatter_nd(data=x, indices=indices, updates=updates, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def ZerosLike(context, node): + x = context[node.inputs[0]] + if x.rank == 0: + np_type = types.nptype_from_builtin(x.sym_type) + x = mb.const(val=np_type(0), name=node.name) + else: + np_type = types.nptype_from_builtin(x.sym_type.get_primitive()) + x = mb.fill(shape=mb.shape(x=x), value=np_type(0), name=node.name) + context.add(node.name, x) + + +@register_tf_op +def IsFinite(context, node): + x = context[node.inputs[0]] + x = mb.isfinite(x=x, name=node.name) + context.add(node.name, x) + + +@register_tf_op +def Split(context, node): + axis = context[node.inputs[0]] + x = context[node.inputs[1]] + if "num_split" not in node.attr: + raise ValueError("num_splits not found in TF op 
{}".format(node.name)) + num_splits = node.attr["num_split"] + if num_splits == 1: + if len(node.outputs) == 0: + x = mb.mul(x=x, y=1.0, name=node.name) + context.add(node.name, x) + else: + # Don't change tfssa. Just make downstream ops reference the pre-identity op. + context.add(node.name, [x], is_new_var=False) + else: + x = mb.split(x=x, num_splits=num_splits, axis=axis, name=node.name) + context.add(node.name, x) + # TODO (rdar://60358242) If tf.split output is returned, there's no + # get_tuple nodes. Some graph pass is needed. Example: + # + # x = tf.placeholder(tf.float32, shape=input_shape1) + # res = tf.split(x, 3, axis=0) + # + # res are ['split:0', 'split:1', 'split'] + # + # but node.outputs == ['gto_1', 'gto_2', 'gto_3'] + + +@register_tf_op +def CropAndResize(context, node): + x = context[node.inputs[0]] + input_shape = x.shape # (B, h_in, w_in, C) + if len(input_shape) != 4: + raise ValueError( + '"CropResize" op: expected input rank 4, got {}'.format(x.rank) + ) + Hin, Win = input_shape[1:3] + + const_box_info = True + if context[node.inputs[1]].val is None or context[node.inputs[2]].val is None: + const_box_info = False + + crop_size = context[node.inputs[3]].val + method = "bilinear" if len(node.inputs) < 5 else context[node.inputs[4]].val + extrapolation_value = 1.0 if len(node.inputs) < 6 else context[node.inputs[5]].val + + # CoreML index information along with boxes + if const_box_info: + boxes = context[node.inputs[1]].val + box_indices = context[node.inputs[2]].val + box_indices = _np.expand_dims(box_indices, axis=1) + boxes = _np.concatenate([box_indices, boxes], axis=1) + # CoreML expects boxes/ROI in + # [N, 1, 5, 1, 1] format + boxes = boxes.reshape(boxes.shape[0], 1, boxes.shape[1], 1, 1) + else: + box_indices = context[node.inputs[2]] + boxes = context[node.inputs[1]] + box_indices = mb.expand_dims(x=box_indices, axes=[1]) + boxes = mb.concat(values=(box_indices, boxes), axis=1) + # TODO: Dynamic rank: Use GetShape and select indices dynamically + boxes = mb.reshape(x=boxes, shape=[boxes.shape[0], 1, boxes.shape[1], 1, 1]) + + # Get Height and Width of crop + h_out, w_out = crop_size[0], crop_size[1] + + # TF `nearest` mode not supported + method_map = {"bilinear": "ALIGN_CORNERS"} + if method not in method_map: + raise ValueError( + "CropResize op: Unsupported method {}. 
Supports {}".format( + method, method_map.keys() + ) + ) + method = method_map[method] + + # TF input format: [B, h_in, w_in, C] + # CoreML input format: [B, C, h_in, w_in] + x = _transpose_NHWC_to_NCHW(x) + + # Crop Resize + x = mb.crop_resize( + x=x, + roi=boxes, + target_height=h_out, + target_width=w_out, + normalized_coordinates=True, + spatial_scale=extrapolation_value, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode=method, + ) + + # CoreML output format: [N, 1, C, h_out, w_out] + # TF output format: [N, h_out, w_out, C] + x = mb.squeeze(x=x, axes=[1]) + x = _transpose_NCHW_to_NHWC(x, node.name) + context.add(node.name, x) + + +@register_tf_op +def TensorArrayV3(context, node): + if "infer_shape" in node.attr: + if not node.attr["infer_shape"]: + raise ValueError("Only fixed size TensorArray is supported") + + dynamic_length = node.attr.get("dynamic_size", True) + elem_shape = node.attr.get("element_shape", None) + size = node.attr.get("size", None) + if size is None: + size = context[node.inputs[0]] + builtin_dtype = node.attr["dtype"] + dtype_str = types.builtin_to_string(builtin_dtype) + if elem_shape is not None: + ls = mb.make_list( + init_length=size, + dtype=dtype_str, + elem_shape=elem_shape, + dynamic_length=dynamic_length, + name=node.name, + ) + else: + ls = mb.tf_make_list( + init_length=size, + dtype=dtype_str, + dynamic_length=dynamic_length, + name=node.name, + ) + context.add(node.name, ls) + + +@register_tf_op +def TensorArrayWriteV3(context, node): + index = context[node.inputs[0]] + new_val = context[node.inputs[1]] + ls = context[node.inputs[2]] + new_list = mb.list_write(ls=ls, index=index, value=new_val, name=node.name) + context.add(node.name, new_list) + + +@register_tf_op +def TensorArraySizeV3(context, node): + ls = context[node.inputs[0]] + length = mb.list_length(ls=ls, name=node.name) + context.add(node.name, length) + + +@register_tf_op +def TensorArrayGatherV3(context, node): + indices = context[node.inputs[0]] + ls = context[node.inputs[1]] + tensor = mb.list_gather(ls=ls, indices=indices, name=node.name) + context.add(node.name, tensor) + + +@register_tf_op +def TensorArrayReadV3(context, node): + idx = context[node.inputs[0]] + ls = context[node.inputs[1]] + ls = mb.list_read(ls=ls, index=idx, name=node.name) + context.add(node.name, ls) + + +@register_tf_op +def TensorArrayScatterV3(context, node): + indices = context[node.inputs[0]] + value = context[node.inputs[1]] + ls = context[node.inputs[2]] + ls = mb.list_scatter(ls=ls, indices=indices, value=value, name=node.name) + context.add(node.name, ls) + + +@register_tf_op +def BroadcastTo(context, node): + x = context[node.inputs[0]] + shape = context[node.inputs[1]] + if shape.val is None: # dynamic shape + raise NotImplementedError("dynamic shape not yet supported") + else: # static shape + target_shape = tuple(shape.val) + broadcast_shape = broadcast_shapes(x.shape, target_shape) + if target_shape != broadcast_shape: + msg = "shapes are not broadcastable: {} vs. {}" + raise ValueError(msg.format(x.shape, target_shape)) + target_rank = len(target_shape) + if x.rank != target_rank: + axes = [i for i in range(target_rank - x.rank)] + x = mb.expand_dims(x=x, axes=axes) + reps = [1] * target_rank + for i in range(target_rank): + reps[i] = target_shape[i] // x.shape[i] + + x = mb.tile(x=x, reps=reps, name=node.name) + context.add(node.name, x) + + +@register_tf_op() +def get_global(context, node): + # Design comment: This is only works if variable doesn't cross block + # boundary (e.g. 
while_loop, cond, function) + variable_name = node.attr["variable"] + x = context[variable_name] # This must've been set by set_global + context.add(node.name, x, is_new_var=False) + + +@register_tf_op() +def set_global(context, node): + x = context[node.inputs[0]] + variable_name = node.attr["variable"] + context.add(variable_name, x, is_new_var=False) + + +def _get_const_or_raise(variable): + if variable.val is None: + raise ValueError("Var {} must be const".format(variable.name)) + return variable.val + + +@register_tf_op() +def LSTMBlockCell(context, node): + x = context[node.inputs[0]] # [batch, input_dim] + c_prev = context[node.inputs[1]] # [b, hidden_dim] + h_prev = context[node.inputs[2]] # [b, hidden_dim] + # W layout is ifco + W = context[node.inputs[3]] # [input_dim + hidden_dim, 4*hidden_dim] + + kwargs = {} + use_peephole = node.attr["use_peephole"] + if use_peephole: + peep_i = context[node.inputs[4]] # [hidden_dim,] + peep_f = context[node.inputs[5]] # [hidden_dim,] + peep_o = context[node.inputs[6]] # [hidden_dim,] + kwargs["weight_peep_i"] = peep_i + kwargs["weight_peep_f"] = peep_f + kwargs["weight_peep_o"] = peep_o + + bias = context[node.inputs[7]] # [4*hidden_dim,] + + forget_bias = node.attr["forget_bias"] + cell_clip = None + if node.attr["cell_clip"] is not None and node.attr["cell_clip"] > 0: + cell_clip = node.attr["cell_clip"] + + res = mb.tf_lstm_block_cell( + x=x, + c_prev=c_prev, + h_prev=h_prev, + weight=W, + bias=bias, + forget_bias=forget_bias, + cell_clip=cell_clip, + use_peephole=use_peephole, + name=node.name, + **kwargs + ) + context.add(node.name, res) + + +@register_tf_op() +def BlockLSTM(context, node): + seq_len = context[node.inputs[0]] # int + x = context[node.inputs[1]] # [padded_len, batch, input_dim] + init_c = context[node.inputs[2]] # [1, hidden_dim] + init_h = context[node.inputs[3]] # [1, hidden_dim] + weight = context[node.inputs[4]] # [input_dim + hidden_dim, 4*hidden_dim] + + kwargs = {} + use_peephole = node.attr["use_peephole"] + if use_peephole: + peep_i = context[node.inputs[5]] # [hidden_dim,] + peep_f = context[node.inputs[6]] # [hidden_dim,] + peep_o = context[node.inputs[7]] # [hidden_dim,] + kwargs["weight_peep_i"] = peep_i + kwargs["weight_peep_f"] = peep_f + kwargs["weight_peep_o"] = peep_o + + bias = context[node.inputs[8]] # [4*hidden_dim,] + + forget_bias = node.attr["forget_bias"] + cell_clip = None + if node.attr["cell_clip"] is not None and node.attr["cell_clip"] > 0: + cell_clip = node.attr["cell_clip"] + + res = mb.tf_lstm_block( + seq_len=seq_len, + x=x, + c_prev=init_c, + h_prev=init_h, + weight=weight, + bias=bias, + forget_bias=forget_bias, + cell_clip=cell_clip, + use_peephole=use_peephole, + name=node.name, + **kwargs + ) + context.add(node.name, res) diff --git a/coremltools/converters/mil/frontend/tensorflow/parse.py b/coremltools/converters/mil/frontend/tensorflow/parse.py new file mode 100644 index 000000000..cad219946 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/parse.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil import types +from tensorflow.core.framework.types_pb2 import DataType +from tensorflow.python.framework.dtypes import _TF_TO_NP + +import logging as _logging +import numpy as _np + + +def parse_type(t): + mapping = { + DataType.DT_FLOAT: types.float, + DataType.DT_DOUBLE: types.double, + DataType.DT_INT32: types.int32, + DataType.DT_UINT8: types.uint8, + DataType.DT_INT16: types.int16, + DataType.DT_INT8: types.int8, + DataType.DT_STRING: types.str, + DataType.DT_INT64: types.int64, + DataType.DT_BOOL: types.bool, + DataType.DT_UINT16: types.uint16, + DataType.DT_UINT32: types.uint32, + DataType.DT_UINT64: types.uint64, + } + t = int(t) + if t in mapping: + return mapping[t] + else: + _logging.info("Type %d cannot be mapped", t) + return None + + +def parse_shape(t): + if t.unknown_rank: + return None + ret = [d.size for d in t.dim] + return ret + + +def parse_tensor(t): + typ = parse_type(t.dtype) + shape = parse_shape(t.tensor_shape) + + retval = None + if len(t.half_val) > 0: + retval = _np.array(t.half_val, dtype=_TF_TO_NP[t.dtype]) + elif len(t.float_val) > 0: + retval = _np.array(t.float_val, dtype=_TF_TO_NP[t.dtype]) + elif len(t.double_val) > 0: + retval = _np.array(t.double_val, dtype=_TF_TO_NP[t.dtype]) + elif len(t.int_val) > 0: + retval = _np.array(t.int_val, dtype=_TF_TO_NP[t.dtype]) + elif len(t.int64_val) > 0: + retval = _np.array(t.int64_val, dtype=_TF_TO_NP[t.dtype]) + elif len(t.bool_val) > 0: + retval = _np.array(t.bool_val, dtype=_TF_TO_NP[t.dtype]) + elif hasattr(t, "uint32_val") and len(t.uint32_val) > 0: + retval = _np.array(t.uint32_val, dtype=_TF_TO_NP[t.dtype]) + elif hasattr(t, "uint64_val") and len(t.uint64_val) > 0: + retval = _np.array(t.uint64_val, dtype=_TF_TO_NP[t.dtype]) + + if not t.tensor_shape.unknown_rank and len(shape) == 0: + retobj = typ() + if retval is not None: + retobj.val = retval[0] + else: + rettype = types.tensor(typ, tuple(shape)) + retobj = rettype() + retobj.shape = shape + if retval is not None: + retobj.val = retval + + return retobj + + +def parse_string(s): + if isinstance(s, bytes): + return s.decode("utf-8") + else: + return s + + +def parse_list(t): + if len(t.s) > 0: + return list(parse_string(s) for s in t.s) + elif len(t.i) > 0: + return list(t.i) + elif len(t.f) > 0: + return list(t.f) + elif len(t.b) > 0: + return list(t.b) + elif len(t.type) > 0: + return list(parse_type(z) for z in t.type) + elif len(t.shape) > 0: + return list(parse_shape(z) for z in t.shape) + elif len(t.tensor) > 0: + return list(parse_tensor(z) for z in t.tensor) + else: + return [] + + +def parse_func(f): + return f.name + + +def parse_attr(attr): + if attr.HasField("s"): + return parse_string(attr.s) + elif attr.HasField("i"): + return attr.i + elif attr.HasField("f"): + return attr.f + elif attr.HasField("b"): + return attr.b + elif attr.HasField("type"): + return parse_type(attr.type) + elif attr.HasField("shape"): + return parse_shape(attr.shape) + elif attr.HasField("tensor"): + return parse_tensor(attr.tensor) + elif attr.HasField("list"): + return parse_list(attr.list) + elif attr.HasField("func"): + return parse_func(attr.func) + elif attr.HasField("placeholder"): + raise NotImplementedError("placeholder not yet 
implemented") + raise ValueError("unintelligible TFNode attributes") diff --git a/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py b/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py new file mode 100644 index 000000000..4c4978f7d --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/parsed_tf_node.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +from coremltools.converters.mil.mil import types +from .tfssa import ParsedNode + + +class ParsedTFNode(ParsedNode): + """ + A parsed TensorFlow Node. + + name: The name of the node (str) + op: The operation represented by the node (str) + datatype: The type of the node. (type) + value: The value of the node if available + inputs: The list of nodes which are inputs to this node (list[str]) + control_inputs: The list of nodes which have to be executed before this node (list[str]) + attr: The attributes of the node + outputs: The list of nodes which consume the result of this node (list[str]) + control_outputs: The list of nodes which have to be executed after this node (list[str]) + """ + + def __init__(self, tfnode=None): + super(ParsedTFNode, self).__init__() + self.original_node = tfnode + + if tfnode is not None: + from .parse import parse_attr + + self.name = tfnode.name + if tfnode.op == "PlaceholderWithDefault": + self.op = "Placeholder" + else: + self.op = tfnode.op + self.inputs = [x for x in tfnode.input if not x.startswith("^")] + self.control_inputs = [x[1:] for x in tfnode.input if x.startswith("^")] + self.attr = {k: parse_attr(v) for k, v in tfnode.attr.items()} + + def parse_from_attr(self): + if "value" in self.attr: + self.datatype = self.attr["value"].__class__ + elif "_output_shapes" in self.attr: + output_shapes = self.attr["_output_shapes"] + if output_shapes[0] is not None and len(output_shapes[0]) > 0: + if "dtype" in self.attr: + rettype = types.tensor(self.attr["dtype"], tuple(output_shapes[0])) + elif "T" in self.attr: + rettype = types.tensor(self.attr["T"], tuple(output_shapes[0])) + elif "Tparams" in self.attr: + rettype = types.tensor( + self.attr["Tparams"], tuple(output_shapes[0]) + ) + else: + raise NotImplementedError( + "Op-(%s) %s not implemented\nWith attribute:" + + str(self.attr) % (self.op, self.name) + ) + self.datatype = rettype + elif "dtype" in self.attr: + self.datatype = self.attr["dtype"] + elif "shape" in self.attr: + shape = self.attr["shape"] + assert "dtype" in self.attr + if len(shape) == 0: + self.datatype = self.attr["dtype"] + else: + self.datatype = types.tensor(self.attr["dtype"], shape) + elif "dtype" in self.attr: + self.datatype = self.attr["dtype"] + + def _copy_impl(self, dest): + dest = super(ParsedTFNode, self)._copy_impl(dest) + dest.original_node = self.original_node + return dest + + def __copy__(self): + return self._copy_impl(ParsedTFNode()) diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/__init__.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/__init__.py new file mode 100644 index 000000000..a659bb2d3 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020, Apple Inc. 
All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +# Import all passes in this dir +from os.path import dirname, basename, isfile, join +import glob + +excluded_files = [ + "__init__.py", + "tf_passes.py", +] +modules = glob.glob(join(dirname(__file__), "*.py")) +pass_modules = [ + basename(f)[:-3] + for f in modules + if isfile(f) + and basename(f)[:1] != "_" # Follow python convention to hide _* files. + and basename(f)[:4] != "test" + and basename(f) not in excluded_files +] +__all__ = pass_modules + +from . import * # import everything in __all__ diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py new file mode 100644 index 000000000..d3bf48515 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/backfill_make_list_elem_type.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil import types + + +@register_pass(namespace="tensorflow") +def backfill_make_list_elem_type(prog): + """ + TF's TensorArrayV3 (represented as make_list in mil) doesn't necessarily + contain elem shape/type, which is known when write is performed. We + backfill elem type info to make_list + + Inputs: + + prog: Program + """ + for f_name, f in prog.functions.items(): + backfill_make_list_elem_type_block(f) + + +def backfill_make_list_elem_type_block(block): + # shallow copy hides changes on f.operations during the loop + for op in block.operations[:]: + for b in op.blocks: + backfill_make_list_elem_type_block(b) + + if op.op_type != "tf_make_list": + continue + # op is `make_list` + + if op.outputs[0].elem_type != types.unknown: + # elem_type of the list is known + continue + + list_var = op.outputs[0] + elem_type = infer_elem_type(list_var) # types.tensor + if elem_type is None: + msg = ( + "No list_write or list_scatter op to infer make_list " + + "'{}' element type. Block:\n{}" + ) + raise ValueError(msg.format(op.name, op.enclosing_block)) + + with block: + new_list = mb.make_list( + init_length=op.init_length, + dynamic_length=op.dynamic_length, + # elem_shape cannot be symbolic by definition of list. + elem_shape=elem_type.get_shape(), + dtype=op.inputs["dtype"], + before_op=op, + name=op.name, + ) + + block.replace_uses_of_var_after_op( + anchor_op=op, old_var=op.outputs[0], new_var=new_list + ) + block.remove_ops([op]) + + +def infer_elem_type(list_var): + """ + Returns types.tensor. None if failed to infer element type. + Example: + + Given: + + main(%update: (2,fp32)) { + block0() { + %list: List[unknown] = tf_make_list(...) # unknown elem type + %while_loop_0:0: (i32), %while_loop_0:1: List[(2,fp32)] = while_loop(loop_vars=(...)) + while_loop_0_body(...) 
{ + %list_write_0: List[(2,fp32)] = list_write(index=..., ls=%list, value=%update) + } -> (%add_0, %list_write_0) + + Result: + + main(%update: (2,fp32)) { + block0() { + %list: List[(2,fp32)] = tf_make_list(...) # Get the elem type from list_write + %while_loop_0:0: (i32), %while_loop_0:1: List[(2,fp32)] = while_loop(loop_vars=(...)) + while_loop_0_body(...) { + %list_write_0: List[(2,fp32)] = list_write(index=..., ls=%list, value=%update) + } -> (%add_0, %list_write_0) + """ + # Search for child op that have informative element types + for o in list_var.child_ops: + if o.op_type in ["list_write", "list_scatter"]: + return o.outputs[0].elem_type + if o.op_type == "while_loop": + idx = list(o.loop_vars).index(list_var) + block = o.blocks[0] + # the corresponding Var in body block + block_var = block.inputs[idx] + elem_type = infer_elem_type(block_var) + if elem_type is not None: + return elem_type + # otherwise continue to other block_var (a list_var can be + # passed into while_loop twice). + return None diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py new file mode 100644 index 000000000..a61b58650 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/expand_tf_lstm.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil import types +import numpy as np +import logging + + +@register_pass(namespace="tensorflow") +def expand_tf_lstm(prog): + """ + Expand tf_lstm_block_cell to fine-grained SSA ops following: + + xh = [x, h_prev] + [i, ci, f, o] = xh * w + b + f = f + forget_bias + if not use_peephole: + wci = wcf = wco = 0 + i = sigmoid(cs_prev .* wci + i) + f = sigmoid(cs_prev .* wcf + f) + ci = tanh(ci) + cs = ci .* i + cs_prev .* f + cs = clip(cs, cell_clip) + o = sigmoid(cs * wco + o) + co = tanh(cs) + h = co .* o + + Inputs: + + prog: Program + """ + for f_name, f in prog.functions.items(): + expand_tf_lstm_helper(f) + + +def expand_tf_lstm_helper(block): + # shallow copy hides changes on f.operations during the loop + for op in block.operations[:]: + for b in op.blocks: + expand_tf_lstm_helper(b) + + if op.op_type == "tf_lstm_block_cell": + expand_tf_lstm_block_cell(op) + logging.info("Expanding {} (op_type: {})".format(op.name, op.op_type)) + + if op.op_type == "tf_lstm_block": + # only cs, h are supported for now. Can be easily extended to other outputs at performance hit. 
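+            # The seven outputs unpacked below follow TF's
+            # [i, cs, f, o, ci, co, h] layout. The expanded while_loop only
+            # materializes the cs and h sequences, so the rewrite is applied
+            # only when the other five outputs are dead, i.e. they have no
+            # child ops and are not returned by any enclosing block.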
+ i, cs, f, o, ci, co, h = op.outputs + if all( + [ + len(ov.child_ops) <= 0 and len(ov.consuming_blocks) <= 0 + for ov in [i, f, o, ci, co] + ] + ): + expand_tf_lstm_block(op) + logging.info("Expanding {} (op_type: {})".format(op.name, op.op_type)) + + +def _lstm_cell_builder(op, x, h_prev, cs_prev, before_op=None): + b = op.bias # [4*hidden_dim] + forget_bias = op.forget_bias.val # python:float + + # xh = [x, h_prev] + # xh shape: [b, input_dim+hidden_dim] + xh = mb.concat(values=[x, h_prev], axis=-1, before_op=before_op) + + # w: [4*hidden_dim, input_dim + hidden_dim] (icfo layout) + w = np.transpose(op.weight.val) + # [i, ci, f, o] = xh * w + b. Shape is [b, 4*hidden_dim] + icfo = mb.linear(x=xh, weight=w, bias=b, before_op=before_op) + + # i, ci, f, o shape: [b, hidden_dim] + i, ci, f, o = mb.split(x=icfo, num_splits=4, axis=-1, before_op=before_op) + if op.forget_bias.val != 0: + f = mb.add(x=f, y=forget_bias, before_op=before_op) + + # i = sigmoid(cs_prev .* wci + i) + # f = sigmoid(cs_prev .* wcf + f) + if op.use_peephole.val: + wci = op.weight_peep_i.val # [hidden_dim] + wcf = op.weight_peep_f.val # [hidden_dim] + + x = mb.mul(x=cs_prev, y=wci, before_op=before_op) + pre_i = mb.add(x=x, y=i, before_op=before_op) + + x = mb.mul(x=cs_prev, y=wcf, before_op=before_op) + pre_f = mb.add(x=x, y=f, before_op=before_op) + else: + pre_i = i + pre_f = f + + i = mb.sigmoid(x=pre_i, before_op=before_op) + f = mb.sigmoid(x=pre_f, before_op=before_op) + + # ci = tanh(ci) + ci = mb.tanh(x=ci, before_op=before_op) + + # cs = ci .* i + cs_prev .* f + x = mb.mul(x=ci, y=i, before_op=before_op) + y = mb.mul(x=cs_prev, y=f, before_op=before_op) + cs = mb.add(x=x, y=y, before_op=before_op) + + # cs = clip(cs, cell_clip) + if op.cell_clip is not None: + clip_val = op.cell_clip.val + cs = mb.clip(x=cs, alpha=-clip_val, beta=clip_val, before_op=before_op) + + # o = sigmoid(cs * wco + o) + if op.use_peephole.val: + wco = op.weight_peep_o.val + x = mb.mul(x=cs, y=wco, before_op=before_op) + pre_o = mb.add(x=x, y=o, before_op=before_op) + else: + pre_o = o + o = mb.sigmoid(x=pre_o, before_op=before_op) + + # co = tanh(cs) + co = mb.tanh(x=cs, before_op=before_op) + + # h = co .* o + h = mb.mul(x=co, y=o, before_op=before_op) + + return [i, cs, f, o, ci, co, h] + + +def expand_tf_lstm_block_cell(op): + if op.op_type != "tf_lstm_block_cell": + raise ValueError() + + with op.enclosing_block as block: + x = op.x # [b, input_dim] + h_prev = op.h_prev # [b, hidden_dim] + cs_prev = op.c_prev # [b, hidden_dim] + + i, cs, f, o, ci, co, h = _lstm_cell_builder( + op, x, h_prev, cs_prev, before_op=op + ) + + # Replace all outputs + new_outputs = [i, cs, f, o, ci, co, h] + for old_v, new_v in zip(op.outputs, new_outputs): + block.replace_uses_of_var_after_op( + anchor_op=op, old_var=old_v, new_var=new_v + ) + block.remove_ops([op]) + + +def expand_tf_lstm_block(op): + if op.op_type != "tf_lstm_block": + raise ValueError() + + with op.enclosing_block as block: + x = op.x # [s, b, input_dim] + h_prev = op.h_prev # [b, hidden_dim] + cs_prev = op.c_prev # [b, hidden_dim] + + # Allocate two lists: cs & h + x_shape = mb.shape(x=x, before_op=op) + length = mb.slice_by_index(x=x_shape, begin=[0], end=[1], before_op=op) + h_shape = mb.shape(x=h_prev, before_op=op) + list_shape = mb.concat(values=[length, h_shape], axis=0, before_op=op) + cs_list = mb.fill(shape=list_shape, before_op=op) + h_list = mb.fill(shape=list_shape, before_op=op) + + # append initial state at index 0 + cs_prev = mb.expand_dims(x=cs_prev, axes=[0], 
before_op=op) + cs_list = mb.concat(values=[cs_prev, cs_list], axis=0, before_op=op) + h_prev = mb.expand_dims(x=h_prev, axes=[0], before_op=op) + h_list = mb.concat(values=[h_prev, h_list], axis=0, before_op=op) + + def cond(i, cs_list, h_list): + return mb.less(x=i, y=length) + + def body(i, cs_list, h_list): + xi = mb.gather(x=x, indices=i, axis=0) + h_prev = mb.gather(x=h_list, indices=i, axis=0) + cs_prev = mb.gather(x=cs_list, indices=i, axis=0) + + ig, cs, fg, og, ci, co, h = _lstm_cell_builder(op, xi, h_prev, cs_prev) + + counter = mb.add(x=i, y=1) + + return ( + counter, + mb.scatter(data=cs_list, indices=counter, updates=cs), + mb.scatter(data=h_list, indices=counter, updates=h), + ) + + _, cs_list, h_list = mb.while_loop( + _cond=cond, _body=body, loop_vars=([0], cs_list, h_list), before_op=op + ) + + # strip initial state or element at index 0 + begin, end = [1, 0, 0], [0, 0, 0] + begin_mask = [False, True, True] + end_mask = [True, True, True] + cs = mb.slice_by_index( + x=cs_list, + begin=begin, + end=end, + begin_mask=begin_mask, + end_mask=end_mask, + before_op=op, + ) + h = mb.slice_by_index( + x=h_list, + begin=begin, + end=end, + begin_mask=begin_mask, + end_mask=end_mask, + before_op=op, + ) + + # Replace all outputs + new_outputs = [cs, h] + for old_v, new_v in zip( + [ov for index, ov in enumerate(op.outputs) if index in [1, 6]], new_outputs + ): + block.replace_uses_of_var_after_op( + anchor_op=op, old_var=old_v, new_var=new_v + ) + block.remove_ops([op]) diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/test_passes.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/test_passes.py new file mode 100644 index 000000000..662e39af0 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/test_passes.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +from coremltools.converters.mil.testing_utils import ( + assert_model_is_valid, + assert_same_output_names, +) +import copy +import pytest + +pytest.importorskip("tensorflow", minversion="1.14.0") + + +def test_backfill_make_list_elem_type(): + # The while_loop appends [1, 2]*i to `ls` for each iteration + # i = 0, ... num_iters-1. 
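+    # (Concretely, iteration i writes the (2,)-shaped `update` input at
+    # index i via list_write.) A rough TF1 equivalent of the MIL program
+    # below, as an illustrative sketch only (names here are hypothetical,
+    # not part of the test):
+    #
+    #   ta = tf.TensorArray(tf.float32, size=1, dynamic_size=True)
+    #   def body(i, ta):
+    #       return i + 1, ta.write(i, update)  # first write pins elem type
+    #
+    # TensorArrayV3 lowers to tf_make_list with an unknown elem_type; the
+    # backfill pass recovers it from list_write/list_scatter consumers.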
+
+    elem_shape = (2,)
+
+    @mb.program(
+        input_specs=[mb.TensorSpec(shape=elem_shape),]
+    )
+    def prog(update):
+        def body(i, ls):
+            return mb.add(x=i, y=1), mb.list_write(ls=ls, index=i, value=update)
+
+        def cond(i, ls):
+            return mb.less(x=i, y=num_iters)
+
+        i = 0
+        ls = mb.tf_make_list(init_length=1)
+        num_iters = 3
+        _, final_tensor_list = mb.while_loop(_cond=cond, _body=body, loop_vars=(i, ls))
+        list_len = mb.list_length(ls=final_tensor_list)
+        indices = mb.range_1d(start=0, end=list_len, step=1)
+        return mb.list_gather(ls=final_tensor_list, indices=indices)
+
+    # tf_make_list has no elem_type info
+    make_list_op = prog.find_ops(op_type="tf_make_list", exactly_one=True)[0]
+    assert make_list_op.outputs[0].elem_type == types.unknown
+
+    prev_prog = copy.deepcopy(prog)
+    PASS_REGISTRY["tensorflow::backfill_make_list_elem_type"](prog)
+    assert_same_output_names(prev_prog, prog)
+    prog.validate()
+
+    # tf_make_list is replaced with make_list and should have elem_type now
+    make_list_op = prog.find_ops(op_type="make_list", exactly_one=True)[0]
+    assert make_list_op.outputs[0].elem_type.get_shape() == elem_shape
+
+    assert_model_is_valid(prog, {"update": elem_shape})
diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py
new file mode 100644
index 000000000..0185f1d97
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_lstm_to_core_lstm.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil import types
+import numpy as np
+import logging
+
+
+@register_pass(namespace="tensorflow")
+def tf_lstm_to_core_lstm(prog):
+    """
+    Try to map TF dialect ops `tf_lstm_block` and `tf_lstm_block_cell` to
+    `lstm` in the core op set if compatible. They are compatible if all of the
+    following are satisfied:
+
+    - If tf_lstm_block: only the h output is consumed. tf_lstm_block has 7
+      sequence outputs: [i, cs, f, o, ci, co, h]. Each of them (e.g., i) has
+      shape [seq_len, batch, hidden_dim] (see the tf_lstm_block op doc string).
+      Core lstm only supports a sequence output for the hidden state h, so if
+      any output other than `h` is consumed, we cannot convert to lstm in the
+      core op set.
+
+    - If tf_lstm_block_cell: only the cs and h outputs (outputs[1], outputs[6])
+      are consumed. Similar to above.
+ + - batch size == 1 (due to bugs in core lstm backend impl rdar://62475041) + + Inputs: + + prog: Program + """ + for f_name, f in prog.functions.items(): + tf_lstm_to_core_lstm_block(f) + + +def tf_lstm_to_core_lstm_block(block): + # shallow copy hides changes on f.operations during the loop + for op in block.operations[:]: + for b in op.blocks: + tf_lstm_to_core_lstm_block(b) + + if op.op_type in ["tf_lstm_block_cell", "tf_lstm_block"]: + if try_replace_with_core_lstm(op): + logging.info("Successfully map {} to lstm".format(op.op_type)) + else: + logging.info("Unable to map {} to lstm".format(op.op_type)) + + +def try_replace_with_core_lstm(op): + """ + Inputs: + + op (Operation): op.op_type must be 'tf_lstm_block_cell' or `tf_lstm_block` + + Returns: + + True if op can be represented by mb.lstm op in SSA. False otherwise + """ + if op.op_type == "tf_lstm_block_cell": + batch = op.x.shape[0] + else: # tf_lstm_block + batch = op.x.shape[1] + + # Check for unsupported configuration + # 1. Peephole is present + # TODO: rdar://62913058 ([LSTM] Incorrect output when pass peephole values to LSTM/rnn_arch) + if op.use_peephole.val: + return False + # 2. Clip is provided + # TODO: rdar://62913148 ([LSTM] Incorrect output when clip is used for LSTM/rnn_arch) + if op.cell_clip is not None: + return False + + # Check if tf_lstm_block_cell can be replaced with lstm op + i, cs, f, o, ci, co, h = op.outputs + if op.op_type == "tf_lstm_block_cell": + unsupported_outputs = [i, f, o, ci, co] # only cs, h are supported + for ov in unsupported_outputs: + if len(ov.child_ops) > 0 or len(ov.consuming_blocks) > 0: + return False + else: # tf_lstm_block + unsupported_outputs = [i, cs, f, o, ci, co] # only h is supported + for ov in unsupported_outputs: + if len(ov.child_ops) > 0 or len(ov.consuming_blocks) > 0: + return False + # op is compatible with lstm + + hidden_dim = op.c_prev.shape[1] + + mb_peep = None + if op.use_peephole.val: + mb_peep = np.stack( + [op.weight_peep_i.val, op.weight_peep_f.val, op.weight_peep_o.val] + ) + + # weights. TF1 W is icfo. Need to convert to ifco + tf_w = op.weight.val # [input_dim+outut_dim, 4*hidden_dim] in icfo layout + tf_w_i, tf_w_c, tf_w_f, tf_w_o = np.split(tf_w, 4, axis=1) + w = np.concatenate([tf_w_i, tf_w_f, tf_w_o, tf_w_c], axis=1) + + # Bias is icfo. Convert to ssa LSTM's ifoc layout + tf_b = op.bias.val + tf_b_i, tf_b_c, tf_b_f, tf_b_o = np.split(tf_b, 4, axis=0) + tf_b_f += op.forget_bias.val # add forget bias to bias + bias = np.concatenate([tf_b_i, tf_b_f, tf_b_o, tf_b_c], axis=0) + + # TF's bias = input_bias + recurrence_bias [4*hidden_dims]. Use all zeros + # for recurrence bias. 
mil lstm expects bias shape [2, 4*hidden_dims] + bias = np.stack([np.zeros_like(bias), bias]) + + cell_clip = None if op.cell_clip is None else op.cell_clip.val + + output_sequence = op.op_type == "tf_lstm_block" + + block = op.enclosing_block + with block: + # x: [seq_len, batch, input_dim] + if op.op_type == "tf_lstm_block_cell": + x = mb.expand_dims(x=op.x, axes=[0], before_op=op) + else: # tf_lstm_block + x = op.x + new_h_all, new_h, new_cs = mb.lstm( + x=x, + initial_c=op.c_prev, + initial_h=op.h_prev, + weight=w, + bias=bias, + # activations[2] should be "sigmoid" (rdar://62272632) + activations=("sigmoid", "tanh", "tanh"), + peephole=mb_peep, + clip=cell_clip, + output_sequence=output_sequence, + name=op.name, + before_op=op, + ) + + if op.op_type == "tf_lstm_block_cell": + block.replace_uses_of_var_after_op(anchor_op=op, old_var=cs, new_var=new_cs) + block.replace_uses_of_var_after_op(anchor_op=op, old_var=h, new_var=new_h) + else: # 'tf_lstm_block' + block.replace_uses_of_var_after_op(anchor_op=op, old_var=h, new_var=new_h_all) + block.remove_ops([op]) + return True diff --git a/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_passes.py b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_passes.py new file mode 100644 index 000000000..499a6f325 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/ssa_passes/tf_passes.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +import logging +from coremltools.converters._profile_utils import _profile + + +@_profile +def tensorflow_passes(prog): + passes = [ + "common::dead_code_elimination", + "common::loop_invariant_elimination", + "tensorflow::backfill_make_list_elem_type", + # DCE to reduce tf_lstm_block outputs and allow lstm_rewrite to + # ssa lstm + "common::dead_code_elimination", + # tensorflow::tf_lstm_to_core_lstm must come before + # tensorflow::expand_tf_lstm + "tensorflow::tf_lstm_to_core_lstm", + "tensorflow::expand_tf_lstm", + ] + + prog.validate() + for p in passes: + logging.info('Performing passes for tf1 frontend: "{}"'.format(p)) + PASS_REGISTRY[p](prog) + prog.validate() + + logging.debug("Program after tf1 frontend passes:\n{}".format(prog)) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/__init__.py b/coremltools/converters/mil/frontend/tensorflow/test/__init__.py new file mode 100644 index 000000000..61aafff42 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py new file mode 100644 index 000000000..f9d415116 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_custom_ops.py @@ -0,0 +1,274 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * +from coremltools.converters.mil.frontend.tensorflow.test.testing_utils import ( + make_tf_graph, + tf_graph_to_proto, + run_compare_tf, +) + +# Custom Op imports +from coremltools.converters.mil.frontend.tensorflow.tf_op_registry import register_tf_op + +# Importing _TF_OPS_REGISTRY to ensure `overriding` existing TF op does not break +# testing of default op +# pytest imports all the tests and hence overriding op invokes custom op which is not expected +# In real usecase, importing following is not recommended!! +from coremltools.converters.mil.frontend.tensorflow.tf_op_registry import ( + _TF_OPS_REGISTRY, +) +from coremltools.converters.mil.mil.ops.defs._op_reqs import * +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.types.symbolic import is_symbolic + + +class TestCustomMatMul: + # Define SSA Custom Op for Sparse MatMul + # This will map to `custom_op` in SSA with binding information + # to bind input spec to the custom implementation + @register_op(doc_str="Sparse MatMul Layer", is_custom_op=True) + class custom_sparse_matmul(Operation): + # Defining input spec for current op + input_spec = InputSpec( + x=TensorInputType(), + y=TensorInputType(), + transpose_x=BoolInputType(const=True, default=False), + transpose_y=BoolInputType(const=True, default=False), + x_is_sparse=BoolInputType(const=True, default=False), + y_is_sparse=BoolInputType(const=True, default=False), + ) + + # Specifying binding for custom op for specifying inputs, + # parameters required for creating custom op to be synced with Swift API + bindings = { + "class_name": "SparseMatMul", + "input_order": ["x", "y"], + "parameters": ["transpose_x", "transpose_y", "x_is_sparse", "y_is_sparse"], + "description": "Custom Sparse MatMul Layer", + } + + def __init__(self, **kwargs): + super(TestCustomMatMul.custom_sparse_matmul, self).__init__(**kwargs) + + def type_inference(self): + x_type = self.x.dtype + x_shape = self.x.shape + y_shape = self.y.shape + # For illustration purpose, assumming getting valid shape + # Ideally, should consider transpose_?, ?_is_sparse parameters into consideration + # for computing output shape + ret_shape = [x_shape[0], y_shape[1]] + return types.tensor(x_type, [x_shape[0], y_shape[1]]) + + # TensorFlow Sparse Matmul Op + @register_tf_op() + def SparseMatMul(context, node): + a = context[node.inputs[0]] + b = context[node.inputs[1]] + transpose_a = node.attr.get("transpose_a", False) + transpose_b = node.attr.get("transpose_b", False) + a_is_sparse = node.attr.get("a_is_sparse", False) + b_is_sparse = node.attr.get("b_is_sparse", False) + + x = mb.custom_sparse_matmul( + x=a, + y=b, + transpose_x=transpose_a, + transpose_y=transpose_b, + x_is_sparse=a_is_sparse, + y_is_sparse=b_is_sparse, + name=node.name, + ) + context.add(node.name, x) + + @pytest.mark.skipif(not testing_reqs._HAS_TF_1, reason=MSG_TF1_NOT_FOUND) + @pytest.mark.parametrize( + "use_cpu_only, backend, transpose_a, transpose_b," "a_is_sparse, b_is_sparse", + itertools.product( + [True], backends, [True, False], [True, False], [True, False], [True, False] + ), + ) + def test_tf( + self, use_cpu_only, backend, transpose_a, transpose_b, a_is_sparse, b_is_sparse + ): + rank = 2 + shape = list(np.random.randint(low=3, 
high=100, size=1)) * rank + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + y = tf.placeholder(tf.float32, shape=shape) + ref = tf.sparse_matmul( + x, + y, + transpose_a=transpose_a, + transpose_b=transpose_b, + a_is_sparse=a_is_sparse, + b_is_sparse=b_is_sparse, + ) + spec, _, _, _ = tf_graph_to_proto( + graph, + { + x: random_gen(shape, rand_min=-100, rand_max=100), + y: random_gen(shape, rand_min=-100, rand_max=100), + }, + ref, + backend=backend, + ) + layers = spec.neuralNetwork.layers + assert layers[-1].custom is not None, "Expecting a custom layer" + assert ( + "SparseMatMul" == layers[-1].custom.className + ), "Custom Layer class name mis-match" + assert ( + transpose_a == layers[-1].custom.parameters["transpose_x"].boolValue + ), "Incorrect parameter value k" + assert ( + transpose_b == layers[-1].custom.parameters["transpose_y"].boolValue + ), "Incorrect parameter value k" + assert ( + a_is_sparse == layers[-1].custom.parameters["x_is_sparse"].boolValue + ), "Incorrect parameter value k" + assert ( + b_is_sparse == layers[-1].custom.parameters["y_is_sparse"].boolValue + ), "Incorrect parameter value k" + + +# TODO: rdar://61241807 ([MIL] [Polish] Custom layer operator documentation) +# Following logging is to ensure testing of TopK implemented in tf converter +# default path is testing with appropriate conversion function +# Log default tf topk +default_tf_topk = _TF_OPS_REGISTRY.get("TopKV2", None) + + +# Override TopK op with override=True flag +@register_tf_op(tf_alias=["TopKV2"], override=True) +def CustomTopK(context, node): + x = context[node.inputs[0]] + k = context[node.inputs[1]] + sorted = node.attr.get("sorted", False) + x = mb.custom_topk(x=x, k=k.val, axis=-1, sorted=sorted, name=node.name) + context.add(node.name, x) + + +# Custom TF TopK +custom_tf_topk = _TF_OPS_REGISTRY["TopKV2"] + + +def _set_tf_op(op_type, _op_func): + _TF_OPS_REGISTRY[op_type] = _op_func + + +class TestCustomTopK: + # Defining SSA TopK Op + @register_op(doc_str="Custom TopK Layer", is_custom_op=True) + class custom_topk(Operation): + input_spec = InputSpec( + x=TensorInputType(), + k=IntInputType(const=True, default=1), + axis=IntInputType(const=True, default=-1), + sorted=BoolInputType(const=True, default=False), + ) + + bindings = { + "class_name": "TopK", + "input_order": ["x"], + "parameters": ["k", "axis", "sorted"], + "description": "Top K Custom layer", + } + + def __init__(self, **kwargs): + super(TestCustomTopK.custom_topk, self).__init__(**kwargs) + + def type_inference(self): + x_type = self.x.dtype + x_shape = self.x.shape + k = self.k.val + axis = self.axis.val + + if not is_symbolic(x_shape[axis]) and k > x_shape[axis]: + msg = "K={} is greater than size of the given axis={}" + raise ValueError(msg.format(k, axis)) + + ret_shape = list(x_shape) + ret_shape[axis] = k + return types.tensor(x_type, ret_shape), types.tensor(types.int32, ret_shape) + + @pytest.mark.skipif(not testing_reqs._HAS_TF_1, reason=MSG_TF1_NOT_FOUND) + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, k", + itertools.product([True], backends, [rank for rank in range(1, 4)], [1, 2],), + ) + def test_tf(self, use_cpu_only, backend, rank, k): + # Set TopK to custom TF function + _set_tf_op("TopKV2", custom_tf_topk) + shape = np.random.randint(low=3, high=6, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.math.top_k(x, k=k, sorted=True) + ref = (ref[1], ref[0]) + spec, _, _, _ = tf_graph_to_proto( + graph, 
+ {x: random_gen(shape, rand_min=-100, rand_max=100)}, + ref, + backend=backend, + ) + layers = spec.neuralNetwork.layers + assert layers[-1].custom is not None, "Expecting a custom layer" + assert ( + "TopK" == layers[-1].custom.className + ), "Custom Layer class name mis-match" + assert ( + k == layers[-1].custom.parameters["k"].intValue + ), "Incorrect parameter value k" + assert ( + True == layers[-1].custom.parameters["sorted"].boolValue + ), "Incorrect parameter value for Sorted" + # Set TopK to default conversion function + _set_tf_op("TopKV2", default_tf_topk) + + +default_selu = _TF_OPS_REGISTRY.get("Selu", None) + + +@register_tf_op(tf_alias=[], override=True) +def Selu(context, node): + x = context[node.inputs[0]] + alpha = 1.6732631921768188 + lamda = 1.0507010221481323 + out_elu = mb.elu(x=x, alpha=alpha) + out = mb.mul(x=out_elu, y=lamda, name=node.name) + context.add(node.name, out) + + +composite_selu = _TF_OPS_REGISTRY["Selu"] + + +class TestCompositeOp: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, list(range(1, 5))), + ) + def test_selu(self, use_cpu_only, backend, rank): + _set_tf_op("Selu", composite_selu) + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.keras.activations.selu(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -10.0, 10.0)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + _set_tf_op("Selu", default_selu) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_load.py b/coremltools/converters/mil/frontend/tensorflow/test/test_load.py new file mode 100644 index 000000000..e85392442 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_load.py @@ -0,0 +1,411 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import numpy as np +import os +import six +import pytest +import shutil +import tempfile +import coremltools as ct +import coremltools.converters as converter +from coremltools._deps import _IS_MACOS +import coremltools.proto.FeatureTypes_pb2 as ft +from coremltools import TensorType, ImageType, RangeDim, EnumeratedShapes +from coremltools.converters.mil.testing_utils import random_gen +from coremltools.converters.mil.frontend.tensorflow.converter import TFConverter +from coremltools.converters.mil.frontend.tensorflow.test.testing_utils import ( + frontend, + make_tf_graph, + run_compare_tf, + get_tf_keras_io_names, +) + +tf = pytest.importorskip("tensorflow") + + +class TestTf1ModelInputsOutputs: + def setup(self): + self.saved_model_dir = tempfile.mkdtemp() + _, self.model_path_h5 = tempfile.mkstemp( + suffix=".h5", prefix=self.saved_model_dir + ) + _, self.model_path_pb = tempfile.mkstemp( + suffix=".pb", prefix=self.saved_model_dir + ) + + def teardown(self): + if os.path.exists(self.saved_model_dir): + shutil.rmtree(self.saved_model_dir) + + def test_infer_inputs(self): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + if not isinstance(outputs, (tuple, list)): + outputs = [outputs] + + output_names = [ + j if isinstance(j, six.string_types) else j.op.name for j in outputs + ] + mlmodel = converter.convert(model, outputs=output_names) + assert mlmodel is not None + + input_values = [random_gen(x_shape, -10.0, 10.0)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf(model, input_dict, outputs) + + def test_infer_outputs(self): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + input_name = ( + inputs[0] if isinstance(inputs[0], six.string_types) else inputs[0].op.name + ) + mlmodel = converter.convert(model, inputs=[TensorType(input_name, (3, 4, 5))]) + assert mlmodel is not None + + input_values = [random_gen(x_shape, -10.0, 10.0)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf(model, input_dict, outputs) + + def test_infer_inputs_and_outputs(self): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + mlmodel = converter.convert(model) + assert mlmodel is not None + + input_values = [random_gen(x_shape, -10.0, 10.0)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf(model, input_dict, outputs) + + def test_extract_sub_model(self): + x_shape = (3, 4, 5) + y_shape = (3, 4, 5) + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.nn.relu(x), tf.math.add(x, y) + + model, inputs, outputs = build_model + if isinstance(outputs[0], six.string_types): + first_output_name = outputs[0] + else: + first_output_name = outputs[0].name.split(":")[0] + mlmodel = converter.convert(model, outputs=[first_output_name]) + assert mlmodel is not None + + def test_auto_image_nhwc_input_names(self): + x_shape = (4, 5, 3) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + + mlmodel = converter.convert(model, inputs=[ImageType()]) + assert mlmodel is not None + + def test_auto_image_nchw_input_names(self): + x_shape = (3, 4, 5) + + 
@make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + + mlmodel = converter.convert(model, inputs=[ImageType(channel_first=True)]) + assert mlmodel is not None + + def test_invalid_input_names(self): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + + with pytest.raises(ValueError) as e: + converter.convert(model, inputs=[TensorType("invalid_name", x_shape)]) + e.match( + r"Input \(invalid_name\) provided is not found in given tensorflow graph. Placeholders in graph are: .*" + ) + + @pytest.mark.parametrize( + "target", + [ct.target.iOS13, ct.target.macOS15, ct.target.watchOS6, ct.target.tvOS13], + ) + def test_invalid_deployment_target_cumsum(self, target): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.math.cumsum(x, axis=-1, reverse=False, exclusive=False) + + model, inputs, outputs = build_model + + with pytest.raises(ValueError) as e: + converter.convert(model, minimum_deployment_target=target) + e.match( + r"Provided minimum deployment target .* version 4 but converted model " + r"uses .* available from version 5 onwards.\n 1. Cumsum operation\n" + ) + + @pytest.mark.parametrize( + "target", + [ct.target.iOS14, ct.target.macOS16, ct.target.watchOS7, ct.target.tvOS14], + ) + def test_valid_deployment_target_cumsum(self, target): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.math.cumsum(x, axis=-1, reverse=False, exclusive=False) + + model, inputs, outputs = build_model + + # successful conversion + converter.convert(model, minimum_deployment_target=target) + + def test_invalid_output_names(self): + x_shape = (3, 4, 5) + + @make_tf_graph([x_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + with pytest.raises(AssertionError) as e: + converter.convert(model, source=frontend, outputs=["invalid_name"]) + e.match(r".* is not in graph") + + def test_shaping_utils(self): + @make_tf_graph([(None, 4, 5)]) + def build_flexible_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_flexible_model + input_name = TFConverter._get_tensor_name(inputs[0]) + output_name = TFConverter._get_tensor_name(outputs[0]) + + # static-Flexible shape + mlmodel = converter.convert( + model, inputs=[TensorType(name=input_name)], outputs=[output_name] + ) + assert mlmodel is not None + input_values = [random_gen((3, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + if _IS_MACOS: + ret = mlmodel.predict(input_dict) + np.allclose(ret[output_name], np.maximum(input_values[0], 0.0)) + + # Enumerate shape + inputs_shape = [ + TensorType(input_name, EnumeratedShapes(shapes=[(3, 4, 5), (4, 4, 5)])) + ] + mlmodel = converter.convert(model, inputs=inputs_shape, outputs=[output_name]) + assert mlmodel is not None + input_values = [random_gen((3, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + if _IS_MACOS: + ret = mlmodel.predict(input_dict) + np.allclose(ret[output_name], np.maximum(input_values[0], 0.0)) + + input_values = [random_gen((4, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + if _IS_MACOS: + ret = mlmodel.predict(input_dict) + np.allclose(ret[output_name], np.maximum(input_values[0], 0.0)) + + if _IS_MACOS: + with pytest.raises(RuntimeError) as e: + input_values = [random_gen((5, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + ret = 
mlmodel.predict(input_dict) + + # Ranged shape + inputs_shape = [TensorType(input_name, [RangeDim(3, 5), 4, 5])] + mlmodel = converter.convert(model, inputs=inputs_shape, outputs=[output_name]) + assert mlmodel is not None + input_values = [random_gen((3, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + if _IS_MACOS: + ret = mlmodel.predict(input_dict) + np.allclose(ret[output_name], np.maximum(input_values[0], 0.0)) + + input_values = [random_gen((4, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + if _IS_MACOS: + ret = mlmodel.predict(input_dict) + np.allclose(ret[output_name], np.maximum(input_values[0], 0.0)) + + if _IS_MACOS: + with pytest.raises(RuntimeError) as e: + input_values = [random_gen((2, 4, 5), -10.0, 10.0)] + input_dict = {input_name: input_values[0]} + ret = mlmodel.predict(input_dict) + + def test_default_data_types(self): + @make_tf_graph([(2, 2)]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + mlmodel = converter.convert(model) + assert mlmodel is not None + spec = mlmodel.get_spec() + + # Defaults should be FLOAT32 instead of DOUBLE + it = spec.description.input[0].type.multiArrayType.dataType + assert it == ft.ArrayFeatureType.ArrayDataType.Value("FLOAT32") + ot = spec.description.output[0].type.multiArrayType.dataType + assert ot == ft.ArrayFeatureType.ArrayDataType.Value("FLOAT32") + + +class TestTf1ModelFormats: + def setup(self): + self.saved_model_dir = tempfile.mkdtemp() + _, self.model_path_h5 = tempfile.mkstemp( + suffix=".h5", prefix=self.saved_model_dir + ) + _, self.model_path_pb = tempfile.mkstemp( + suffix=".pb", prefix=self.saved_model_dir + ) + + def teardown(self): + if os.path.exists(self.saved_model_dir): + shutil.rmtree(self.saved_model_dir) + + def test_graph_def(self): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=(3, 4, 5)) + out = tf.nn.relu(x) + mlmodel = converter.convert( + graph, inputs=[TensorType(x.op.name, (3, 4, 5))], outputs=[out.op.name] + ) + assert mlmodel is not None + + def test_graph_def_file(self): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=(3, 4, 5)) + out = tf.nn.relu(x) + tf.io.write_graph( + graph, self.saved_model_dir, self.model_path_pb, as_text=False + ) + mlmodel = converter.convert( + self.model_path_pb, + inputs=[TensorType(x.op.name, (3, 4, 5))], + outputs=[out.op.name], + ) + assert mlmodel is not None + + def test_saved_model_from_simple_save(self): + with tf.compat.v1.Session() as sess: + x = tf.placeholder(shape=(1, 3, 5), dtype=tf.float32) + y = tf.nn.relu(x) + inputs = {"x": x} + outputs = {"y": y} + tf.compat.v1.saved_model.simple_save( + sess, self.saved_model_dir, inputs, outputs + ) + mlmodel = converter.convert(self.saved_model_dir) + assert mlmodel is not None + + def test_tf_keras(self): + keras_model = tf.keras.Sequential( + [tf.keras.layers.ReLU(input_shape=(4, 5), batch_size=3)] + ) + input_names, output_names = get_tf_keras_io_names(keras_model) + mlmodel = converter.convert( + keras_model, + inputs=[TensorType(input_names[0], (3, 4, 5))], + outputs=["Identity"], + source=frontend, + ) + assert mlmodel is not None + + def test_tf_keras_hdf5_file(self): + keras_model = tf.keras.Sequential( + [tf.keras.layers.ReLU(input_shape=(4, 5), batch_size=3)] + ) + keras_model.save(self.model_path_h5) + input_names, output_names = get_tf_keras_io_names(keras_model) + mlmodel = converter.convert( + self.model_path_h5, + inputs=[TensorType(input_names[0], (3, 4, 5))], 
+ outputs=["Identity"], + source=frontend, + ) + assert mlmodel is not None + + def test_model_metadata(self): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=(3, 4, 5)) + out = tf.nn.relu(x) + mlmodel = converter.convert( + graph, inputs=[TensorType(x.op.name, (3, 4, 5))], outputs=[out.op.name] + ) + metadata_keys = mlmodel.get_spec().description.metadata.userDefined + assert "com.github.apple.coremltools.version" in metadata_keys + assert "com.github.apple.coremltools.source" in metadata_keys + assert "tensorflow==1." in metadata_keys["com.github.apple.coremltools.source"] + + def test_invalid_format_none(self): + with pytest.raises(NotImplementedError) as e: + converter.convert(None, source="tensorflow") + e.match(r"Expected model format: .* .pb") + + def test_invalid_format_invalid_extension(self): + _, invalid_filename = tempfile.mkstemp( + suffix=".invalid", prefix=self.saved_model_dir + ) + with pytest.raises(NotImplementedError) as e: + converter.convert(invalid_filename, source="tensorflow") + e.match(r"Expected model format: .* .pb") + + def test_invalid_converter_source(self): + with pytest.raises(ValueError) as e: + converter.convert(None, source="invalid") + expected_msg = r'Unrecognized value of argument "source": .*' + e.match(expected_msg) + + def test_invalid_converter_minimum_deployment_flag(self): + with pytest.raises(TypeError) as e: + converter.convert( + None, source="tensorflow", minimum_deployment_target="iOs14" + ) + expected_msg = ( + "Unrecognized value of argument 'minimum_deployment_target': iOs14. " + "It needs to be a member of 'coremltools.target' enumeration" + ) + + e.match(expected_msg) + + def test_invalid_converter_target(self): + with pytest.raises(NotImplementedError) as e: + converter.convert(None, convert_to="invalid", source="tensorflow") + e.match(r"Backend converter .* not implemented") + + def test_invalid_format_non_exist(self): + non_exist_filename = self.model_path_pb.replace(".pb", "_non_exist.pb") + with pytest.raises(ValueError) as e: + converter.convert(non_exist_filename, source="tensorflow") + e.match(r"Input model .* does not exist") diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py new file mode 100644 index 000000000..6dba08898 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_ops.py @@ -0,0 +1,5019 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from coremltools.converters.mil import testing_reqs
+from coremltools.converters.mil.testing_reqs import *
+from coremltools.converters.mil.frontend.tensorflow.test.testing_utils import (
+    make_tf_graph,
+    run_compare_tf,
+    layer_counts,
+)
+import math
+
+backends = testing_reqs.backends
+
+tf = pytest.importorskip("tensorflow")
+
+
+class TestPlaceholderAsOutput:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, rank",
+        itertools.product([True, False], backends, [rank for rank in range(6)]),
+    )
+    def test(self, use_cpu_only, backend, rank):
+        input_shape = np.random.randint(low=1, high=6, size=rank)
+
+        @make_tf_graph([input_shape, input_shape, input_shape])
+        def build_model(x, y, z):
+            return x, y, x + 1, x + y
+
+        model, inputs, outputs = build_model
+        input_values = [random_gen(input_shape, -1, 1), random_gen(input_shape, -1, 1)]
+        # Note: zip() pairs values with only x and y; the third placeholder z
+        # is left unfed, and no output consumes it.
+        input_dict = dict(zip(inputs, input_values))
+        run_compare_tf(
+            model,
+            input_dict,
+            outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+
+class TestActivationElu:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, rank",
+        itertools.product([True, False], backends, [rank for rank in range(1, 6)]),
+    )
+    def test(self, use_cpu_only, backend, rank):
+        input_shape = np.random.randint(low=1, high=6, size=rank)
+
+        @make_tf_graph([input_shape])
+        def build_model(x):
+            return tf.nn.elu(x)
+
+        model, inputs, outputs = build_model
+
+        input_values = [random_gen(input_shape, -1, 1)]
+        input_dict = dict(zip(inputs, input_values))
+        run_compare_tf(
+            model,
+            input_dict,
+            outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+
+class TestAddN:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, rank, num_inputs",
+        itertools.product([True, False], backends, list(range(6)), list(range(1, 10)),),
+    )
+    def test(self, use_cpu_only, backend, rank, num_inputs):
+        if not use_cpu_only and rank == 5 and num_inputs == 9:
+            # Failure on this specific parameter set
+            return
+        if backend == "mil_proto" and rank == 0:
+            return
+        input_shape = np.random.randint(low=1, high=6, size=rank)
+        input_shapes = [input_shape[:] for _ in range(num_inputs)]
+
+        @make_tf_graph(input_shapes)
+        def build_model(*inputs):
+            return tf.raw_ops.AddN(inputs=inputs)
+
+        model, inputs, outputs = build_model
+        input_values = [random_gen(shape, -1, 1) for shape in input_shapes]
+        input_dict = dict(zip(inputs, input_values))
+        run_compare_tf(
+            model,
+            input_dict,
+            outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+
+class TestActivationLeakyReLU:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, rank",
+        itertools.product([True, False], backends, [rank for rank in range(1, 6)]),
+    )
+    def test(self, use_cpu_only, backend, rank):
+        input_shape = np.random.randint(low=1, high=6, size=rank)
+
+        @make_tf_graph([input_shape])
+        def build_model(x):
+            return tf.nn.leaky_relu(x, 0.2)
+
+        model, inputs, outputs = build_model
+
+        input_values = [random_gen(input_shape, -1, 1)]
+        input_dict = dict(zip(inputs, input_values))
+        run_compare_tf(
+            model,
+            input_dict,
+            outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+
+class TestActivationReLU:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, rank",
+        itertools.product([True, False], backends, [rank for rank in range(1, 6)]),
+    )
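+    # Shared recipe for the activation tests in this file: make_tf_graph
+    # builds a TF graph with placeholders of the given shapes, and
+    # run_compare_tf converts it and checks Core ML predictions against
+    # TensorFlow outputs on random inputs.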
def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.relu(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -10.0, 10)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestActivationReLU6: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)]), + ) + def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.relu6(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -1, 1)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestGeluTanhApproximation: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(2, 3)]), + ) + def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + a = 0.5 * ( + 1.0 + tf.tanh((math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3)))) + ) + return a * x + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -5, 5)] + input_dict = dict(zip(inputs, input_values)) + spec = run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + assert len(spec.neuralNetwork.layers) == 1 + assert spec.neuralNetwork.layers[0].WhichOneof("layer") == "gelu" + + +class TestActivationSigmoid: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)]), + ) + def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.math.sigmoid(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -1, 1)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestActivationSoftPlus: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)]), + ) + def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.math.softplus(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -1, 1)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestActivationSoftmax: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axes", + itertools.product( + [True, False], + backends, + [(rank, axis) for rank in range(1, 6) for axis in range(-1, rank)], + ), + ) + def test(self, use_cpu_only, backend, rank_and_axes): + rank, axis = rank_and_axes + input_shape = np.random.randint(low=1, high=6, 
size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.softmax(x, axis=axis) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -1, 1)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestActivationSoftSign: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)]), + ) + def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.math.softsign(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -1, 1)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestActivationSelu: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)]), + ) + def test(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.selu(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, -1.0, 1.0)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestSelect: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, broadcast, dynamic", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [True, False], + [True, False], + ), + ) + def test_select(self, use_cpu_only, backend, rank, broadcast, dynamic): + shape = np.random.randint(low=1, high=4, size=rank) + cond_shape = np.array([shape[0]]) if broadcast else shape + + cond_val = np.random.randint(low=0, high=2, size=cond_shape).astype(np.bool) + a_val = random_gen(shape=shape, rand_min=-1962.0, rand_max=0.0) + b_val = random_gen(shape=shape, rand_min=0.0, rand_max=1964.0) + + if dynamic: + cond_shape = [None] * len(cond_shape) + [tf.bool] + a_shape = [None] * len(shape) + [tf.float32] + b_shape = [None] * len(shape) + [tf.float32] + else: + cond_shape = cond_shape.tolist() + [tf.bool] + a_shape = shape.tolist() + [tf.float32] + b_shape = shape.tolist() + [tf.float32] + + @make_tf_graph([cond_shape, a_shape, b_shape]) + def build_model_select(cond, a, b): + return tf.raw_ops.Select(condition=cond, x=a, y=b) + + model, inputs, outputs = build_model_select + inputs_dic = dict(zip(inputs, [cond_val, a_val, b_val])) + run_compare_tf( + model, inputs_dic, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestWhere: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)]), + ) + def test_where_1_input(self, use_cpu_only, backend, rank): + with tf.Graph().as_default() as graph: + shape = np.random.randint(low=1, high=4, size=rank) + x_val = np.random.randint(low=-1, high=2, size=shape).astype(np.float32) + x = tf.placeholder(tf.float32, shape=shape) + run_compare_tf( + graph, + {x: x_val}, + tf.where(x), + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, 
False], backends, [rank for rank in range(1, 6)]), + ) + def test_where(self, use_cpu_only, backend, rank): + shape = np.random.randint(low=1, high=4, size=rank) + cond_val = np.random.randint(low=0, high=2, size=shape).astype(np.int32) + a_val = random_gen(shape=shape, rand_min=-1962.0, rand_max=0.0) + b_val = random_gen(shape=shape, rand_min=0.0, rand_max=1964.0) + with tf.Graph().as_default() as graph: + cond = tf.placeholder(tf.bool, shape=shape) + a = tf.placeholder(tf.float32, shape=shape) + b = tf.placeholder(tf.float32, shape=shape) + ref = tf.where(cond, a, b) + run_compare_tf( + graph, + {cond: cond_val, a: a_val, b: b_val}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestCond: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], ["nn_proto"],) + ) + def test_cond_naive(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + return tf.cond(tf.constant(True), lambda: x + y, lambda: x * y) + + model, inputs, outputs = build_model + input_values = [ + np.array([1], dtype=np.float32), + np.array([6], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cond(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + z = tf.multiply(x, y) + pred = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + return tf.cond(pred, lambda: tf.add(x, z), lambda: tf.square(y)) + + model, inputs, outputs = build_model + input_values = [ + np.array([1], dtype=np.float32), + np.array([2], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cond_multi_returns(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + z = tf.multiply(x, y) + pred = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + + def true_fn(): + return tf.add(x, z), tf.math.multiply(x, z) + + def false_fn(): + return tf.square(y), tf.sqrt(z) + + return tf.cond(pred, true_fn, false_fn) + + model, inputs, outputs = build_model + input_values = [ + np.array([1], dtype=np.float32), + np.array([2], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cond_with_identity(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + z = tf.multiply(x, y) + pred = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + return tf.cond(pred, lambda: z, lambda: tf.square(y)) + + model, inputs, outputs = build_model + input_values = [ + np.array([1], dtype=np.float32), + np.array([2], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cond_multi_returns_with_identity(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + z = 
tf.multiply(x, y) + pred = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + + def true_fn(): + return tf.add(x, z), x + + def false_fn(): + return tf.square(y), z + + return tf.cond(pred, true_fn, false_fn) + + model, inputs, outputs = build_model + input_values = [ + np.array([1], dtype=np.float32), + np.array([2], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cond_nested_0(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + z = tf.multiply(x, y) + t = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + f = tf.less(tf.math.reduce_mean(z), tf.math.reduce_mean(y)) + inner_cond = tf.cond( + f, lambda: tf.pow(x, y), lambda: tf.math.subtract(x, y) + ) + return tf.cond(t, lambda: inner_cond, lambda: tf.square(y)) + + model, inputs, outputs = build_model + + input_values = [ + np.array([2], dtype=np.float32), + np.array([3], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cond_nested_1(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + z = tf.multiply(x, y) + t = tf.less(tf.math.reduce_mean(x), tf.math.reduce_mean(y)) + f = tf.less(tf.math.reduce_mean(z), tf.math.reduce_mean(y)) + cond_1 = tf.cond(f, lambda: tf.pow(x, y), lambda: tf.math.subtract(x, y)) + cond_2 = tf.cond(t, lambda: tf.multiply(x, y), lambda: tf.math.mod(x, y)) + cond_3 = tf.cond(f, lambda: tf.math.divide(x, y), lambda: cond_2) + return tf.cond(t, lambda: cond_1, lambda: cond_3) + + model, inputs, outputs = build_model + + input_values = [ + np.array([2], dtype=np.float32), + np.array([3], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestWhileLoop: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_0(self, use_cpu_only, backend): + @make_tf_graph([(1,)]) + def build_model(x): + c = lambda i: tf.greater(tf.math.reduce_mean(i), 5) + b = lambda i: i - 1 + return tf.while_loop(c, b, [x]) + + model, inputs, outputs = build_model + input_values = [np.array([10], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_1(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + c = lambda i, j: tf.greater(tf.math.reduce_mean(i), tf.math.reduce_mean(j)) + b = lambda i, j: (tf.add(i, 1), tf.square(j)) + return tf.while_loop(c, b, [x, y]) + + model, inputs, outputs = build_model + input_values = [ + np.array([1], dtype=np.float32), + np.array([2], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def 
test_while_loop_2(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1, 2)]) + def build_model(x, y): + c = lambda i, j: tf.greater(tf.math.reduce_mean(i), 5) + b = lambda i, j: (i - 3, j * 2) + return tf.while_loop(c, b, [x, y]) + + model, inputs, outputs = build_model + input_values = [ + np.array([10], dtype=np.float32), + np.array([[2, 3]], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_3(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1, 2), (1,)]) + def build_model(x, y, z): + c = lambda i, j, k: tf.greater( + tf.math.reduce_mean(i), tf.math.reduce_mean(j) + ) + b = lambda i, j, k: (i / 3, j ** 2, k - 2) + return tf.while_loop(c, b, [x, y, z]) + + model, inputs, outputs = build_model + input_values = [ + np.array([10], dtype=np.float32), + np.array([[2, 3]], dtype=np.float32), + np.array([5], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_4(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1, 2), (1,), (2, 1)]) + def build_model(x, y, z, m): + c = lambda i, j, k, l: tf.greater( + tf.math.reduce_mean(i), tf.math.reduce_mean(j) + ) + b = lambda i, j, k, l: (i / 3, j ** 2, k - 2, l % 2) + return tf.while_loop(c, b, [x, y, z, m]) + + model, inputs, outputs = build_model + input_values = [ + np.array([10], dtype=np.float32), + np.array([[2, 3]], dtype=np.float32), + np.array([5], dtype=np.float32), + np.array([[2], [3]], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_nested_while_body(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + # The following while loop: + # + # i, j = 0, 10 + # while i < j: + # while 2*i < i+2: + # i += 1 + # i += 2 + + def cond2(i): + return tf.less(2 * tf.math.reduce_mean(i), tf.math.reduce_mean(i + 2)) + + def body2(i): + return i + 1 + + def cond1(i, j): + return tf.less(tf.math.reduce_mean(i), tf.math.reduce_mean(j)) + + def body1(i, j): + new_i = tf.while_loop(cond2, body2, [i]) + return new_i + 2, j + + return tf.while_loop(cond1, body1, [x, y]) + + model, inputs, outputs = build_model + input_values = [ + np.array([0], dtype=np.float32), + np.array([10], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_nested_while_cond(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + # The following while loop: + # + # def cond(i, j): + # while 2*i < i+2: + # i += 1 + # return i < j + # + # i, j = 0, 10 + # while cond(i, j): + # i += 2 + # j += 1 + + def cond2(i): + return tf.less(2 * tf.math.reduce_mean(i), tf.math.reduce_mean(i + 2)) + + def body2(i): + return i + 1 + + def cond1(i, j): + new_i = tf.while_loop(cond2, body2, [i]) + return 
tf.less(tf.squeeze(new_i), tf.squeeze(j)) + + def body1(i, j): + return i + 2, j + 1 + + return tf.while_loop(cond1, body1, [x, y]) + + model, inputs, outputs = build_model + input_values = [ + np.array([0], dtype=np.float32), + np.array([10], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestConv: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "conv_dim", # 1d or 2d conv + "padding", + "data_format", + "HWkHkW", + "strides", + "dilations", + "dynamic_weights", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + ["conv1d", "conv2d"], + ["SAME", "VALID", [[2, 3], [3, 2]]], + ["NHWC"], # NCHW not supported by TF. + [(11, 12, 3, 2), (12, 11, 2, 3)], + [(1, 1), (2, 3)], + [(1, 1), (2, 3)], + [True, False], + [1, 3], + ), + ) + def test( + self, + use_cpu_only, + backend, + conv_dim, + padding, + data_format, + HWkHkW, + strides, + dilations, + dynamic_weights, + batch_size, + ): + H, W, kH, kW = HWkHkW + N, C_in, C_out = batch_size, 2, 3 + if data_format == "NHWC": + input_shape = (N, W, C_in) if conv_dim == "conv1d" else (N, H, W, C_in) + if isinstance(padding, list): + padding = [[0, 0]] + padding + [[0, 0]] + if conv_dim == "conv1d": + data_format = "NWC" + if isinstance(padding, list): + # No explicit padding for conv1d in TF + return + else: # 'NCHW' + input_shape = (N, C_in, W) if conv_dim == "conv1d" else (N, C_in, H, W) + if isinstance(padding, list): + padding = [[0, 0], [0, 0]] + padding + if conv_dim == "conv1d": + data_format = "NCW" + if isinstance(padding, list): + # No explicit padding for conv1d in TF + return + W_shape = (kW, C_in, C_out) if conv_dim == "conv1d" else (kH, kW, C_in, C_out) + dilations = dilations[1] if conv_dim == "conv1d" else dilations + + # We do not support dynamic weight when dilations != 1. 
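+ # (When dilations != (1, 1), the dynamic-weights branch below is skipped and + # those parameter combinations run through the static-weights path instead.)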
+ if dynamic_weights and dilations == (1, 1): + + @make_tf_graph([input_shape, W_shape]) + def build_model_dynamic_weights(x, W): + if conv_dim == "conv1d": + conv = tf.nn.conv1d( + x, + W, + stride=strides[1], + padding=padding, + dilations=dilations, + data_format=data_format, + ) + else: + conv = tf.nn.conv2d( + x, + W, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + return conv + + model, inputs, outputs = build_model_dynamic_weights + input_values = [ + random_gen(input_shape, -10.0, 10.0), + random_gen(W_shape, -1.0, 1.0), + ] + input_dict = dict(zip(inputs, input_values)) + + else: + + @make_tf_graph([input_shape]) + def build_model_static_weights(x): + W = tf.constant(np.random.rand(*W_shape), tf.float32) + if conv_dim == "conv1d": + conv = tf.nn.conv1d( + x, + W, + stride=strides[1], + padding=padding, + dilations=dilations, + data_format=data_format, + ) + else: + conv = tf.nn.conv2d( + x, + W, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + return conv + + model, inputs, outputs = build_model_static_weights + input_values = [random_gen(input_shape, -10.0, 10.0)] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestConv3d: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "data_format", + "input_size", + "kernel_size", + "strides", + "dilations", + "padding_type", + "batch_size", + ] + ), + itertools.product( + [True, False], # use_cpu_only + backends, + ["NDHWC"], # NCDHW not supported by TF. + [(7, 11, 13), (32, 16, 8)], # input_size + [(1, 1, 1), (3, 3, 3), (1, 2, 3)], # kernel_size + [(1, 1, 1), (2, 2, 2), (3, 2, 1)], # strides + [ + (1, 1, 1) + ], # , (2, 2, 2), (2, 3, 1)], # dilations: dilations greater than 1 not supported on CPU + ["SAME", "VALID"], # padding_type + [1, 3], # batch_size + ), + ) + def test_tf( + self, + use_cpu_only, + backend, + data_format, + input_size, + kernel_size, + strides, + dilations, + padding_type, + batch_size, + ): + C_in = np.random.randint(low=1, high=5) + C_out = np.random.randint(low=1, high=(C_in + 1)) + input_shape = [batch_size] + list(input_size) + [C_in] + weights_shape = list(kernel_size) + [C_in, C_out] + # TF1 and TF2 tf.nn.conv3d require dilations and strides to have length 5 or greater, with values of 1 for + # indices 0 and 4 (batch and channel in NDHWC format) + tf_strides = [1] + list(strides) + [1] + tf_dilations = [1] + list(dilations) + [1] + + @make_tf_graph([input_shape]) + def build_model_static_weights(x): + W = tf.constant(np.random.rand(*weights_shape), tf.float32) + return tf.nn.conv3d( + x, + W, + strides=tf_strides, + padding=padding_type, + data_format=data_format, + dilations=tf_dilations, + ) + + model, inputs, outputs = build_model_static_weights + input_values = [random_gen(input_shape, -10.0, 10.0)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + frontend_only=False, + atol=1e-03, # default 1e-04 + rtol=2e-03, # default 1e-05 + ) + + +class TestDepthwiseConv: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "padding", + "HWkHkW", + "strides", + "dilations", + "dynamic_weights", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + ["SAME", "VALID"], + [(11, 12, 3, 2), (12, 11, 2, 3)], + # TF doesn't 
support non-square strides for depthwise + # https://github.com/tensorflow/tensorflow/issues/33005 + [(1, 1, 1, 1), (1, 2, 2, 1)], + [ + (1, 1), + (2, 2), + ], # rdar://60668562 (MIL: Conversion for TF op 'SpaceToBatchND' not implemented.) + [True, False], + [1, 3], + ), + ) + def test_depthwise_conv( + self, + use_cpu_only, + backend, + padding, + HWkHkW, + strides, + dilations, + dynamic_weights, + batch_size, + ): + if np.sum(strides) != len(strides) and np.sum(dilations) != len(dilations): + # TF doesn't compute correct output for non-one stride+dilation + return + H, W, kH, kW = HWkHkW + N, C_in, C_out = batch_size, 2, 6 + input_shape = (N, H, W, C_in) + data_format = "NHWC" + assert C_out % C_in == 0 + multiplier = int(C_out / C_in) + W_shape = (kH, kW, C_in, multiplier) + + def test_static_W(): + W = np.random.rand(*W_shape).astype(np.float32) + + @make_tf_graph([input_shape]) + def build_model_static_weights(x): + return tf.nn.depthwise_conv2d( + x, + W, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + + model, inputs, outputs = build_model_static_weights + + input_values = [(np.random.rand(*input_shape).astype(np.float32))] + input_dict = dict(zip(inputs, input_values)) + + proto = run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + frontend_only=False, + ) + + assert layer_counts(proto, "reorganizeData") == 0 + + def test_dynamic_W(): + @make_tf_graph([input_shape, W_shape]) + def build_model_dynamic_weights(x, W): + return tf.nn.depthwise_conv2d( + x, + W, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + + model, inputs, outputs = build_model_dynamic_weights + + input_values = [ + (np.random.rand(*input_shape).astype(np.float32)), + (np.random.rand(*W_shape).astype(np.float32)), + ] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + frontend_only=False, + ) + + # We do not support dynamic weight when dilations != 1. 
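+ # Only dilations == (1, 1) exercises the dynamic-weights path below; any + # other dilation falls back to the static-weights variant.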
test_dynamic_W() if dynamic_weights and dilations == (1, 1) else test_static_W() + + +class TestSeparableConv: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "padding", + "HWkHkW", + "strides", + "dilations", + "dynamic_weights", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + ["SAME", "VALID"], + [(11, 12, 3, 2), (12, 11, 2, 3)], + [(1, 1, 1, 1), (1, 2, 2, 1)], + [(1, 1), (2, 2)], + [True, False], + [1, 3], + ), + ) + def test_separable_conv( + self, + use_cpu_only, + backend, + padding, + HWkHkW, + strides, + dilations, + dynamic_weights, + batch_size, + ): + H, W, kH, kW = HWkHkW + N, C_in, C_out = batch_size, 2, 6 + input_shape = (N, H, W, C_in) + data_format = "NHWC" + assert C_out % C_in == 0 + multiplier = int(C_out / C_in) + depthwise_filter_shape = (kH, kW, C_in, multiplier) + pointwise_filter_shape = [1, 1, multiplier * C_in, C_out] + if dilations != (1, 1): + strides = (1, 1, 1, 1) + + def test_dynamic_W(): + @make_tf_graph( + [input_shape, depthwise_filter_shape, pointwise_filter_shape] + ) + def build_model_dynamic_weights(x, depthwise_filter, pointwise_filter): + return tf.nn.separable_conv2d( + x, + depthwise_filter, + pointwise_filter, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + + model, inputs, outputs = build_model_dynamic_weights + + input_values = [ + (np.random.rand(*input_shape).astype(np.float32)), + (np.random.rand(*depthwise_filter_shape).astype(np.float32)), + (np.random.rand(*pointwise_filter_shape).astype(np.float32)), + ] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + frontend_only=False, + ) + + def test_static_W(): + depthwise_filter = np.random.rand(*depthwise_filter_shape).astype( + np.float32 + ) + pointwise_filter = np.random.rand(*pointwise_filter_shape).astype( + np.float32 + ) + + @make_tf_graph([input_shape]) + def build_model_static_weights(x): + return tf.nn.separable_conv2d( + x, + depthwise_filter, + pointwise_filter, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + + model, inputs, outputs = build_model_static_weights + + input_values = [(np.random.rand(*input_shape).astype(np.float32))] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + frontend_only=False, + ) + + test_static_W() + test_dynamic_W() + + +class TestConvTranspose: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "conv_dim", # 1d or 2d conv + "padding", + "data_format", + "HWkHkW", + "strides", + "dilations", + ] + ), + itertools.product( + [True, False], + backends, + ["conv1d", "conv2d"], + ["SAME", "VALID"], + ["NHWC"], # NCHW not supported by TF + [(12, 12, 2, 2), (2, 2, 2, 3), (5, 5, 3, 3)], + [(1, 1), (1, 2)], + [(1, 1)], # Dilation > 1 not supported by TF + ), + ) + def test_conv_transpose( + self, + use_cpu_only, + backend, + conv_dim, + padding, + data_format, + HWkHkW, + strides, + dilations, + ): + H, W, kH, kW = HWkHkW + N, C_in, C_out = 1, 1, 2 + + if padding == "SAME": + oH = H * strides[0] + oW = W * strides[1] + else: + oH = (H - 1) * strides[0] + (kH - 1) * dilations[0] + 1 + oW = (W - 1) * strides[1] + (kW - 1) * dilations[1] + 1 + + if data_format == "NHWC": + input_shape = (N, W, C_in) if conv_dim == "conv1d" else (N, H, W, C_in) + if conv_dim ==
"conv1d": + data_format = "NWC" + output_shape = ( + [N, oH, C_out] if conv_dim == "conv1d" else [N, oH, oW, C_out] + ) + else: # 'NCHW' + input_shape = (N, C_in, W) if conv_dim == "conv1d" else (N, C_in, H, W) + if conv_dim == "conv1d": + data_format = "NCW" + output_shape = ( + [N, C_out, oH] if conv_dim == "conv1d" else [N, C_out, oH, oW] + ) + + w_shape = (kH, C_out, C_in) if conv_dim == "conv1d" else (kH, kW, C_out, C_in) + + def test_static_W(): + x_input = np.random.randn(*input_shape) + w_val = np.random.randn(*w_shape) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + w = tf.constant(w_val, tf.float32) + if conv_dim == "conv1d": + conv = tf.nn.conv1d_transpose( + x, + w, + output_shape=output_shape, + strides=strides[0], + padding=padding, + dilations=dilations[0], + data_format=data_format, + ) + else: + conv = tf.nn.conv2d_transpose( + x, + w, + output_shape=output_shape, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + run_compare_tf( + graph, + {x: x_input}, + conv, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + test_static_W() + + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "padding", + "data_format", + "DHWkDkHkW", + "strides", + "dilations", + ] + ), + itertools.product( + [True, False], + backends, + [ + "SAME" + ], # VALID padding requires padding due to different output shape computation + # and blocked by rdar://63245116 ([deconv3d] Deconv 3d with deconv_out* parameter executes + # deconvolution kernel instead of deconv3d kernel on CPU) + ["NDHWC"], + [ + (10, 12, 14, 2, 3, 5), + (4, 4, 4, 2, 3, 1), + (6, 6, 6, 3, 3, 3), + (5, 5, 5, 2, 4, 2), + ], + [(1, 1, 1), (1, 2, 3)], + [(1, 1, 1)], # Dilation > 1 not supported by TF + ), + ) + def test_conv3d_transpose( + self, use_cpu_only, backend, padding, data_format, DHWkDkHkW, strides, dilations + ): + D, H, W, kD, kH, kW = DHWkDkHkW + N, C_in, C_out = 2, 1, 2 + + if padding == "SAME": + oD = D * strides[0] + oH = H * strides[1] + oW = W * strides[2] + else: + oD = (D - 1) * strides[0] + (kD - 1) * dilations[0] + 1 + oH = (H - 1) * strides[1] + (kH - 1) * dilations[1] + 1 + oW = (W - 1) * strides[2] + (kW - 1) * dilations[2] + 1 + if data_format == "NDHWC": + input_shape = (N, D, H, W, C_in) + output_shape = [N, oD, oH, oW, C_out] + else: # 'NCDHW' + input_shape = (N, C_in, D, H, W) + output_shape = [N, C_out, oD, oH, oW] + + w_shape = (kD, kH, kW, C_out, C_in) + x_input = np.random.randn(*input_shape) + w_val = np.random.randn(*w_shape) + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + w = tf.constant(w_val, tf.float32) + conv = tf.nn.conv3d_transpose( + x, + w, + output_shape=output_shape, + strides=strides, + padding=padding, + dilations=dilations, + data_format=data_format, + ) + + run_compare_tf( + graph, + {x: x_input}, + conv, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestElementWiseBinary: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, mode", + itertools.product( + [True, False], + backends, + [rank for rank in range(0, 4)], + [ + "add", + "floor_div", + "floor_mod", + "maximum", + "minimum", + "mod", + "mul", + "pow", + "real_div", + "sub", + "squared_difference", + ], + ), + ) + def test_binary(self, use_cpu_only, backend, rank, mode): + # TODO: rdar://problem/63030405. 
Rank 0 tensor for MIL + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + if mode == "add": + res = tf.math.add + x_val = np.random.randint(low=-1000, high=1000, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-1000, high=1000, size=y_shape).astype(dtype) + elif mode == "floor_div": + res = tf.math.floordiv + x_val = np.random.randint(low=0, high=1000, size=x_shape).astype(dtype) + y_val = np.random.randint(low=1, high=20, size=y_shape).astype(dtype) + elif mode == "floor_mod": + res = tf.math.floormod + x_val = np.random.randint(low=0, high=100, size=x_shape).astype(dtype) + y_val = np.random.randint(low=1, high=20, size=y_shape).astype(dtype) + elif mode == "maximum": + res = tf.math.maximum + x_val = np.random.randint(low=-10, high=10, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-10, high=10, size=y_shape).astype(dtype) + elif mode == "minimum": + res = tf.math.minimum + x_val = np.random.randint(low=-10, high=10, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-10, high=10, size=y_shape).astype(dtype) + elif mode == "mod": + res = tf.math.mod + x_val = np.random.randint(low=0, high=1000, size=x_shape).astype(dtype) + y_val = np.random.randint(low=1, high=20, size=y_shape).astype(dtype) + elif mode == "mul": + res = tf.math.multiply + x_val = np.random.randint(low=-100, high=100, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-100, high=100, size=y_shape).astype(dtype) + elif mode == "pow": + res = tf.math.pow + x_val = np.random.randint(low=-5, high=5, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-5, high=5, size=y_shape).astype(dtype) + elif mode == "real_div": + res = tf.math.truediv + x_val = np.random.randint(low=0, high=1000, size=x_shape).astype(dtype) + y_val = np.random.randint(low=1, high=20, size=y_shape).astype(dtype) + elif mode == "sub": + res = tf.math.subtract + x_val = np.random.randint(low=-1000, high=1000, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-1000, high=1000, size=y_shape).astype(dtype) + elif mode == "squared_difference": + if backend == "mil_proto": + return # TODO + res = tf.math.squared_difference + x_val = np.random.randint(low=-5, high=5, size=x_shape).astype(dtype) + y_val = np.random.randint(low=-5, high=5, size=y_shape).astype(dtype) + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return res(x, y) + + model, inputs, outputs = build_model + input_values = [x_val, y_val] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(0, 4)]), + ) + def test_equal(self, use_cpu_only, backend, rank): + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.equal(x, y) + + model, inputs, outputs = build_model 
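+ # Values are small integers (drawn from [-5, 3)) cast to float, so exact + # equality between x and y actually occurs in the sampled inputs.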
+ + input_values = [ + np.random.randint(low=-5, high=3, size=x_shape).astype(dtype), + np.random.randint(low=-5, high=3, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(0, 4)]), + ) + def test_greater(self, use_cpu_only, backend, rank): + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.greater(x, y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.randint(low=-5, high=3, size=x_shape).astype(dtype), + np.random.randint(low=-5, high=3, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(0, 4)]), + ) + def test_greater_equal(self, use_cpu_only, backend, rank): + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.greater_equal(x, y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.randint(low=-5, high=3, size=x_shape).astype(dtype), + np.random.randint(low=-5, high=3, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(0, 4)]), + ) + def test_less(self, use_cpu_only, backend, rank): + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.less(x, y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.randint(low=-5, high=3, size=x_shape).astype(dtype), + np.random.randint(low=-5, high=3, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(0, 4)]), + ) + def test_less_equal(self, use_cpu_only, backend, rank): + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = 
x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.less_equal(x, y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.randint(low=-5, high=3, size=x_shape).astype(dtype), + np.random.randint(low=-5, high=3, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(0, 4)]), + ) + def test_not_equal(self, use_cpu_only, backend, rank): + if rank == 0 and backend == "mil_proto": + return + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.not_equal(x, y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.randint(low=-5, high=3, size=x_shape).astype(dtype), + np.random.randint(low=-5, high=3, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestElementWiseUnary: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, mode", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [ + "abs", + "acos", + "asin", + "atan", + "atanh", + "cast", + "ceil", + "clip", + "cos", + "cosh", + "erf", + "exp", + "floor", + "inverse", + "log", + "negative", + "round", + "rsqrt", + "sign", + "sin", + "sinh", + "sqrt", + "square", + "tan", + "tanh", + ], + ), + ) + def test_unary(self, use_cpu_only, backend, rank, mode): + atol, rtol = 1e-4, 1e-5 + input_shape = np.random.randint(low=2, high=6, size=rank) + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + if mode == "abs": + res = tf.abs(x) + val = random_gen(input_shape, rand_min=-1, rand_max=1) + elif mode == "acos": + res = tf.acos(x) + val = random_gen(input_shape, rand_min=-1, rand_max=1) + elif mode == "asin": + res = tf.asin(x) + val = random_gen(input_shape, rand_min=-1, rand_max=1) + elif mode == "atan": + res = tf.atan(x) + val = random_gen(input_shape, rand_min=-100, rand_max=100) + elif mode == "atanh": + if backend == "mil_proto": + # TODO + return + res = tf.atanh(x) + val = random_gen(input_shape, rand_min=-0.9, rand_max=0.9) + elif mode == "cast": + if backend == "mil_proto": + # TODO [MIL] Add cast operation in MIL backend and enable tests + return + eps_from_int = 0.0 + if not use_cpu_only: + eps_from_int = 0.1 + res = tf.cast(x, dtype=tf.int32) + val = random_gen( + input_shape, + rand_min=-10, + rand_max=10, + eps_from_int=eps_from_int, + dtype=dtype, + ) + elif mode == "ceil": + res = tf.ceil(x) + eps_from_int = 0.0 + if not use_cpu_only: + eps_from_int = 0.1 + val = random_gen( + input_shape, + rand_min=-100, + rand_max=100, + eps_from_int=eps_from_int, + dtype=dtype, + ) + elif mode == "clip": + if backend == "mil_proto": 
+ # TODO + return + res = tf.clip_by_value(x, clip_value_min=0.0, clip_value_max=5.0) + val = random_gen(input_shape, rand_min=-5, rand_max=10) + elif mode == "cos": + res = tf.cos(x) + rand_range = 1000 + if not use_cpu_only: + rand_range = 10 + val = random_gen(input_shape, rand_min=-rand_range, rand_max=rand_range) + elif mode == "cosh": + res = tf.cosh(x) + val = random_gen(input_shape, rand_min=-4, rand_max=4) + elif mode == "erf": + res = tf.math.erf(x) + val = random_gen(input_shape, rand_min=1, rand_max=6) + elif mode == "exp": + if not use_cpu_only: + # We skip GPU here, since exp(1) already differs in backend. + return + res = tf.exp(x) + val = random_gen(input_shape, rand_min=-4, rand_max=20) + elif mode == "floor": + res = tf.floor(x) + eps_from_int = 0.0 + if not use_cpu_only: + eps_from_int = 0.1 + val = random_gen( + input_shape, + rand_min=-100, + rand_max=100, + eps_from_int=eps_from_int, + dtype=dtype, + ) + elif mode == "inverse": + if backend == "mil_proto": + return # TODO + res = tf.reciprocal(x) + val = random_gen(input_shape, rand_min=0.1, rand_max=10) + elif mode == "log": + res = tf.log(x) + val = random_gen(input_shape, rand_min=0.2, rand_max=1000) + elif mode == "negative": + if backend == "mil_proto": + return # TODO + res = tf.math.negative(x) + val = random_gen(input_shape, rand_min=-100.0, rand_max=100.0) + elif mode == "round": + res = tf.round(x) + val = random_gen( + input_shape, rand_min=-1000, rand_max=1000, dtype=dtype + ) + elif mode == "rsqrt": + res = tf.rsqrt(x) + val = random_gen(input_shape, rand_min=0.5, rand_max=1000) + elif mode == "sign": + res = tf.sign(x) + val = random_gen(input_shape, rand_min=-5, rand_max=5) + elif mode == "sin": + res = tf.sin(x) + rand_range = 1000 + if not use_cpu_only: + rand_range = 10 + val = random_gen(input_shape, rand_min=-rand_range, rand_max=rand_range) + elif mode == "sinh": + res = tf.sinh(x) + val = random_gen(input_shape, rand_min=-10, rand_max=10) + elif mode == "sqrt": + res = tf.sqrt(x) + val = random_gen(input_shape, rand_min=0.5, rand_max=1000) + elif mode == "square": + if backend == "mil_proto": + return # TODO + res = tf.math.square(x) + val = random_gen(input_shape, rand_min=-5, rand_max=5) + atol, rtol = 1e-2, 1e-3 + elif mode == "tan": + res = tf.tan(x) + val = random_gen(input_shape, rand_min=-1000, rand_max=1000) + elif mode == "tanh": + res = tf.tanh(x) + val = random_gen(input_shape, rand_min=-1000, rand_max=1000) + + run_compare_tf( + graph, + {x: val}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + atol=atol, + rtol=rtol, + ) + + +class TestImageResizing: + @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape, target_shape, align_corners, half_pixel_centers", + itertools.product( + [True, False], + backends, + [(1, 10, 20, 1), (2, 5, 1, 3)], + [(25, 30), (2, 20)], + [True, False], + [True, False], + ), + ) + def test_resize_bilinear( + self, + use_cpu_only, + backend, + input_shape, + target_shape, + align_corners, + half_pixel_centers, + ): + if half_pixel_centers and align_corners: + return + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + ref = tf.raw_ops.ResizeBilinear( + images=x, + size=target_shape, + half_pixel_centers=half_pixel_centers, + align_corners=align_corners, + ) + run_compare_tf( + graph, + {x: random_gen(input_shape, rand_min=-100, rand_max=100)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape, 
upsample_factor, data_format", + itertools.product( + [True, False], + backends, + [(1, 1, 1, 3), (1, 10, 5, 3)], + [(1, 2), (4, 3)], + ["channels_last", "channels_first"], + ), + ) + def test_upsampling_2d( + self, use_cpu_only, backend, input_shape, upsample_factor, data_format + ): + if data_format == "channels_last": + input_shape = ( + input_shape[0], + input_shape[2], + input_shape[3], + input_shape[1], + ) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + ref = tf.keras.layers.UpSampling2D( + size=upsample_factor, data_format=data_format, interpolation="nearest" + )(x) + run_compare_tf( + graph, + {x: random_gen(input_shape, rand_min=-100, rand_max=100)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape, num_of_crops, crop_size, method, dynamic", + itertools.product( + [True, False], + backends, + [(1, 64, 64, 1)], + [1, 3, 5], + [(2, 2), (1, 1), (4, 4), (128, 128)], + ["bilinear"], + [False, True], + ), + ) + def test_crop_and_resize( + self, + use_cpu_only, + backend, + input_shape, + num_of_crops, + crop_size, + method, + dynamic, + ): + input = np.random.randn(*input_shape) + boxes = np.random.uniform(size=(num_of_crops, 4)) + box_indices = np.random.randint( + size=(num_of_crops,), low=0, high=input_shape[0] + ) + + def test_static(): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + output = tf.raw_ops.CropAndResize( + image=x, + boxes=boxes, + box_ind=box_indices, + crop_size=crop_size, + method=method, + ) + run_compare_tf( + graph, + {x: input}, + output, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + def test_dynamic(): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + boxes_pl = tf.placeholder(tf.float32, shape=boxes.shape) + box_indices_pl = tf.placeholder(tf.int32, shape=box_indices.shape) + output = tf.raw_ops.CropAndResize( + image=x, + boxes=boxes_pl, + box_ind=box_indices_pl, + crop_size=crop_size, + method=method, + ) + run_compare_tf( + graph, + {x: input, boxes_pl: boxes, box_indices_pl: box_indices}, + output, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + test_dynamic() if dynamic else test_static() + + @pytest.mark.parametrize( + "use_cpu_only, backend, width, height, strides, sizes, padding", + list( + itertools.product( + [True, False], + backends, + [1, 3, 5, 50], + [1, 2, 7, 30], + [(1, 1), (2, 1), (3, 5), (7, 13)], + [(1, 1), (1, 2), (5, 3), (13, 7)], + ["VALID", "SAME"], + ) + ), + ) + def test_extract_patches( + self, use_cpu_only, backend, width, height, strides, sizes, padding + ): + # TODO: theoretically, the current extract_patches code can handle batch sizes other than 1, + # but there seems to be a bug in crop_resize when using the GPU with batch_size > 1. + # We should test batch_size > 1 after the issue is fixed.
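+ # With VALID padding the kernel must fit inside the image, so the requested + # patch sizes are clamped to the image height/width below.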
+ # + input = np.random.rand(1, height, width, 128) + if padding == "VALID": + size_h = min(sizes[0], height) + size_w = min(sizes[1], width) + else: + size_h = sizes[0] + size_w = sizes[1] + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input.shape) + output = tf.extract_image_patches( + images=x, + ksizes=[1, size_h, size_w, 1], + strides=[1, strides[0], strides[1], 1], + rates=[1, 1, 1, 1], + padding=padding, + ) + run_compare_tf( + graph, {x: input}, output, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestLinear: + @pytest.mark.parametrize( + "use_cpu_only, backend, dim, transpose_a, transpose_b, use_constant", + itertools.product( + [True, False], + backends, + [2, 4, 8], + [True, False], + [True, False], + [True, False], + ), + ) + def test_matmul( + self, use_cpu_only, backend, dim, transpose_a, transpose_b, use_constant + ): + shape_x = np.array([dim, dim * 2, dim * 4]) + shape_y = np.array([dim * 4, dim * 2]) + + flip = (not transpose_a and transpose_b) or (transpose_a and not transpose_b) + shape_y = np.flip(shape_y) if flip else shape_y + + if not use_constant: + + @make_tf_graph([shape_x, shape_y]) + def build_model(x, y): + return tf.linalg.matmul( + x, y, transpose_a=transpose_a, transpose_b=transpose_b + ) + + input_values = [ + random_gen(shape=shape_x, rand_min=-100, rand_max=100), + random_gen(shape=shape_y, rand_min=-1.0, rand_max=1.0), + ] + else: + y = random_gen(shape=shape_y, rand_min=-1.0, rand_max=1.0) + + @make_tf_graph([shape_x]) + def build_model(x): + return tf.linalg.matmul( + x, y, transpose_a=transpose_a, transpose_b=transpose_b + ) + + input_values = [random_gen(shape=shape_x, rand_min=-100, rand_max=100)] + + model, inputs, outputs = build_model + + input_dict = dict(zip(inputs, input_values)) + + proto = run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + for layer in proto.neuralNetwork.layers: + if layer.WhichOneof("layer") == "batchedMatmul": + wp = layer.batchedMatmul.weights + if use_constant: + assert len(wp.floatValue) != 0 + else: + assert len(wp.floatValue) == 0 + + +class TestNormalization: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, shape_mode, epsilon", + itertools.product( + [True, False], + backends, + [rank for rank in range(3, 6)], + [True, False], + [1e-1, 1e-10], + ), + ) + def test_batch_norm(self, use_cpu_only, backend, rank, shape_mode, epsilon): + input_shape = np.random.randint(low=1, high=6, size=rank) + if shape_mode: + # same shape with 1 for being normalized over + attr_shape = list(input_shape) + attr_shape[1] = 1 + attr_shape[2] = 1 + else: + # 1D tensor of the same size as channel dimension + attr_shape = [list(input_shape)[-1]] + + @make_tf_graph([input_shape, attr_shape, attr_shape, attr_shape, attr_shape]) + def build_model(x, m, v, o, s): + return tf.nn.batch_normalization( + x, mean=m, variance=v, offset=o, scale=s, variance_epsilon=epsilon + ) + + model, inputs, outputs = build_model + + input_values = [ + random_gen(shape=input_shape, rand_min=-100.0, rand_max=100.0), + random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0), + random_gen(shape=attr_shape, rand_min=0.0, rand_max=10.0), + random_gen(shape=attr_shape, rand_min=1.0, rand_max=10.0), + random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0), + ] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + 
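+ # batch_norm_with_global_normalization is the older TF batch-norm op; with + # scale_after_normalization=False it takes no gamma input, which is why the + # test below builds two graph variants of different arity.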
@pytest.mark.parametrize( + "use_cpu_only, backend, rank, shape_mode, epsilon, scale_after_normalization", + itertools.product( + [True, False], + backends, + [rank for rank in range(3, 6)], + [True, False], + [1e-1, 1e-10], + [True, False], + ), + ) + def test_batch_norm_with_global_normalization( + self, + use_cpu_only, + backend, + rank, + shape_mode, + epsilon, + scale_after_normalization, + ): + input_shape = np.random.randint(low=1, high=6, size=rank) + if shape_mode: + # same shape with 1 for being normalized over + attr_shape = list(input_shape) + attr_shape[1] = 1 + attr_shape[2] = 1 + else: + # 1D tensor of the same size as channel dimension + attr_shape = [list(input_shape)[-1]] + + if scale_after_normalization: + + @make_tf_graph( + [input_shape, attr_shape, attr_shape, attr_shape, attr_shape] + ) + def build_model(x, m, v, b, g): + return tf.nn.batch_norm_with_global_normalization( + x, + mean=m, + variance=v, + beta=b, + gamma=g, + variance_epsilon=epsilon, + scale_after_normalization=scale_after_normalization, + ) + + else: + + @make_tf_graph([input_shape, attr_shape, attr_shape, attr_shape]) + def build_model(x, m, v, b): + return tf.nn.batch_norm_with_global_normalization( + x, + mean=m, + variance=v, + beta=b, + gamma=None, + variance_epsilon=epsilon, + scale_after_normalization=scale_after_normalization, + ) + + model, inputs, outputs = build_model + + input_values = [ + random_gen(shape=input_shape, rand_min=-100.0, rand_max=100.0), + random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0), + random_gen(shape=attr_shape, rand_min=0.0, rand_max=10.0), + random_gen(shape=attr_shape, rand_min=1.0, rand_max=10.0), + ] + if scale_after_normalization: + input_values.append( + random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0) + ) + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, epsilon", + itertools.product([True, False], backends, [1e-1, 1e-10]), + ) + def test_fused_batch_norm(self, use_cpu_only, backend, epsilon): + # TensorFlow's FusedBatchNorm is only for 4D inputs + input_shape = np.random.randint(low=1, high=6, size=4) + attr_shape = [list(input_shape)[-1]] + + m = random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0) + v = random_gen(shape=attr_shape, rand_min=0.0, rand_max=10.0) + o = random_gen(shape=attr_shape, rand_min=1.0, rand_max=10.0) + s = random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.compat.v1.nn.fused_batch_norm( + x, + mean=m, + variance=v, + offset=o, + scale=s, + epsilon=epsilon, + is_training=False, + )[0] + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100.0, rand_max=100.0)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + @pytest.mark.skip(reason=" Specific failure on CI") + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axes, epsilon", + itertools.product( + [True, False], + backends, + [rank for rank in range(3, 6)], + [(-1,), (-2,), (0, 1)], + [1e-5, 1e-10], + ), + ) + def test_l2_normalize(self, use_cpu_only, backend, rank, axes, epsilon): + input_shape = np.random.randint(low=1, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return 
tf.math.l2_normalize(x, axis=axes, epsilon=epsilon) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, rand_min=-10, rand_max=10)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, size, alpha, beta, k", + itertools.product( + [True, False], backends, [1, 2, 3], [0.0001, 0.01], [0.75, 1.0], [1.0, 2.0], + ), + ) + def test_local_response_normalization( + self, use_cpu_only, backend, size, alpha, beta, k + ): + # TensorFlow's local_response_normalization only supports rank 4 + input_shape = np.random.randint(low=3, high=6, size=4) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.local_response_normalization( + x, depth_radius=size, bias=k, alpha=alpha, beta=beta + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + +class TestPooling1d: + @pytest.mark.parametrize( + "use_cpu_only, backend, kernel_sizes, strides, pad_type", + itertools.product( + [True, False], backends, [(1,)], [(1,), (2,)], ["same", "valid"] + ), + ) + def test_avg_pool_1d(self, use_cpu_only, backend, kernel_sizes, strides, pad_type): + input_shape = np.random.randint(low=2, high=6, size=3) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.avg_pool1d( + x, ksize=kernel_sizes[:], strides=strides[:], padding=pad_type.upper() + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, kernel_sizes, strides, pad_type", + itertools.product( + [True, False], backends, [(1,)], [(1,), (2,)], ["same", "valid"] + ), + ) + def test_max_pool_1d(self, use_cpu_only, backend, kernel_sizes, strides, pad_type): + input_shape = np.random.randint(low=2, high=6, size=3) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.max_pool1d( + x, ksize=kernel_sizes[:], strides=strides[:], padding=pad_type.upper() + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestPooling2d: + @pytest.mark.parametrize( + "use_cpu_only, backend, kernel_sizes, strides, pad_type", + itertools.product( + [True, False], + backends, + [(1,), (2,), (1, 1), (1, 2), (2, 2)], + [(1,), (2,), (1, 1), (1, 2), (2, 2)], + ["same", "valid"], + ), + ) + def test_avg_pool_2d(self, use_cpu_only, backend, kernel_sizes, strides, pad_type): + input_shape = np.random.randint(low=2, high=6, size=4) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.avg_pool( + x, ksize=kernel_sizes[:], strides=strides[:], padding=pad_type.upper() + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, 
use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, kernel_sizes, strides, pad_type", + itertools.product( + [True, False], + backends, + [(1,), (2,), (1, 1), (1, 2), (2, 2)], + [(1,), (2,), (1, 1), (1, 2), (2, 2)], + ["same", "valid"], + ), + ) + def test_max_pool_2d(self, use_cpu_only, backend, kernel_sizes, strides, pad_type): + input_shape = np.random.randint(low=2, high=6, size=4) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.max_pool( + x, ksize=kernel_sizes[:], strides=strides[:], padding=pad_type.upper() + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestPooling3d: + @pytest.mark.parametrize( + "use_cpu_only, backend, kernel_sizes, strides, pad_type", + itertools.product( + [True, False], + backends, + [(1,), (2,), (1, 1, 1), (1, 2, 3), (2, 2, 3), (3, 3, 3)], + [(1,), (2,), (1, 1, 1), (1, 2, 3), (2, 2, 3), (3, 3, 3)], + ["same", "valid"], + ), + ) + def test_avg_pool_3d(self, use_cpu_only, backend, kernel_sizes, strides, pad_type): + input_shape = np.random.randint(low=3, high=6, size=5) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.avg_pool3d( + x, ksize=kernel_sizes[:], strides=strides[:], padding=pad_type.upper() + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, kernel_sizes, strides, pad_type", + itertools.product( + [True, False], + backends, + [(1,), (2,), (1, 1, 1), (1, 2, 3), (2, 2, 3), (3, 3, 3)], + [(1,), (2,), (1, 1, 1), (1, 2, 3), (2, 2, 3), (3, 3, 3)], + ["same", "valid"], + ), + ) + def test_max_pool_3d(self, use_cpu_only, backend, kernel_sizes, strides, pad_type): + input_shape = np.random.randint(low=3, high=6, size=5) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.nn.max_pool3d( + x, ksize=kernel_sizes[:], strides=strides[:], padding=pad_type.upper() + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100, rand_max=100)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestPrint: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [size for size in range(1, 5)],), + ) + def test_print(self, use_cpu_only, backend, rank): + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + + @make_tf_graph([shape]) + def build_model(x): + print_layer = tf.raw_ops.Print(input=x, data=[]) + res = print_layer + 1 + return res + + model, inputs, outputs = build_model + input_value = [random_gen(shape=shape, rand_min=-100, rand_max=100)] + input_dict = dict(zip(inputs, input_value)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestRandom: + @pytest.mark.parametrize( + "use_cpu_only, backend, size, rank, constant", + itertools.product( + [True, False], + backends, + [size for size in range(1, 5)], + [rank for rank in range(1, 6)], + [True, False], + ), + ) + def 
test_random_binomial(self, use_cpu_only, backend, size, rank, constant): + if not constant and backend == "mil_proto": + return # TODO: rdar://61948178 (MIL backend Random op does not support dynamic input shape) + + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + if constant: + ref = tf.add(x, tf.keras.backend.random_binomial(shape=shape, p=1.0)) + else: + ref = tf.add( + x, + tf.keras.backend.random_binomial( + shape=tf.raw_ops.Shape(input=x), p=1.0 + ), + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, size", + itertools.product([True, False], backends, [size for size in range(1, 10)]), + ) + def test_random_categorical(self, use_cpu_only, backend, size): + # TensorFlow's input is 2-D tensor with shape [batch_size, num_classes]. + shape = np.random.randint(low=1, high=6, size=2) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.random.categorical(x, size) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + validate_shapes_only=True, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, mean, rank, constant", + itertools.product( + [True, False], + backends, + [0.0], + [rank for rank in range(1, 6)], + [True, False], + ), + ) + def test_random_normal(self, use_cpu_only, backend, mean, rank, constant): + if not constant and backend == "mil_proto": + return # TODO: rdar://61948178 (MIL backend Random op does not support dynamic input shape) + + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + if constant: + ref = tf.add(x, tf.random.normal(shape=shape, mean=mean, stddev=0.0)) + else: + ref = tf.add( + x, + tf.random.normal( + shape=tf.raw_ops.Shape(input=x), mean=mean, stddev=0.0 + ), + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, mean, rank, constant", + itertools.product( + [True, False], + backends, + [0.0], + [rank for rank in range(1, 6)], + [True, False], + ), + ) + def test_keras_random_normal(self, use_cpu_only, backend, mean, rank, constant): + if not constant and backend == "mil_proto": + return # TODO: rdar://61948178 (MIL backend Random op does not support dynamic input shape) + + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + if constant: + ref = tf.add( + x, + tf.keras.backend.random_normal(shape=shape, mean=mean, stddev=0.0), + ) + else: + ref = tf.add( + x, + tf.keras.backend.random_normal( + shape=tf.raw_ops.Shape(input=x), mean=mean, stddev=0.0 + ), + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, low, high, rank, constant", + itertools.product( + [True, False], + backends, + [0.0], + [0.0], + [rank for rank in range(1, 2)], + [True, False], + ), + ) + def test_random_uniform(self, use_cpu_only, backend, low, high, rank, constant): + if not constant and backend == "mil_proto": + return # TODO: rdar://61948178 (MIL backend Random op does not support dynamic 
input shape) + + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + if constant: + ref = tf.add(x, tf.random.uniform(shape=shape, minval=low, maxval=high)) + else: + ref = tf.add( + x, + tf.random.uniform( + shape=tf.raw_ops.Shape(input=x), minval=low, maxval=high + ), + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, low, high, rank, constant", + itertools.product( + [True, False], + backends, + [1.0], + [1.0], + [rank for rank in range(1, 6)], + [True, False], + ), + ) + def test_keras_random_uniform( + self, use_cpu_only, backend, low, high, rank, constant + ): + if not constant and backend == "mil_proto": + return # TODO: rdar://61948178 (MIL backend Random op does not support dynamic input shape) + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + if constant: + ref = tf.add( + x, + tf.keras.backend.random_uniform( + shape=shape, minval=low, maxval=high + ), + ) + else: + ref = tf.add( + x, + tf.keras.backend.random_uniform( + shape=tf.raw_ops.Shape(input=x), minval=low, maxval=high + ), + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestReduction: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axes, keep_dims, tf_op", + itertools.product( + [True, False], + backends, + [ + (1, (-1,)), + (2, (0,)), + (2, (-1, 0)), + (3, (1, -3)), + (3, (-2,)), + (4, (0, 1, 2)), + (4, (-2, -1, 0)), + (4, (1, -2)), + (5, (-3, -1)), + (5, (0, -1, 1, -2)), + (3, None), + (5, None), + (3, 1), + (5, -1), + ], + [True, False], + [ + tf.reduce_all, + tf.math.reduce_euclidean_norm, + tf.reduce_max, + tf.reduce_mean, + tf.reduce_min, + tf.reduce_prod, + tf.reduce_sum, + tf.reduce_any, + tf.reduce_logsumexp, + tf.math.argmax, + tf.math.argmin, + ], + ), + ) + def test_reduction(self, use_cpu_only, backend, rank_and_axes, keep_dims, tf_op): + rank, axes = rank_and_axes + shape = np.random.randint(low=1, high=6, size=rank) + + def parse_axes(axes): + if axes is None: + axes = 0 + elif isinstance(axes, (tuple, list)): + axes = axes[0] + return axes + + def test_tf_argmax(): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.math.argmax(x, axis=parse_axes(axes)) + run_compare_tf( + graph, + {x: random_gen(shape=shape, rand_min=-5.0, rand_max=5.0)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + def test_tf_argmin(): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.math.argmin(x, axis=parse_axes(axes)) + run_compare_tf( + graph, + {x: random_gen(shape=shape, rand_min=-5.0, rand_max=5.0)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + def test_tf_reduction(): + if isinstance(axes, list) and axes and len(axes) == rank and not keep_dims: + return # TODO MIL: Add rank 0 and dim size 0 related tests for every op + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + x_val = random_gen(shape=shape, rand_min=-5.0, rand_max=5.0) + if tf_op in {tf.reduce_all, tf.reduce_any}: + x = tf.placeholder(tf.bool, shape=shape) + x_val = np.random.randint(low=0, high=2, size=shape).astype( + np.float32 + ) + elif tf_op in 
{tf.math.reduce_euclidean_norm}: + x_val = random_gen(shape=shape, rand_min=0.0, rand_max=10.0) + elif tf_op in {tf.reduce_prod}: + x_val = random_gen(shape=shape, rand_min=1.0, rand_max=1.5) + elif tf_op in {tf.reduce_logsumexp}: + x_val = random_gen(shape=shape, rand_min=-5, rand_max=5) + ref = tf_op(x, axis=axes, keepdims=keep_dims) + + if tf_op == tf.reduce_any: + ref = tf.cast(ref, tf.float32) + + run_compare_tf( + graph, {x: x_val}, ref, use_cpu_only=use_cpu_only, backend=backend + ) + + if tf_op in {tf.math.argmax}: + test_tf_argmax() + elif tf_op in {tf.math.argmin}: + test_tf_argmin() + else: + test_tf_reduction() + + +class TestScatterGather: + # TODO: [MIL] Gather layer with 0-d indices leads to input shape mismatch + @pytest.mark.parametrize( + "use_cpu_only, backend, rankX_rankIndices_axis, mode", + itertools.product( + [True, False], + backends, + [ + (1, 2, -1), + (2, 1, 0), + (3, 2, -2), + (2, 3, 1), + (2, 2, 1), + (1, 1, 0), + (3, 3, -2), + (3, 3, 2), + (3, 3, 0), + (1, 3, -1), + (3, 1, 2), + (3, 1, -1), + ], + ["Gather", "GatherV2", "gather"], + ), + ) + def test_gather_function(self, use_cpu_only, backend, rankX_rankIndices_axis, mode): + x_rank, indices_rank, axis = rankX_rankIndices_axis + x_shape = np.random.randint(low=2, high=5, size=x_rank) + indices_shape = np.random.randint(low=2, high=5, size=indices_rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + indices = tf.placeholder(tf.int32, shape=indices_shape) + if mode == "Gather": + res = tf.raw_ops.Gather(params=x, indices=indices) + axis = 0 + elif mode == "GatherV2": + res = tf.raw_ops.GatherV2(params=x, indices=indices, axis=axis) + elif mode == "gather": + res = tf.gather(x, indices, axis=axis) + run_compare_tf( + graph, + { + x: np.random.rand(*x_shape), + indices: np.random.randint( + 0, x_shape[axis], size=indices_shape, dtype=np.int32 + ), + }, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rankX_rankIndices", + itertools.product( + [True, False], + backends, + [ + (1, 2), + (2, 2), + (3, 2), + (2, 3), + (1, 4), + (5, 2), + (2, 5), + (4, 3), + (3, 4), + (2, 4), + (4, 2), + (1, 5), + ], + ), + ) + def test_gather_nd(self, use_cpu_only, backend, rankX_rankIndices): + x_rank, indices_rank = rankX_rankIndices + x_shape = np.random.randint(low=2, high=8, size=x_rank) + indices_shape = np.random.randint(low=2, high=8, size=indices_rank) + indices_shape[-1] = np.random.randint(low=1, high=x_rank + 1) + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + indices = tf.placeholder(tf.int32, shape=indices_shape) + res = tf.gather_nd(x, indices) + + a = np.random.rand(*x_shape) + indices_list = [] + for i in range(indices_shape[-1]): + indices_list.append( + np.random.randint(0, x_shape[i], size=indices_shape[:-1]) + ) + + input_values = { + x: a, + indices: np.stack(indices_list, axis=-1).astype(np.float), + } + + run_compare_tf( + graph, + input_values, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, data_rank, indices_rank", + itertools.product( + [True, False], backends, list(range(1, 4)), list(range(2, 4)), + ), + ) + def test_scatter_nd_with_zeros( + self, use_cpu_only, backend, data_rank, indices_rank + ): + + shape = np.random.randint(low=2, high=5, size=data_rank) + indices_shape = np.random.randint(low=2, high=5, size=indices_rank) + 
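+        # ScatterNd's shape contract, which the generator below relies on:
+        # indices.shape[-1] selects how many leading dims of `shape` are
+        # indexed, and updates.shape must equal
+        # indices.shape[:-1] + shape[indices.shape[-1]:].
+        # A minimal worked example (illustrative only, not executed here):
+        #     tf.raw_ops.ScatterNd(indices=[[0], [2]],
+        #                          updates=np.ones((2, 5), np.float32),
+        #                          shape=[4, 5])
+        # writes rows 0 and 2 of an otherwise all-zero (4, 5) tensor.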
indices_shape[-1] = np.random.randint(low=1, high=data_rank + 1) + updates_shape = list(indices_shape[:-1]) + list(shape[indices_shape[-1] :]) + + updates = np.random.rand(*updates_shape) + indices_list = [] + for i in range(indices_shape[-1]): + indices_list.append(np.random.randint(0, shape[i], size=indices_shape[:-1])) + + indices = np.stack(indices_list, axis=-1).astype(np.int32) + + @make_tf_graph( + [list(indices.shape) + [tf.int32], updates_shape, [data_rank, tf.int32]] + ) + def build_model(indices, updates, shape): + return tf.raw_ops.ScatterNd(indices=indices, updates=updates, shape=shape) + + model, inputs, outputs = build_model + input_values = [indices, updates, shape] + + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestSlice: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, masking, trial", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 5)], + [True, False], + list(range(10)), + ), + ) + def test_slice_by_index(self, use_cpu_only, backend, rank, masking, trial): + input_shape = np.random.randint(low=2, high=6, size=rank) + begin_val = np.array( + [ + np.random.randint(low=-input_shape[i], high=input_shape[i]) + for i in range(rank) + ] + ).astype(np.int32) + end_val = np.array( + [ + np.random.randint(low=-input_shape[i], high=input_shape[i]) + for i in range(rank) + ] + ).astype(np.int32) + stride_val = np.array( + [ + np.random.randint(low=-input_shape[i], high=input_shape[i]) + for i in range(rank) + ] + ).astype(np.int32) + if not masking: + begin_mask = [False] * rank + end_mask = [False] * rank + squeeze_mask = [False] * rank + else: + begin_mask = np.array( + [np.random.choice([True, False, False]) for i in range(rank)] + ).astype(np.bool) + end_mask = np.array( + [np.random.choice([True, False, False]) for i in range(rank)] + ).astype(np.bool) + squeeze_flag = True + # We do not squeeze to scalar in nn + while squeeze_flag and backend == "nn_proto": + squeeze_mask = np.array( + [np.random.choice([True, False]) for i in range(rank)] + ).astype(np.bool) + for i in range(rank): + if begin_mask[i] or end_mask[i]: + squeeze_mask[i] = False + for s in squeeze_mask: + if not s: + squeeze_flag = False + + for i in range(rank): + if begin_mask[i] or end_mask[i]: + stride = 0 + while stride == 0: + stride = np.random.randint(low=-input_shape[i], high=input_shape[i]) + stride_val[i] = stride + + if not end_mask[i]: + while True: + end = np.random.randint( + low=-input_shape[i], high=input_shape[i] + ) + normalized_end = input_shape[i] + end if end < 0 else end + if normalized_end == 0 and stride_val[i] > 0: + continue + elif normalized_end == input_shape[i] - 1 and stride_val[i] < 0: + continue + else: + end_val[i] = end + break + continue + if squeeze_mask[i]: + stride_val[i] = 1 + while True: + end = np.random.randint(low=-input_shape[i], high=input_shape[i]) + normalized_end = input_shape[i] + end if end < 0 else end + normalized_begin = ( + input_shape[i] + begin_val[i] if begin_val[i] < 0 else begin_val[i] + ) + if normalized_end == normalized_begin: + continue + if begin_mask[i] or end_mask[i] or squeeze_mask[i]: + stride = 1 + elif normalized_end < normalized_begin: + stride = -np.random.randint(low=1, high=input_shape[i]) + else: + stride = np.random.randint(low=1, high=input_shape[i]) + end_val[i] = end + stride_val[i] = stride + break + + def _mask_to_bit(mask): + ret = 0 + for x in 
mask[::-1]: + ret <<= 1 + if x: + ret += 1 + return ret + + @make_tf_graph( + [ + input_shape, + list(begin_val.shape) + [tf.int32], + list(end_val.shape) + [tf.int32], + ] + ) + def build_model(x, begin, end): + return tf.strided_slice( + x, + begin, + end, + stride_val, + begin_mask=_mask_to_bit(begin_mask), + end_mask=_mask_to_bit(end_mask), + shrink_axis_mask=_mask_to_bit(squeeze_mask), + ) + + model, inputs, outputs = build_model + + input_values = [ + np.array(list(range(np.prod(input_shape)))) + .reshape(input_shape) + .astype(np.float32), + begin_val, + end_val, + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, testcase", + itertools.product( + [True, False], + backends, + # Change to slice representation for allowing iteration with a non-constant input + [ + ( + slice(1, 2), + slice(1, 2), + slice(1, 2), + ), # equivalent to [1:2, 1:2, 1:2] + (slice(-3, -2), slice(-4, -3), slice(-5, -4)), + (slice(0, -2), slice(0, -1), slice(-3, -2)), + (slice(-1, 0, -2), slice(-1, 1, -1), slice(-1, -3, -3)), + (slice(1, 2), slice(1, 3), slice(1, 4, 2)), + (slice(None, 2), slice(1, 3), slice(None, 4, 2)), + ( + slice(None), + slice(1, None), + slice(None, 4, 2), + ), # equivalent to [:,1:,:4:2] + (slice(1, None, 1), 1, slice(None, 3, 2)), + (slice(None), slice(None), slice(None)), + (slice(1, 2), slice(1, 2), 1), + (slice(1, 2), slice(None), slice(None)), + (slice(None), slice(None), slice(None)), + (slice(1, 2), slice(None), slice(1, 2)), + (slice(None), slice(None), 1), + (0, 0, slice(None)), + (slice(1, 2)), + (slice(1, 2), slice(1, 2)), + (1), + (slice(0, 3)), + (slice(None)), + (slice(None), slice(None), slice(None, None, -1)), + ], + ), + ) + def test_slice_by_index_from_scratch(self, use_cpu_only, backend, testcase): + input_shape = np.array([3, 4, 5]) + + @make_tf_graph([input_shape]) + def build_model(x): + return x[testcase] + + model, inputs, outputs = build_model + + input_values = [ + np.array(list(range(np.prod(input_shape)))) + .reshape(input_shape) + .astype(np.float32) + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_slice_by_index_smoke(self, use_cpu_only, backend): + def test_two_slice_ops(): + input_shape = [1, 64, 2] + x_val = np.random.rand(*input_shape).astype(np.float32) + y_val = np.random.rand(*input_shape).astype(np.float32) + + @make_tf_graph([input_shape, input_shape]) + def build_model(x, y): + x_slice = x[:, :, 0] + y_slice = y[:, :, 0] + return (x_slice, y_slice) + + model, inputs, outputs = build_model + + input_values = [x_val, y_val] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def test_slice_new_axis(): + input_shape = [4, 5, 64] + val = np.random.rand(*input_shape).astype(np.float32) + num_cases = 8 + + @make_tf_graph([input_shape] * num_cases) + def build_model(*args): + a, b, c, d, e, f, g, h = args + slice_0 = a[:1, tf.newaxis, :3, :] + slice_1 = b[:, tf.newaxis] + slice_2 = c[..., tf.newaxis] + slice_3 = d[..., tf.newaxis, :, 10] + slice_4 = e[:, 2, tf.newaxis, ...] 
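+                # tf.newaxis inserts a length-1 axis (it lowers to
+                # strided_slice's new_axis_mask) and `...` expands to as many
+                # full slices as needed. With the (4, 5, 64) inputs here, e.g.
+                # b[:, tf.newaxis] -> (4, 1, 5, 64) and
+                # g[tf.newaxis, ..., tf.newaxis] -> (1, 4, 5, 64, 1).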
+ slice_5 = f[2, ..., :, tf.newaxis] + slice_6 = g[tf.newaxis, ..., tf.newaxis] + slice_7 = h[tf.newaxis, 2, tf.newaxis, ...] + + return ( + slice_0, + slice_1, + slice_2, + slice_3, + slice_4, + slice_5, + slice_6, + slice_7, + ) + + model, inputs, outputs = build_model + + input_values = [val] * num_cases + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + test_two_slice_ops() + test_slice_new_axis() + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, single_size, dynamic_size", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 5)], + [True, False], + [True, False], + ), + ) + def test_slice_by_size( + self, use_cpu_only, backend, rank, single_size, dynamic_size + ): + input_shape = np.random.randint(low=2, high=6, size=rank) + begin_val = np.array( + [np.random.randint(input_shape[i]) for i in range(rank)] + ).astype(np.int32) + size_val = np.array( + [np.random.randint(input_shape[i] - begin_val[i]) + 1 for i in range(rank)] + ) + if single_size: + for r in range(rank): + size_val_r = np.array( + [s if i == r else -1 for i, s in enumerate(size_val)] + ).astype(np.int32) + + @make_tf_graph([input_shape, list(begin_val.shape) + [tf.int32]]) + def build_model(x, begin): + return tf.slice(x, begin, size_val_r) + + @make_tf_graph( + [ + input_shape, + list(begin_val.shape) + [tf.int32], + list(size_val_r.shape) + [tf.int32], + ] + ) + def build_model_dynamic_size(x, begin, size): + return tf.slice(x, begin, size) + + if dynamic_size: + model, inputs, outputs = build_model_dynamic_size + input_values = [ + random_gen(input_shape, rand_min=-100, rand_max=100), + begin_val, + size_val_r, + ] + else: + model, inputs, outputs = build_model + input_values = [ + random_gen(input_shape, rand_min=-100, rand_max=100), + begin_val, + ] + + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + else: + size_val = np.array( + [s if np.random.randint(2) == 0 else -1 for s in size_val] + ).astype(np.int32) + + @make_tf_graph([input_shape, list(begin_val.shape) + [tf.int32]]) + def build_model(x, begin): + return tf.slice(x, begin, size_val) + + @make_tf_graph( + [ + input_shape, + list(begin_val.shape) + [tf.int32], + list(size_val.shape) + [tf.int32], + ] + ) + def build_model_dynamic_size(x, begin, size): + return tf.slice(x, begin, size) + + if dynamic_size: + model, inputs, outputs = build_model_dynamic_size + input_values = [ + random_gen(input_shape, rand_min=-100, rand_max=100), + begin_val, + size_val, + ] + else: + model, inputs, outputs = build_model + input_values = [ + random_gen(input_shape, rand_min=-100, rand_max=100), + begin_val, + ] + + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestMatrixBandPart: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, lower_and_upper", + itertools.product( + [True, False], + backends, + [rank for rank in range(2, 6)], + [(0, -1), (-1, 0), (0, 0)], + ), + ) + def test_matrix_band_part(self, use_cpu_only, backend, rank, lower_and_upper): + lower, upper = lower_and_upper + shape = np.random.randint(low=3, high=4, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + res = 
tf.matrix_band_part(x, num_lower=lower, num_upper=upper) + run_compare_tf( + graph, + {x: random_gen(shape, rand_min=-100, rand_max=100)}, + res, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestCumSum: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, reverse, exclusive", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [True, False], + [True, False], + ), + ) + def test_cumsum(self, use_cpu_only, backend, rank, reverse, exclusive): + input_shape = np.random.randint(low=2, high=6, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + for axis in range(-1, rank): + res = tf.math.cumsum(x, axis=axis, reverse=reverse, exclusive=exclusive) + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + run_compare_tf( + graph, + { + x: random_gen( + input_shape, rand_min=-100, rand_max=100, dtype=dtype + ) + }, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.xfail(reason="Backend doesn't support empty counts", run=False) + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_cumsum_empty_input_tf(self, use_cpu_only, backend): + empty_inputs = [[], [[]], [[[]]], [[], []], [[[]], [[]]]] + for input_x in empty_inputs: + input_x = np.array(input_x) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_x.shape) + for axis in range(-1, len(input_x.shape)): + res = tf.math.cumsum(x, axis=axis) + run_compare_tf( + graph, + {x: input_x}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestFill: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, value", + itertools.product( + [True, False], backends, [rank for rank in range(1, 6)], [-19.0, 0.0, 37.0] + ), + ) + def test_fill(self, use_cpu_only, backend, rank, value): + def test_tf_static(): + shape = np.random.randint(low=1, high=3, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.add( + x, tf.fill(dims=np.array(shape, dtype=np.int32), value=value) + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + def test_tf_dynamic(): + shape = np.random.randint(low=1, high=3, size=rank) + with tf.Graph().as_default() as graph: + s = tf.placeholder(tf.int32, shape=(len(shape),)) + ref = tf.fill(dims=s, value=value) + run_compare_tf( + graph, + {s: np.array(shape, dtype=np.int32)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + test_tf_static() + test_tf_dynamic() + + +class TestNonMaximumSuppression: + # TODO (rdar://60390856) TF may output fewer than max_boxes, but + # current TF frontend will always output max_boxes. Need to apply + # slice. + @pytest.mark.xfail + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "num_boxes", + "max_boxes", + "iou_threshold", + "score_threshold", + ] + ), + itertools.product( + [True, False], + backends, + [20, 30, 100], + [5, 20], + [1.0, 0.99], + [float("-inf"), -200.0], + ), + ) + def test_non_max_suppression( + self, + use_cpu_only, + backend, + num_boxes, + max_boxes, + iou_threshold, + score_threshold, + ): + """ + Note: TensorFlow and Core ML do not have the exact same implementation; + Core ML pads the results with -1s while TensorFlow does not.
Threshold + values are carefully chosen to make the test succeed, as it fails when: + 1) input num_boxes < max_boxes because of a shape mismatch. + 2) output num_boxes < max_boxes because of a shape mismatch. + """ + boxes_val = random_gen(shape=(num_boxes, 4), rand_min=0, rand_max=32) + scores_val = random_gen(shape=(num_boxes,), rand_min=-100, rand_max=100) + + with tf.Graph().as_default() as graph: + boxes = tf.placeholder(tf.float32, shape=boxes_val.shape) + scores = tf.placeholder(tf.float32, shape=scores_val.shape) + ref = tf.image.non_max_suppression( + boxes=boxes, + scores=scores, + max_output_size=max_boxes, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + ) + run_compare_tf( + graph, + {boxes: boxes_val, scores: scores_val}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestOneHot: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axis, dynamic", + itertools.product( + [True, False], + backends, + [ + (2, 0), + (2, -1), + (3, 3), + (3, 0), + (3, -2), + (4, -4), + (4, 1), + (4, -1), + (4, -2), + (4, 3), + ], + [True, False], + ), + ) + def test_one_hot(self, use_cpu_only, backend, rank_and_axis, dynamic): + rank, axis = rank_and_axis + depth, on_value, off_value = 30, 28.0, -4.0 + x_shape = np.random.randint(low=2, high=5, size=rank) + + if not dynamic: + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.int32, shape=x_shape) + axis = ( + axis if axis >= -1 else axis + rank + 1 + ) # TF limitation: doesn't support axis < -1 + res = tf.one_hot( + x, axis=axis, depth=depth, on_value=on_value, off_value=off_value + ) + run_compare_tf( + graph, + {x: np.random.randint(0, depth, size=x_shape)}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + else: # dynamic case with depth being an input + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.int32, shape=x_shape) + depth_input = tf.placeholder(tf.int32) + axis = ( + axis if axis >= -1 else axis + rank + 1 + ) # TF limitation: doesn't support axis < -1 + res = tf.one_hot( + x, + axis=axis, + depth=depth_input, + on_value=on_value, + off_value=off_value, + ) + run_compare_tf( + graph, + {x: np.random.randint(0, depth, size=x_shape), depth_input: depth}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestPad: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, mode, trial", + itertools.product( + [True, False], + backends, + [2, 3, 4], + # rdar://59854962 ([Pad Precision issue] Rank 5 Pad precision dropped on GPU compared to CPU) + ["reflect", "constant"], + list(range(10)), + ), + ) + def test(self, use_cpu_only, backend, rank, mode, trial): + input_shape = np.random.randint(low=2, high=10, size=rank) + min_input_dim_size = input_shape.min() + padding_val = np.random.randint( + low=0, high=min_input_dim_size, size=(rank, 2), dtype=np.int32 + ) + + # Only constant mode supports padding across all dimensions; + # all other padding modes are applied on only two dimensions.
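+        # For "reflect" mode we therefore keep nonzero pad amounts on only
+        # two randomly chosen axes and zero out the rest: with rank 4 and a
+        # shuffled perm of, say, [2, 0, 3, 1], rows 2 and 0 of padding_val
+        # are zeroed and only axes 3 and 1 stay padded. The pad sizes drawn
+        # above are capped below min_input_dim_size, which also satisfies
+        # TF's reflect-pad requirement that each pad amount be smaller than
+        # the corresponding dimension.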
+ perm = list(range(rank)) + import random + + random.shuffle(perm) + if mode != "constant": + padding_val[perm[:-2]] = 0 + + tf_mode = mode.upper() + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.pad(x, paddings=padding_val, mode=tf_mode) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, rand_min=0.2, rand_max=1000)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestPadV2: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, constant_values", + itertools.product([True, False], backends, list(range(1, 6)), [0.0, 10, -1],), + ) + def test(self, use_cpu_only, backend, rank, constant_values): + input_shape = np.random.randint(low=2, high=10, size=rank) + paddings = np.random.randint(low=2, high=5, size=2 * rank) + paddings = paddings.reshape(-1, 2) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.PadV2( + input=x, paddings=paddings, constant_values=constant_values + ) + + model, inputs, outputs = build_model + + input_values = [random_gen(input_shape, rand_min=1, rand_max=10)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestRange: + @pytest.mark.parametrize( + "use_cpu_only, backend, params", + itertools.product( + [True, False], + backends, + [ + (-10.4, 23, 12.2), + (0, 1000, 1), + (50.5, 90.5, 1.5), + (5, 8, 2), + (5, 8, 98), + (5, 8, 1.5), + (10, 5, -0.6), + (24, -65, -2), + ], + ), + ) + def test_range(self, use_cpu_only, backend, params): + start, end, step = params + with tf.Graph().as_default() as graph: + limit = tf.placeholder(tf.float32) + res = tf.range(start=start, limit=limit, delta=step) + run_compare_tf( + graph, + {limit: end}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + with tf.Graph().as_default() as graph: + delta = tf.placeholder(tf.float32) + res = tf.range(start=start, limit=end, delta=delta) + run_compare_tf( + graph, + {delta: step}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + with tf.Graph().as_default() as graph: + begin = tf.placeholder(tf.float32) + res = tf.range(start=begin, limit=end, delta=step) + run_compare_tf( + graph, + {begin: start}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestTile: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_reps", + itertools.product( + [True, False], + backends, + [ + (1, (2,)), + (2, (1, 2)), + (2, (2, 2)), + (3, (3, 2, 1)), + (3, (2, 1, 3)), + (3, (2, 1, 1)), + (4, (1, 3, 2, 1)), + (4, (2, 1, 1, 2)), + (5, (2, 1, 1, 3, 2)), + (5, (1, 1, 2, 3, 2)), + ], + ), + ) + def test_tile(self, use_cpu_only, backend, rank_and_reps): + rank, reps = rank_and_reps + x_shape = np.random.randint(low=2, high=5, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + res = tf.tile(x, multiples=reps) + run_compare_tf( + graph, + {x: np.random.rand(*x_shape)}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestDynamicTile: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [1, 2, 3, 4, 5]), + ) + def test_tile(self, use_cpu_only, backend, rank): + x_shape = np.random.randint(low=2, high=5, 
size=rank) + reps_val = np.random.randint(low=1, high=10, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + reps = tf.placeholder(tf.int32, shape=reps_val.shape) + res = tf.tile(x, multiples=reps) + run_compare_tf( + graph, + {x: np.random.rand(*x_shape), reps: reps_val}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestTopK: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, k", + itertools.product( + [True, False], backends, [rank for rank in range(1, 6)], [1, 2, 3], + ), + ) + def test_top_k(self, use_cpu_only, backend, rank, k): + # TensorFlow only supports last dimension (axis = -1). + shape = np.random.randint(low=3, high=6, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.math.top_k(x, k=k, sorted=True) + ref = (ref[1], ref[0]) + run_compare_tf( + graph, + {x: random_gen(shape, rand_min=-100, rand_max=100)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestConcat: + @pytest.mark.parametrize( + "use_cpu_only, backend, op_version", + itertools.product([True, False], ["nn_proto"], ["v1", "v2"]), + ) + def test_concat(self, use_cpu_only, backend, op_version): + input_shape1 = [3, 2, 1] + input_shape2 = [3, 1, 1] + + @make_tf_graph([input_shape1, input_shape2]) + def build_model(x, y): + if op_version == "v1": + # Seems like now the tf functions are using concatV2, so create as raw_ops here + res = tf.raw_ops.Concat(concat_dim=-2, values=[x, y], name="concat") + elif op_version == "v2": + res = tf.concat((x, y), axis=-2) + return res + + model, inputs, outputs = build_model + + input_values = [ + np.random.rand(*input_shape1).astype(np.float32), + np.random.rand(*input_shape2).astype(np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestSplit: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, dynamic", + itertools.product([True, False], backends, [1, 2, 3, 4], [True, False],), + ) + def test_split(self, use_cpu_only, backend, rank, dynamic): + input_shape1 = np.random.randint(low=1, high=10, size=rank) + for axis in range(-rank, rank): + for split_num in range(1, input_shape1[axis] + 1): + if input_shape1[axis] % split_num != 0: + continue + tf_input_shape = list(input_shape1) + if dynamic: + axis1 = np.random.randint(low=0, high=rank) + tf_input_shape[axis1] = None + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=tf_input_shape) + res = tf.split(x, split_num, axis=axis) + # TODO (rdar://60358242) If tf.split output is returned, there's no + # get_tuple nodes. Some graph pass is needed. 
Example: + # + # x = tf.placeholder(tf.float32, shape=input_shape1) + # res = tf.split(x, 3, axis=0) + # + # res are ['split:0', 'split:1', 'split'] + # + # but node.outputs == ['gto_1', 'gto_2', 'gto_3'] + import random + + random.shuffle(res) + res = tuple(res) + inputs = { + x: np.random.rand(*input_shape1), + } + run_compare_tf( + graph, + inputs, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_splitv(self, use_cpu_only, backend): + input_shape1 = [3, 2, 1] + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape1) + res = tf.split(x, [1, 2], axis=0) + res = (res[0], res[1]) + inputs = { + x: np.random.rand(*input_shape1), + } + run_compare_tf( + graph, + inputs, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestStack: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_stack(self, use_cpu_only, backend): + input_shape1 = [3, 1, 1] + input_shape2 = [3, 1, 1] + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape1) + y = tf.placeholder(tf.float32, shape=input_shape2) + res = [tf.stack((x, y), axis=0), tf.stack((x, y), axis=-1)] + inputs = { + x: np.random.rand(*input_shape1), + y: np.random.rand(*input_shape2), + } + run_compare_tf( + graph, + inputs, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestPack: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, num_inputs", + itertools.product([True, False], backends, list(range(5)), list(range(1, 10))), + ) + def test_pack(self, use_cpu_only, backend, rank, num_inputs): + shape = np.random.randint(low=1, high=6, size=rank) + input_shapes = [shape[:] for _ in range(num_inputs)] + + @make_tf_graph(input_shapes) + def build_model(*inputs): + return tf.raw_ops.Pack(values=inputs, axis=0) + + model, inputs, outputs = build_model + input_values = [ + random_gen(shape, rand_min=-1, rand_max=1) for shape in input_shapes + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestArgSort: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axis, direction", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [-1, 0], + ["ascending", "descending"], + ), + ) + def test_argsort(self, use_cpu_only, backend, rank, axis, direction): + shape = np.random.randint(low=1, high=6, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.argsort(x, axis=axis, direction=direction.upper()) + if use_cpu_only: + dtype = np.float32 + else: + dtype = np.float16 + run_compare_tf( + graph, + { + x: random_gen( + shape, + rand_min=-100, + rand_max=100, + allow_duplicate=False, + dtype=dtype, + ) + }, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestDepthToSpace: + @pytest.mark.parametrize( + "use_cpu_only, backend, shape, block_size", + itertools.product( + [True, False], + backends, + [(1, 1, 1, 16), (1, 1, 1, 32), (1, 3, 3, 16)], + [2, 4], + ), + ) + def test_depth_to_space(self, use_cpu_only, backend, shape, block_size): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.depth_to_space(x, 
block_size) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestReshape: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_flatten(self, use_cpu_only, backend): + shapes = [[10, 10], [3, 4, 5, 6], [4, 4, 5, 6]] + + for input_shape in shapes: + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.keras.backend.flatten(x) + + model, inputs, outputs = build_model + + input_values = [np.random.rand(*input_shape).astype(np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(1, 6)],), + ) + def test_shape(self, use_cpu_only, backend, rank): + shape = np.random.randint(low=3, high=6, size=rank) + shape_holder = [None] * rank + + @make_tf_graph([shape_holder]) + def build_model(x): + return tf.shape(x) + + model, inputs, outputs = build_model + + input_values = [random_gen(shape, rand_min=-100, rand_max=100)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axis", + itertools.product( + [True, False], + backends, + [ + (rank, axis) + for rank in range(1, 5) + for axis in range(-rank - 1, rank + 1) + ], + ), + ) + def test_expand_dims(self, use_cpu_only, backend, rank_and_axis): + rank, axis = rank_and_axis + input_shape = np.random.randint(low=2, high=6, size=rank) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.expand_dims(x, axis=axis) + + model, inputs, outputs = build_model + + input_values = [np.random.rand(*input_shape).astype(np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape", + itertools.product( + [False], + backends, + [ + ([10, 10], [5, 20]), + ([3, 4, 5, 6], [4, 5, 3, 6]), + ([4, 4, 5, 6], [2, 2, -1]), + ], + ), + ) + def test_reshape_static(self, use_cpu_only, backend, input_shape): + @make_tf_graph([input_shape[0]]) + def build_model(x): + return tf.reshape(x, shape=input_shape[1]) + + model, inputs, outputs = build_model + + input_values = [np.random.rand(*input_shape[0]).astype(np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape", + itertools.product( + [False], + backends, + [ + ([10, 10], [5, 20]), + ([3, 4, 5, 6], [4, 5, 3, 6]), + ([4, 4, 5, 6], [2, 2, -1]), + ([2, 3, 5, 3], [2, -1]), + ], + ), + ) + def test_reshape_dynamic(self, use_cpu_only, backend, input_shape): + @make_tf_graph([input_shape[0], (len(input_shape[1]), tf.int32)]) + def build_model(x, y): + return tf.reshape(x, shape=y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.rand(*input_shape[0]).astype(np.float32), + np.array(input_shape[1], dtype=np.int32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + 
use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, shape", + itertools.product([False], backends, [[1], [1, 1], [1, 1, -1], []],), + ) + def test_reshape_scalar(self, use_cpu_only, backend, shape): + input_shape = () + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.Reshape(tensor=x, shape=shape) + + model, inputs, outputs = build_model + + input_values = [np.random.rand(*input_shape)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestReverse: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axes", + itertools.product( + [True, False], + backends, + [ + (1, (-1,)), + (2, (0,)), + (2, (-1, 0)), + (3, (1, -3)), + (3, (-2,)), + (3, (0, 1, 2)), + (4, (-2, -1, 0)), + (4, (-1, -2)), + (4, []), + (5, (-3, -1, 3)), + (5, (0, -1, 1, -2)), + ], + ), + ) + def test_reverse(self, use_cpu_only, backend, rank_and_axes): + rank, axes = rank_and_axes + shape = np.random.randint(low=1, high=4, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + res = tf.reverse(x, axis=axes) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + res, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [rank for rank in range(2, 6)]), + ) + def test_reverse_sequence(self, use_cpu_only, backend, rank): + shape = np.random.randint(low=1, high=6, size=rank) + seq_axis = np.random.randint(low=1, high=rank) + batch_axis = np.random.randint(low=0, high=seq_axis) + lengths = np.random.randint(low=0, high=shape[seq_axis], size=shape[batch_axis]) + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + res = tf.reverse_sequence( + x, seq_lengths=lengths, seq_axis=seq_axis, batch_axis=batch_axis + ) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + res, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSpaceToDepth: + @pytest.mark.parametrize( + "use_cpu_only, backend, shape, block_size", + itertools.product( + [True, False], + backends, + [(1, 6, 6, 1), (1, 12, 12, 1), (1, 6, 6, 3)], + [2, 3], + ), + ) + def test_space_to_depth(self, use_cpu_only, backend, shape, block_size): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=shape) + ref = tf.space_to_depth(x, block_size) + run_compare_tf( + graph, + {x: np.random.rand(*shape)}, + ref, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSqueeze: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axes", + itertools.product( + [True, False], + backends, + [ + (2, (1,)), + (2, (0,)), + (3, (1,)), + (3, (0, -1)), + (3, []), + (4, (-1, 2, 1)), + (4, (0, 1)), + (5, (3, 1, 2)), + (5, (-1,)), + ], + ), + ) + def test_squeeze(self, use_cpu_only, backend, rank_and_axes): + rank, axes = rank_and_axes + x_shape = np.random.randint(low=2, high=5, size=rank) + for axis in axes: + x_shape[axis] = 1 + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + res = tf.squeeze(x, axis=axes) + run_compare_tf( + graph, + {x: np.random.rand(*x_shape)}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + res = tf.squeeze(x, 
axis=None) + run_compare_tf( + graph, + {x: np.random.rand(*x_shape)}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestTranspose: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_perm", + itertools.product( + [True, False], + backends, + [ + (1, (0,)), + (2, (1, 0)), + (2, (0, 1)), + (3, (0, 2, 1)), + (3, (2, 1, 0)), + (3, (2, 0, 1)), + (4, (0, 3, 2, 1)), + (4, (3, 0, 1, 2)), + (5, (2, 3, 1, 0, 4)), + (5, (3, 1, 0, 4, 2)), + ], + ), + ) + def test_transpose_1(self, use_cpu_only, backend, rank_and_perm): + rank, perm = rank_and_perm + x_shape = np.random.randint(low=2, high=5, size=rank) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + res = tf.transpose(x, perm=perm) + run_compare_tf( + graph, + {x: np.random.rand(*x_shape)}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True, False], backends, [1, 2, 3, 4],), + ) + def test_transpose_2(self, use_cpu_only, backend, rank): + input_shape = np.random.randint(low=2, high=6, size=rank) + perm = np.random.permutation(rank).astype(np.float32) + + def static_perm(): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + res = tf.transpose(x, perm) + run_compare_tf( + graph, + {x: np.random.rand(*input_shape)}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def dynamic_perm(): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=input_shape) + tf_perm = tf.placeholder(tf.int32, shape=[None]) + res = tf.transpose(x, tf_perm) + run_compare_tf( + graph, + {x: np.random.rand(*input_shape), tf_perm: perm}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + static_perm() + # Note that TF supports dynamic perm in tf.transpose. + with pytest.raises(ValueError, match=r".*must be const at compile time.*"): + dynamic_perm() + + +class TestSpaceToBatchND: + # No direct mil smoke test since it's a TF op which is a composite of several ops. 
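+    # Shape semantics, for reference: with input (N, H, W, C), block_shape
+    # (bH, bW) and paddings ((t, b), (l, r)), the output shape is
+    # (N * bH * bW, (H + t + b) / bH, (W + l + r) / bW, C); e.g. the smoke
+    # case (1, 4, 4, 1) with block_shape [2, 2] and zero padding gives
+    # (4, 2, 2, 1).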
+ @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape, block_shape, paddings", + itertools.product( + [True, False], + backends, + [(1, 4, 4, 1), (1, 4, 4, 3), (2, 4, 6, 1)], + [[2, 2]], + [[[0, 0], [0, 0]], [[1, 1], [0, 2]], [[4, 2], [4, 2]]], + ), + ) + def test_smoke(self, use_cpu_only, backend, input_shape, block_shape, paddings): + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.SpaceToBatchND( + input=x, block_shape=block_shape, paddings=paddings + ) + + model, inputs, outputs = build_model + input_values = [random_gen(input_shape)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, input_block_rank, dynamic, trial", + itertools.product( + [True, False], + backends, + [(3, 1), (3, 2), (4, 1)], + [True, False], + list(range(10)), + ), + ) + def test_programmatic( + self, use_cpu_only, backend, input_block_rank, dynamic, trial + ): + + input_rank, block_rank = input_block_rank + + # generate data + input_shape = np.random.randint(low=1, high=5, size=input_rank) + block_shape = np.random.randint(low=1, high=3, size=block_rank) + paddings = [] + for i in range(block_rank): + while True: + temp = np.random.randint(low=0, high=10, size=2) + if (np.sum(temp) + input_shape[i + 1]) % block_shape[i] == 0: + paddings.append(temp) + break + paddings = np.array(paddings) + + if not dynamic: + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.SpaceToBatchND( + input=x, block_shape=block_shape, paddings=paddings + ) + + else: + + @make_tf_graph([[None] * input_rank]) + def build_model(x): + return tf.raw_ops.SpaceToBatchND( + input=x, block_shape=block_shape, paddings=paddings + ) + + model, inputs, outputs = build_model + input_values = [random_gen(input_shape)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestBatchToSpaceND: + # No direct mil smoke test since it's a TF op which is a composite of several ops. 
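+    # BatchToSpaceND is the inverse mapping: with input (N, H, W, C),
+    # block_shape (bH, bW) and crops ((t, b), (l, r)), the output shape is
+    # (N / (bH * bW), H * bH - t - b, W * bW - l - r, C); e.g. the smoke
+    # case (4, 4, 4, 1) with block_shape [2, 2] and zero crops gives
+    # (1, 8, 8, 1).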
+ @pytest.mark.parametrize( + "use_cpu_only, backend, input_shape, block_size, crops", + itertools.product( + [True, False], + backends, + [(4, 4, 4, 1), (4, 4, 4, 3), (4, 4, 6, 1)], + [[2, 2]], + [[[0, 0], [0, 0]], [[1, 1], [0, 2]], [[4, 2], [4, 2]]], + ), + ) + def test_smoke(self, use_cpu_only, backend, input_shape, block_size, crops): + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.BatchToSpaceND( + input=x, block_shape=block_size, crops=crops + ) + + model, inputs, outputs = build_model + input_values = [random_gen(input_shape)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, input_block_rank, dynamic, trial", + itertools.product( + [True, False], + backends, + [(3, 1), (3, 2), (4, 1)], + [True, False], + list(range(10)), + ), + ) + def test_programmatic( + self, use_cpu_only, backend, input_block_rank, dynamic, trial + ): + + input_rank, block_rank = input_block_rank + + # generate data + input_shape = np.random.randint(low=1, high=5, size=input_rank) + block_shape = np.random.randint(low=1, high=3, size=block_rank) + input_shape[0] = input_shape[0] * np.prod(block_shape) + crops = [] + for i in range(block_rank): + while True: + temp = np.random.randint(low=0, high=5, size=2) + if np.sum(temp) < input_shape[i + 1] * block_shape[i]: + crops.append(temp) + break + crops = np.array(crops) + + if not dynamic: + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.BatchToSpaceND( + input=x, block_shape=block_shape, crops=crops + ) + + else: + + @make_tf_graph([[None] * input_rank]) + def build_model(x): + return tf.raw_ops.BatchToSpaceND( + input=x, block_shape=block_shape, crops=crops + ) + + model, inputs, outputs = build_model + input_values = [random_gen(input_shape)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestTensorArray: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_tf_basic(self, use_cpu_only, backend): + # TF1: TensorArrayV3, TensorArrayWriteV3, TensorArrayScatterV3, + # TensorArraySizeV3, TensorArrayGatherV3 + # TF2: TensorListReserve, TensorListLength, TensorListSetItem, + # TensorListScatterIntoExistingList, TensorListStack, + # TensorListResize + + elem_shape = (3, 2) + + @make_tf_graph([elem_shape]) + def build_model(x): + ta = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True) + + ta = ta.write(2, x) + + # TensorArray has write-once semantics, and thus we write to a new + # index + # (https://www.tensorflow.org/api_docs/python/tf/TensorArray) + # writing to out of bound index + ta = ta.scatter([3], tf.expand_dims(x, 0)) + + # writing to in-bound index + ta = ta.scatter([0], tf.expand_dims(x, 0)) + + return ta.stack() + + model, inputs, outputs = build_model + input_values = [random_gen(elem_shape)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.skip( + reason="[NNv2 TensorArray scatter returns wrong result](rdar://63345281)" + ) + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_tf_while_loop(self, use_cpu_only, backend): + 
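+        # The loop below threads (i, num_iters, array, update) through
+        # tf.while_loop: each body call writes `update` at index i via
+        # TensorArray.write, so after 3 iterations indices 0..2 hold copies
+        # of x, the trailing scatter writes index 3, and stack() returns a
+        # (4, 3, 2) tensor.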
@make_tf_graph([(3, 2)]) + def build_model(x): + def body(i, num_iters, array, update): + return i + 1, num_iters, array.write(i, update), update + + def cond(i, num_iters, array, update): + return i < num_iters + + i = 0 + max_iters = 3 + ta = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True) + _, _, new_ta, _ = tf.while_loop(cond, body, [i, max_iters, ta, x]) + new_ta = new_ta.scatter([max_iters], tf.expand_dims(x, 0)) + + return new_ta.stack() + + model, inputs, outputs = build_model + input_values = [random_gen(shape=(3, 2))] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestBroadcastTo: + @pytest.mark.parametrize( + "use_cpu_only, backend, shapes, is_dynamic", + itertools.product( + [True, False], + backends, + [ + ((2,), (2,)), + ((1,), (10,)), + ((3,), (3, 3)), + ((1, 1), (1, 4)), + ((1, 1, 5), (3, 4, 4, 4, 5)), + ((3,), (1, 3, 2, 1, 3)), + ((3, 5), (2, 3, 5)), + ((1, 2), (2, 3, 1, 2)), + ((1, 3, 1, 4), (8, 3, 32, 4)), + ((2, 16), (3, 1, 4, 2, 16)), + ], + [False], + ), + ) + def test(self, use_cpu_only, backend, shapes, is_dynamic): + input_shape, output_shape = shapes + + if is_dynamic is False: + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.broadcast_to(x, output_shape) + + else: # output / target shape is an input (placeholder) + + @make_tf_graph([input_shape, (len(output_shape), tf.int32)]) + def build_model(x, shape): + return tf.broadcast_to(x, shape) + + model, inputs, outputs = build_model + if is_dynamic is False: + input_values = [random_gen(input_shape)] + else: + input_values = [ + random_gen(input_shape), + np.array(output_shape, dtype=np.int32), + ] + + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestLSTMBlockCell: + @pytest.mark.parametrize( + "use_cpu_only, backend, batch, return_hc_only, has_peephole, has_clip", + itertools.product( + [True, False], + backends, + [1, 2], + [True, False], + [True, False], + [True, False], + ), + ) + def test_tf_no_variable( + self, use_cpu_only, batch, backend, return_hc_only, has_peephole, has_clip + ): + """ + If return_hc_only == True, the op can be mapped to mb.lstm. + Otherwise it has to be expanded. + """ + # _lstm_block_cell allows fine-grained control of W, peephole etc + from tensorflow.contrib.rnn.python.ops.lstm_ops import _lstm_block_cell + + actual_len, padded_len = 3, 4 + input_dim, hidden_dim = 2, 3 + x_shape = (batch, input_dim) + init_h = np.random.rand(batch, hidden_dim).astype(np.float32) + init_c = np.random.rand(batch, hidden_dim).astype(np.float32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + res = _lstm_block_cell( + x, + tf.constant(init_c), + tf.constant(init_h), + w=tf.constant( + np.random.rand(input_dim + hidden_dim, 4 * hidden_dim).astype( + np.float32 + ) + ), + b=tf.constant(np.random.rand(4 * hidden_dim).astype(np.float32)), + use_peephole=has_peephole, + wci=tf.constant(np.random.rand(hidden_dim).astype(np.float32)), + wcf=tf.constant(np.random.rand(hidden_dim).astype(np.float32)), + wco=tf.constant(np.random.rand(hidden_dim).astype(np.float32)), + forget_bias=np.random.rand(), + cell_clip=np.random.rand() if has_clip else -1, + ) + if return_hc_only: + # All other outputs aren't supported by mb.lstm. 
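+                # Per the TF 1.x contrib implementation, _lstm_block_cell
+                # returns the 7-tuple (i, cs, f, o, ci, co, h), so res[1] is
+                # the new cell state and res[6] the new hidden state.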
+ res = res[1], res[6] + + run_compare_tf( + graph, + {x: np.random.rand(*x_shape).astype(np.float32),}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.xfail( + reason="Revert the assumption of invoking set_global before get_global: ", + run=False, + ) + @pytest.mark.parametrize( + "use_cpu_only, backend, batch", + itertools.product([True, False], backends, [1, 2],), + ) + def test_tf_lstm_block_cell(self, use_cpu_only, backend, batch): + actual_len, padded_len = 3, 4 + input_dim, hidden_dim = 2, 3 + # [timelen, batch_size, num_inputs] + x_shape = (batch, input_dim) + init_h = np.random.rand(batch, hidden_dim).astype(np.float32) + init_c = np.random.rand(batch, hidden_dim).astype(np.float32) + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=x_shape) + rnn_cell = tf.contrib.rnn.LSTMBlockCell( + hidden_dim, use_peephole=True, forget_bias=np.random.rand() + ) + res = rnn_cell(x, (init_h, init_c)) + cs_new, h_new = res[1][0], res[1][1] + res = [h_new, cs_new] + + run_compare_tf( + graph, + {x: np.random.rand(*x_shape).astype(np.float32),}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + # variable needs to be frozen + freeze_graph=True, + ) + + +class TestVariable: + @pytest.mark.xfail(reason="Investigate get_global ", run=False) + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True], backends,) + ) + def test_tf_no_variable(self, use_cpu_only, backend): + with tf.Graph().as_default() as graph: + x = tf.placeholder(tf.float32, shape=[1,], name="input") + y = tf.Variable([1.0], dtype=tf.float32, name="y") + + # We set our assign op + assign_op = tf.assign(y, y + 10) + + with tf.control_dependencies([assign_op]): + res = tf.multiply(x, y, name="output") + + run_compare_tf( + graph, + {x: np.random.rand(1).astype(np.float32),}, + res, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestZerosLike: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, dynamic", + itertools.product( + [True, False], backends, [rank for rank in range(5)], [True, False], + ), + ) + def test(self, use_cpu_only, backend, rank, dynamic): + input_shape = np.random.randint(low=2, high=6, size=rank) + input_value = random_gen(input_shape, rand_min=-1, rand_max=1) + if dynamic: + a, b = np.prod(input_shape[:2]), np.prod(input_shape[2:]) + reshape_vals = np.array([a, b], dtype=np.int32) + reshape_input_shape = np.array([2], dtype=np.int32) + + @make_tf_graph([input_shape, list(reshape_input_shape) + [tf.int32]]) + def build_model(x, reshape): + x = tf.reshape(x, shape=reshape) + return tf.raw_ops.ZerosLike(x=x) + + model, inputs, outputs = build_model + input_values = [input_value, reshape_vals] + else: + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.ZerosLike(x=x) + + model, inputs, outputs = build_model + input_values = [input_value] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestIsFinite: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, dynamic", + itertools.product( + [True, False], backends, [rank for rank in range(5)], [True, False] + ), + ) + def test(self, use_cpu_only, backend, rank, dynamic): + def _generate_num_with_inf(input_shape): + res = random_gen(input_shape, rand_min=-1, rand_max=1) + random_map = np.random.choice([np.inf, -np.inf, 0], 
size=input_shape) + if len(input_shape) == 0: + return random_map.astype(np.float32) + res[np.where(random_map == np.inf)] = np.inf + res[np.where(random_map == -np.inf)] = -np.inf + return res.astype(np.float32) + + input_shape = np.random.randint(low=2, high=6, size=rank) + input_value = _generate_num_with_inf(input_shape) + if dynamic: + reshape_shape = [2, tf.int32] + + if len(input_shape) == 0: + reshape_value = np.array([1, 1], dtype=np.int32) + else: + reshape_value = np.array( + [input_shape[0], np.prod(input_shape[1:])], dtype=np.int32 + ) + + @make_tf_graph([input_shape, reshape_shape]) + def build_model(x, reshape): + x = tf.reshape(x, reshape) + x = tf.raw_ops.IsFinite(x=x) + return tf.raw_ops.Cast(x=x, DstT=tf.float32) + + model, inputs, outputs = build_model + input_values = [input_value, reshape_value] + + else: + + @make_tf_graph([input_shape]) + def build_model(x): + x = tf.raw_ops.IsFinite(x=x) + return tf.raw_ops.Cast(x=x, DstT=tf.float32) + + model, inputs, outputs = build_model + input_values = [input_value] + + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py b/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py new file mode 100644 index 000000000..4d21e2bbb --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_parse.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import unittest +import pytest + +pytest.importorskip("tensorflow", minversion="1.14.0") +from tensorflow.core.framework import attr_value_pb2 as attr_value +from tensorflow.core.framework import tensor_shape_pb2 as tensor_shape +from tensorflow.core.framework import types_pb2 as types + +from coremltools.converters.mil.mil import types as mil_types +import coremltools.converters.mil.frontend.tensorflow.parse as parse + + +class TestParse(unittest.TestCase): + def test_parse_list(self): + def compare(expected, lst, field_name): + attr = attr_value.AttrValue() + field = getattr(attr.list, field_name) + field.extend(lst) + + actual = parse.parse_attr(attr) + self.assertEqual(expected, actual) + + compare([1, 2, 3], [1, 2, 3], "i") + compare(["foo", "bar"], [b"foo", b"bar"], "s") + + def test_parse_scalar(self): + def compare(expected, val, field_name): + a = attr_value.AttrValue() + setattr(a, field_name, val) + actual = parse.parse_attr(a) + self.assertEqual(expected, actual) + + compare("a String", b"a String", "s") + compare(55, 55, "i") + compare(True, True, "b") + + attr = attr_value.AttrValue() + attr.f = 12.3 + self.assertAlmostEqual(12.3, parse.parse_attr(attr), places=2) + + @staticmethod + def _attr_with_shape(dims, unknown_rank=0): + attr = attr_value.AttrValue() + for (dim_size, dim_name) in dims: + tf_dim = tensor_shape.TensorShapeProto.Dim() + tf_dim.size = dim_size + tf_dim.name = dim_name + attr.shape.dim.append(tf_dim) + attr.shape.unknown_rank = unknown_rank + return attr + + def test_parse_shape(self): + def compare(expected, dims, unknown_rank=0): + attr = self._attr_with_shape(dims, unknown_rank) + actual = parse.parse_attr(attr) + self.assertEqual(expected, actual) + + compare(None, [], 5) + compare([100], [(100, "outer")]) + 
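+        # A nonzero unknown_rank flags the entire shape as unknown, which
+        # parse_attr maps to None (first case above); otherwise the dim
+        # sizes come back in declaration order, regardless of dim names.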
compare([1, 2, 3], [(1, "outer"), (2, "middle"), (3, "inner")]) + + def test_parse_tensor(self): + # Zero-rank tensor + attr = attr_value.AttrValue() + attr.tensor.version_number = 1 + attr.tensor.dtype = types.DataType.DT_INT32 + t = parse.parse_attr(attr) + self.assertTrue(isinstance(t, mil_types.int32)) + self.assertEqual(0, t.val) + + # Non-zero rank + attr = attr_value.AttrValue() + attr.tensor.version_number = 1 + attr.tensor.dtype = types.DataType.DT_INT32 + shaped_attr = self._attr_with_shape([(1, "outer"), (2, "middle"), (3, "inner")]) + attr.tensor.tensor_shape.dim.extend(shaped_attr.shape.dim) + attr.tensor.int_val.extend([55, 56, 57]) + + t = parse.parse_attr(attr) + self.assertEqual([55, 56, 57], t.val.tolist()) + self.assertEqual("tensor", mil_types.get_type_info(t).name) + + # Note that for a non-zero-rank tensor, t.get_primitive() returns the type + # itself rather than an instance of that type, as it does when the tensor has rank zero. + self.assertTrue(isinstance(t.get_primitive()(), mil_types.int32)) + self.assertEqual((1, 2, 3), t.get_shape()) + + def test_parse_type(self): + def compare(expected, tf_type): + attr = attr_value.AttrValue() + attr.type = tf_type + self.assertEqual(expected, parse.parse_attr(attr)) + + compare(None, types.DataType.DT_INVALID) + compare(mil_types.float, types.DataType.DT_FLOAT) + compare(mil_types.double, types.DataType.DT_DOUBLE) + compare(mil_types.int32, types.DataType.DT_INT32) + compare(mil_types.uint8, types.DataType.DT_UINT8) + compare(mil_types.int16, types.DataType.DT_INT16) + compare(mil_types.int8, types.DataType.DT_INT8) + compare(mil_types.str, types.DataType.DT_STRING) + compare(None, types.DataType.DT_COMPLEX64) + compare(mil_types.int64, types.DataType.DT_INT64) + compare(mil_types.bool, types.DataType.DT_BOOL) + compare(None, types.DataType.DT_QINT8) + compare(None, types.DataType.DT_QUINT8) + compare(None, types.DataType.DT_QINT32) + compare(None, types.DataType.DT_BFLOAT16) + compare(None, types.DataType.DT_QINT16) + compare(None, types.DataType.DT_QUINT16) + compare(mil_types.uint16, types.DataType.DT_UINT16) + compare(None, types.DataType.DT_COMPLEX128) + compare(None, types.DataType.DT_HALF) + compare(None, types.DataType.DT_RESOURCE) + compare(None, types.DataType.DT_VARIANT) + compare(mil_types.uint32, types.DataType.DT_UINT32) + compare(mil_types.uint64, types.DataType.DT_UINT64) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py b/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py new file mode 100644 index 000000000..05b467a55 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/test_parsed_tf_node.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import unittest +import pytest + +pytest.importorskip("tensorflow", minversion="1.14.0") +from tensorflow.core.framework import node_def_pb2 as node_def +from tensorflow.core.framework import tensor_shape_pb2 as tensor_shape +from tensorflow.core.framework import types_pb2 as types + +from coremltools.converters.mil.frontend.tensorflow.parsed_tf_node import ParsedTFNode + + +def _mock_tf_node(): + tfnode = node_def.NodeDef() + tfnode.name = "aNode" + tfnode.op = "PlaceholderWithDefault" + tfnode.input.extend(["anInput", "^aControlInput"]) + tfnode.attr["dtype"].type = types.DataType.DT_INT32 + dims = [(1, "outer"), (2, "middle"), (3, "inner")] + for (dim_size, dim_name) in dims: + tf_dim = tensor_shape.TensorShapeProto.Dim() + tf_dim.size = dim_size + tf_dim.name = dim_name + tfnode.attr["shape"].shape.dim.append(tf_dim) + return tfnode + + +class TestParsedTFNode(unittest.TestCase): + def test_init(self): + parsed_node = ParsedTFNode(_mock_tf_node()) + parsed_node.parse_from_attr() + self.assertEqual("aNode", parsed_node.name) + self.assertEqual("Placeholder", parsed_node.op) + self.assertEqual(["anInput"], parsed_node.inputs) + self.assertEqual(["aControlInput"], parsed_node.control_inputs) + + def test_copy(self): + parsed_node = ParsedTFNode(_mock_tf_node()) + parsed_node.parse_from_attr() + copy = parsed_node.copy() + self.assertTrue(isinstance(copy, type(parsed_node))) + props = [ + "name", + "op", + "datatype", + "value", + "inputs", + "control_inputs", + "outputs", + "control_outputs", + "attr", + "original_node", + ] + for prop in props: + self.assertEqual( + getattr(parsed_node, prop), + getattr(copy, prop), + "Mismatch in property {}".format(prop), + ) diff --git a/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py b/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py new file mode 100644 index 000000000..04c2c9b17 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/test/testing_utils.py @@ -0,0 +1,272 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import six +from coremltools import TensorType +import pytest + +tf = pytest.importorskip("tensorflow", minversion="1.14.0") +from coremltools.converters.mil.testing_utils import compare_shapes, compare_backend +from coremltools.converters.mil.testing_reqs import converter +from tensorflow.python.framework import dtypes +import tempfile +import os +from tensorflow.python.tools.freeze_graph import freeze_graph as freeze_g + +frontend = "tensorflow" + + +def make_tf_graph(input_types): + """ + Decorator to help construct a TensorFlow 1.x model. + + Parameters + ---------- + input_types: list of tuple + List of input types. E.g. [(3, 224, 224, tf.int32)] represents one input, + with shape (3, 224, 224) and expected data type tf.int32. The + dtype is optional; if it's missing, tf.float32 is used. 
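+
+        A minimal usage sketch (the shapes and ops here are illustrative):
+
+            @make_tf_graph([(2, 3), (2, 3, tf.int32)])
+            def build_model(x, y):
+                return tf.add(x, tf.cast(y, tf.float32))
+
+            # The decorator evaluates the function immediately, so the
+            # decorated name is already the (graph, inputs, outputs) tuple.
+            model, inputs, outputs = build_model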
+ + Returns + ------- + tf.Graph, list of str, list of str + """ + + def wrapper(ops): + with tf.Graph().as_default() as model: + inputs = [] + for input_type in input_types: + input_type = tuple(input_type) + if len(input_type) > 0 and isinstance(input_type[-1], dtypes.DType): + shape, dtype = input_type[:-1], input_type[-1] + else: + shape, dtype = input_type, tf.float32 + inputs.append(tf.placeholder(shape=shape, dtype=dtype)) + + outputs = ops(*inputs) + return model, inputs, outputs + + return wrapper + + +def get_tf_keras_io_names(model): + """ + Utility function to get the input/output names of a tf.keras model. + + Parameters + ---------- + model: tf.keras.Model + """ + input_names, output_names = [], [] + for i in model.inputs: + input_names.append(i.name.split(":")[0]) + for o in model.outputs: + output_names.append(o.name.split(":")[0].split("/")[-1]) + return input_names, output_names + + +def get_tf_node_names(tf_nodes, mode="inputs"): + """ + Inputs: + - tf_nodes: list[str]. Names of target placeholders or output variables. + - mode: str. When mode == 'inputs', strip the tensor suffix from the names, + for instance 'placeholder:0' becomes 'placeholder'. + When mode == 'outputs', keep the original suffix, so + 'bn:0' stays 'bn:0'. + Returns a list of names for the given TensorFlow nodes. A tensor name's + suffix is stripped if there's no ambiguity; otherwise, it is kept. + """ + if not isinstance(tf_nodes, list): + tf_nodes = [tf_nodes] + names = list() + for n in tf_nodes: + tensor_name = n if isinstance(n, six.string_types) else n.name + if mode == "outputs": + names.append(tensor_name) + continue + name = tensor_name.split(":")[0] + if name in names: + # keep postfix notation for multiple inputs/outputs + names[names.index(name)] = name + ":" + str(names.count(name) - 1) + names.append(tensor_name) + else: + names.append(name) + return names + + +def tf_graph_to_proto( + graph, feed_dict, output_nodes, frontend="tensorflow", backend="nn_proto" +): + """ + Parameters + ---------- + graph: tf.Graph + TensorFlow 1.x model in tf.Graph format. + feed_dict: dict of (tf.placeholder, np.array) + Dict of placeholder and value pairs representing inputs. + output_nodes: tf.node or list[tf.node] + List of names representing outputs. + frontend: str + Frontend to convert from. + backend: str + Backend to convert to. + + Returns + ------- + proto, input values, output names, output nodes + """ + if isinstance(output_nodes, tuple): + output_nodes = list(output_nodes) + if not isinstance(output_nodes, list): + output_nodes = [output_nodes] + + # Convert TF graph. 
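+    # The converter is given input names with their ":0"-style tensor
+    # suffixes stripped, while output names keep their suffixes verbatim.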
+ input_names = get_tf_node_names(list(feed_dict.keys()), mode="inputs") + output_names = get_tf_node_names(output_nodes, mode="outputs") + input_values = {name: val for name, val in zip(input_names, feed_dict.values())} + + inputs = [TensorType(name=input_name) for input_name in input_names] + mlmodel = converter.convert( + graph, inputs=inputs, outputs=output_names, source=frontend, convert_to=backend + ) + + proto = mlmodel.get_spec() + return proto, input_values, output_names, output_nodes + + +def load_tf_pb(pb_file): + """ + Load a .pb file into a tf.Graph. + """ + # Load the protobuf file from disk and parse it to retrieve the + # unserialized graph_def + with tf.io.gfile.GFile(pb_file, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + + # Then import the graph_def into a new Graph and return it + with tf.Graph().as_default() as graph: + # The name argument would prefix every op/node in the graph. + # Since we load everything into a new graph, this is not needed + tf.import_graph_def(graph_def, name="") + return graph + + +def run_compare_tf( + graph, + feed_dict, + output_nodes, + use_cpu_only=False, + frontend_only=False, + frontend="tensorflow", + backend="nn_proto", + atol=1e-04, + rtol=1e-05, + validate_shapes_only=False, + freeze_graph=False, +): + """ + Utility function to convert and compare a given TensorFlow 1.x model. + + Parameters + ---------- + graph: tf.Graph + TensorFlow 1.x model in tf.Graph format. + feed_dict: dict of (tf.placeholder, np.array) + Dict of placeholder and value pairs representing inputs. + output_nodes: tf.node or list[tf.node] + List of names representing outputs. + use_cpu_only: bool + If True, use only the CPU for prediction; otherwise, also use the GPU. + frontend_only: bool + If True, skip the prediction call and only validate the conversion. + frontend: str + Frontend to convert from. + backend: str + Backend to convert to. + atol: float + The absolute tolerance parameter. + rtol: float + The relative tolerance parameter. + validate_shapes_only: bool + If True, skip the element-wise value comparison. 
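+    freeze_graph: bool
+        If True, freeze the graph (variables are checkpointed and folded into
+        constants) and convert the frozen copy instead of the original graph.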
+ """ + proto, input_key_values, output_names, output_nodes = tf_graph_to_proto( + graph, feed_dict, output_nodes, frontend, backend + ) + + if frontend_only: + return + + if not isinstance(output_nodes, (tuple, list)): + output_nodes = [output_nodes] + + if freeze_graph: + model_dir = tempfile.mkdtemp() + graph_def_file = os.path.join(model_dir, "tf_graph.pb") + checkpoint_file = os.path.join(model_dir, "tf_model.ckpt") + static_model_file = os.path.join(model_dir, "tf_static.pb") + coreml_model_file = os.path.join(model_dir, "coreml_model.mlmodel") + + with tf.Session(graph=graph) as sess: + sess.run(tf.global_variables_initializer()) + tf_outputs = sess.run(output_nodes, feed_dict=feed_dict) + + tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False) + saver = tf.train.Saver() + saver.save(sess, checkpoint_file) + freeze_g( + input_graph=graph_def_file, + input_saver="", + input_binary=True, + input_checkpoint=checkpoint_file, + output_node_names=",".join([n.op.name for n in output_nodes]), + restore_op_name="save/restore_all", + filename_tensor_name="save/Const:0", + output_graph=static_model_file, + clear_devices=True, + initializer_nodes="", + ) + graph = load_tf_pb(static_model_file) + + # Need to convert again using frozen graph + proto, input_key_values, output_names, output_nodes = tf_graph_to_proto( + graph, feed_dict, output_nodes, frontend, backend + ) + else: + with tf.Session(graph=graph) as sess: + sess.run(tf.global_variables_initializer()) + tf_outputs = sess.run(output_nodes, feed_dict=feed_dict) + expected_outputs = {name: val for name, val in zip(output_names, tf_outputs)} + + if validate_shapes_only: + compare_shapes(proto, input_key_values, expected_outputs, use_cpu_only) + else: + compare_backend( + proto, + input_key_values, + expected_outputs, + use_cpu_only, + atol=atol, + rtol=rtol, + also_compare_shapes=True, + ) + + return proto + + +def layer_counts(spec, layer_type): + spec_type_map = { + "neuralNetworkClassifier": spec.neuralNetworkClassifier, + "neuralNetwork": spec.neuralNetwork, + "neuralNetworkRegressor": spec.neuralNetworkRegressor, + } + nn_spec = spec_type_map.get(spec.WhichOneof("Type")) + if nn_spec is None: + raise ValueError("MLModel must have a neural network") + + n = 0 + for layer in nn_spec.layers: + if layer.WhichOneof("layer") == layer_type: + n += 1 + return n diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/__init__.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py similarity index 67% rename from coremltools/converters/nnssa/frontend/tensorflow/graph_pass/__init__.py rename to coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py index af21bdb47..706c12bb0 100644 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/__init__.py +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/__init__.py @@ -1,9 +1,15 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -# graphdef to ssa +# graphdef to tfssa from .delete_disconnected_nodes import delete_disconnected_nodes from .insert_get_tuple import insert_get_tuple from .tensor_array_transform import tensor_array_resource_removal @@ -14,5 +20,4 @@ from .variable_node_transform import remove_variable_nodes from .functionalize_loops import functionalize_loops from .cond_to_where import cond_to_where -from .lstmblockcell_rewrite import lstmblockcell_rewrite -from .fusedbatchnorm_rewrite import fusedbatchnorm_rewrite +from .fuse_dilation_conv import fuse_dilation_conv diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/cond_to_where.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/cond_to_where.py new file mode 100644 index 000000000..528c67244 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/cond_to_where.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +from ..basic_graph_ops import delete_node, disconnect_edge +from .visitors import FindAllUpstreamTerminals + +import logging +from coremltools._deps import _HAS_TF_2 + + +def compute_max_rank(graph): + # highly inefficient way to calculate the rank of every node + ret = {} + # begin at max rank + for v in graph.keys(): + if len(graph[v].inputs) == 0: + ret[v] = 0 + else: + ret[v] = len(graph) + + changes = True + while changes: + changes = False + for v in graph.keys(): + if len(graph[v].inputs) > 0: + rank = max(ret[i] for i in graph[v].inputs) + 1 + if ret[v] != rank: + changes = True + ret[v] = rank + return ret + + +class CondToWhere(object): + @staticmethod + def _search(g, node_name): + """ + Find the nearest Switch nodes upstream of node_name. + """ + node = g[node_name] + + switches = ( + FindAllUpstreamTerminals(lambda x: x.op == "Switch") + .visit(g, node.name) + .get_result() + ) + if len(switches) == 0: + switches = ( + FindAllUpstreamTerminals( + lambda x: x.op == "Switch" or x.attr.get("was_switch") is not None + ) + .visit(g, node.name) + .get_result() + ) + return switches + + @staticmethod + def _fix_found_cond(g, merge, switches): + """ + Convert a Merge's Switch nodes to Identity ops and the Merge to iff. + """ + if g[switches[0]].op == "Switch": + condition_input = g[switches[0]].inputs[1] + else: + condition_input = g[switches[0]].attr["was_switch"] + + # convert the merge to a select + # TensorFlow seems to ensure the condition that the first + # merge input is the True branch and the second merge input + # is the false branch. 
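+        # Conceptually, merge(switch_true, switch_false) driven by condition c
+        # becomes iff(c, true_branch, false_branch); the branch order is fixed
+        # up below, since TF1 graphs list the branches in reverse order.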
+ + # we convert switches to identity, detaching the switch condition + for s in switches: + if g[s].op == "Switch": + g[s].op = "Identity" + g[s].attr["was_switch"] = g[s].inputs[1] + # detach input 1: the switch condition + if g[s].inputs[0] == g[s].inputs[1]: + g[s].inputs.pop() + g[g[s].inputs[0]].outputs.pop() + else: + disconnect_edge(g, g[s].inputs[1], s) + + # build the final select + g[merge].op = "iff" + if not _HAS_TF_2: + # swap true branch with false branch to get the right semantics for IFF + g[merge].inputs[0], g[merge].inputs[1] = ( + g[merge].inputs[1], + g[merge].inputs[0], + ) + + g[merge].inputs = [condition_input] + g[merge].inputs + g[condition_input].outputs.append(merge) + + def cond_to_where(self, graph): + stuff_done = False + g = graph + ranks = compute_max_rank(graph) + merges = [a for a in g if g[a].op == "Merge"] + merges = sorted(merges, key=lambda k: ranks[k]) + if len(merges) == 0: + return False + for m in merges: + logging.debug("Fixing cond at merge location: %s", m) + switches = self._search(g, m) + self._fix_found_cond(g, m, switches) + stuff_done = True + + # delete the extra switches that seem to just lead to identities + # which then lead nowhere but into control dependencies + extra_switches = [a for a in g if g[a].op == "Switch"] + for s in extra_switches: + if all( + [g[o].op == "Identity" and len(g[o].outputs) == 0 for o in g[s].outputs] + ): + nodes_to_delete = g[s].outputs + [s] + for d in nodes_to_delete: + delete_node(g, d) + stuff_done = True + return stuff_done + + +def cond_to_where(tfssa): + for k, v in tfssa.functions.items(): + while True: + stuff_done = CondToWhere().cond_to_where(v.graph) + if not stuff_done: + break diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py new file mode 100644 index 000000000..558a88b84 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/constant_propagation.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +import logging +import tensorflow as tf +import gc +from .delete_constant import delete_unnecessary_constant_nodes +from ..basic_graph_ops import const_determined_nodes, delete_node, disconnect_edge +from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil.types.type_mapping import numpy_val_to_builtin_val +from coremltools.converters._profile_utils import _profile + + +def _get_const_nodes(fn): + from tensorflow.core.framework import graph_pb2 + from tensorflow.core.framework import node_def_pb2 + + new_graph = graph_pb2.GraphDef() + constant_nodes = set() + constant_node_num_outputs = {} + generated_nodes = [k for k, v in fn.graph.items() if v.original_node is None] + const_nodes_in_this_graph = const_determined_nodes(fn.graph, set(generated_nodes)) + # we can only run TF on nodes with outputs since we must evaluate + # tensors and not ops + const_nodes_in_this_graph = [ + i for i in const_nodes_in_this_graph if fn.graph[i].op != "NoOp" + ] + constant_nodes = constant_nodes.union(set(const_nodes_in_this_graph)) + + # topological sort const nodes + topsort = [] + topsort_set = set() + while len(const_nodes_in_this_graph) > 0: + for n in const_nodes_in_this_graph: + input_names = fn.graph[n].inputs + if len(set(input_names).difference(topsort_set)) == 0: + topsort.append(n) + topsort_set.add(n) + + const_nodes_in_this_graph = set(const_nodes_in_this_graph).difference( + topsort_set + ) + + for node in topsort: + new_node = node_def_pb2.NodeDef() + new_node.CopyFrom(fn.graph[node].original_node) + if "_class" in new_node.attr: + del new_node.attr["_class"] + del new_node.input[:] + new_node.input.extend(fn.graph[node].inputs) + if "_output_shapes" in fn.graph[node].attr: + constant_node_num_outputs[node] = len(fn.graph[node].attr["_output_shapes"]) + else: + constant_node_num_outputs[node] = 1 + new_graph.node.extend([new_node]) + del new_node + gc.collect() + return new_graph, list(constant_nodes), constant_node_num_outputs + + +@_profile +def _constant_propagation(fn, new_graph, constant_nodes, constant_node_num_outputs): + try: + if len(constant_nodes) > 0: + with tf.Graph().as_default() as graph: + tf.import_graph_def(new_graph, name="") + + # We're only making one call to `sess.run()` in order to compute constant values. + # In this context, the default optimization settings make everything dramatically + # slower and more memory-intensive. 
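+                # The settings below turn off Grappler (the meta optimizer)
+                # and drop the optimizer level to L0, so TF does not spend
+                # time rewriting the constant subgraph before we evaluate it.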
+ session_config = tf.compat.v1.ConfigProto() + session_config.graph_options.optimizer_options.opt_level = ( + tf.compat.v1.OptimizerOptions.L0 + ) + session_config.graph_options.rewrite_options.disable_meta_optimizer = ( + True + ) + with tf.compat.v1.Session(graph=graph, config=session_config) as sess: + query_list = list() + control_flow_ops = list() + for c in constant_nodes: + for j in range(constant_node_num_outputs[c]): + query = c + ":" + str(j) + lower_query = query.lower() + if "switch" in lower_query or "cond" in lower_query: + control_flow_ops.append(query) + else: + query_list.append(query) + result_list = sess.run(query_list) + result = { + query_list[i]: result_list[i] for i in range(len(query_list)) + } + # propagate switch outputs one by one + for op in control_flow_ops: + try: + res = sess.run([op]) + result.update({op: res[0]}) + except: + logging.warning( + '[Constant Propagation] Skip "dead" tensor: {}'.format( + op + ) + ) + result.update({op: None}) + + for k, v in fn.graph.items(): + if k in constant_node_num_outputs: + if constant_node_num_outputs[k] == 1: + result_entry = k + ":0" + try: + v.value, v.datatype = numpy_val_to_builtin_val( + result[result_entry] + ) + except: + logging.error(result_entry) + logging.error(result[result_entry]) + else: + values = [ + result[k + ":" + str(i)] + for i in range(constant_node_num_outputs[k]) + ] + try: + npval = [numpy_val_to_builtin_val(i) for i in values] + v.datatype = types.tuple(tuple([val[1] for val in npval])) + v.value = v.datatype() + for idx, val in enumerate(npval): + v.value.val[idx] = val[0] + except: + logging.error(values) + for k, v in fn.graph.items(): + if v.op == "get_tuple": + inp = fn.graph[v.inputs[0]] + idx = v.attr["index"] + if inp.value is not None: + v.value = inp.value.val[idx] + v.datatype = inp.datatype.T[idx] + + except Exception as e: + logging.exception("Constant Propagation pass failed: {}".format(e)) + + +@_profile +def constant_propagation(tfssa): + # We rely on TensorFlow itself to perform constant propagation. For each + # graph, we construct a new graph comprising only its constant nodes. + + for f in tfssa.functions.values(): + const_nodes_info = _get_const_nodes(f) + _constant_propagation(f, *const_nodes_info) + delete_unnecessary_constant_nodes(tfssa) diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_asserts.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_asserts.py new file mode 100644 index 000000000..34542858d --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_asserts.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +from ..basic_graph_ops import delete_node +import logging +import sys + +sys.setrecursionlimit(5000)  # increase recursion limit to support converting large models + + +def _all_assert_leaves(gdict, nodename, memo): + """ + Does the given node lead only to assertions? + + Args: + gdict (dict): The node's graph. + nodename (str): The name of the node to test. + memo (dict): Storage for memoization. 
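+
+    Returns:
+        bool: True if every downstream path from the node terminates in an
+        Assert or CheckNumerics op.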
+ """ + work = [nodename] + while True: + assert len(work) <= len(gdict) # If true, this algorithm is broken + node = gdict[work.pop()] + + # Entries in memo have one of the following values for a given node: + # None: the node is in the stack; this node is downstream. + # True: the node is an assertion or leads only to assertions. + # False: the node does not lead only to assertions. + if not isinstance(memo.get(node.name), bool): + memo[node.name] = None + outputs = node.outputs + if len(outputs) == 0: + # Leaf node: stack shrinks + memo[node.name] = node.op in ("Assert", "CheckNumerics") + else: + outputs_to_process = [n for n in outputs if n not in memo] + if len(outputs_to_process) == 0: + # Non-leaf node with fully processed outputs: stack shrinks + memo[node.name] = all(memo[n] for n in outputs) + else: + # Non-leaf node with unprocess outputs: stack grows + work.append(node.name) + work.extend(outputs_to_process) + if len(work) == 0: + return memo[node.name] + + +def delete_asserts(tfssa): + """ + Delete all nodes that lead only to assertions. + """ + delete_count = 0 + for f in tfssa.functions.values(): + memo = {} + for n in f.graph: + _all_assert_leaves(f.graph, n, memo) + for m in memo: + if memo[m]: + delete_count += 1 + delete_node(f.graph, m) + logging.debug("%d assert nodes deleted", delete_count) + return delete_count diff --git a/coremltools/converters/nnssa/frontend/graph_pass/delete_constant.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_constant.py similarity index 65% rename from coremltools/converters/nnssa/frontend/graph_pass/delete_constant.py rename to coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_constant.py index 5689ab568..09dd5997c 100644 --- a/coremltools/converters/nnssa/frontend/graph_pass/delete_constant.py +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_constant.py @@ -1,24 +1,31 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -import numpy as np -from ...commons.basic_graph_ops import delete_node, check_connections, disconnect_edge +from ..basic_graph_ops import delete_node, check_connections, disconnect_edge + +import logging -def convert_constant_nodes_to_const_ops(nnssa): +def convert_constant_nodes_to_const_ops(tfssa): """ Convert nodes with known constant value to Const nodes """ - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] + for fn_key in list(tfssa.functions.keys()): + f = tfssa.functions[fn_key] for k in list(f.graph.keys()): v = f.graph.get(k, None) if v is None: continue if v.value is not None: - v.op = 'Const' + v.op = "Const" # delete all upstream edges now that this is constant inv = v.inputs[:] for i in inv: @@ -27,7 +34,7 @@ def convert_constant_nodes_to_const_ops(nnssa): disconnect_edge(f.graph, curnode, nextnode) # keep deleting upwards as long as it is a chain - while (curnode is not None): + while curnode is not None: prevnode = None if len(f.graph[curnode].outputs) == 0: if len(f.graph[curnode].inputs) == 1: @@ -36,12 +43,12 @@ def convert_constant_nodes_to_const_ops(nnssa): curnode = prevnode -def delete_nodes_with_only_constant_descendents(nnssa): +def delete_nodes_with_only_constant_descendents(tfssa): # look for nodes whose value is known AND downstream values are known # and delete them delete_count = 0 - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] + for fn_key in list(tfssa.functions.keys()): + f = tfssa.functions[fn_key] keys = list(f.graph.keys()) for k in keys: if k not in f.graph: @@ -57,21 +64,25 @@ def delete_nodes_with_only_constant_descendents(nnssa): if to_delete: delete_count += 1 delete_node(f.graph, k) - # also delete all Const nodes with no descendents + # also delete all Const nodes with no descendants keys = list(f.graph.keys()) for k in keys: if k not in f.graph: continue - if f.graph[k].op == 'Const' and len(f.graph[k].outputs) == 0 and (k not in f.outputs): + if ( + f.graph[k].op == "Const" + and len(f.graph[k].outputs) == 0 + and (k not in f.outputs) + ): delete_count += 1 delete_node(f.graph, k) return delete_count -def delete_unnecessary_constant_nodes(nnssa): - delete_count = delete_nodes_with_only_constant_descendents(nnssa) - for f in list(nnssa.functions.values()): +def delete_unnecessary_constant_nodes(tfssa): + delete_count = delete_nodes_with_only_constant_descendents(tfssa) + for f in list(tfssa.functions.values()): check_connections(f.graph) - convert_constant_nodes_to_const_ops(nnssa) - print("%s nodes deleted" % (delete_count)) + convert_constant_nodes_to_const_ops(tfssa) + logging.debug("%s nodes deleted", delete_count) return delete_count diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py new file mode 100644 index 000000000..a02921749 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/delete_disconnected_nodes.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + + +def delete_disconnected_nodes(gd): + # delete all nodes with no inputs and outputs + empty_nodes = [] + for k, v in gd.items(): + if ( + len(gd[k].inputs) == 0 + and len(gd[k].outputs) == 0 + and len(gd[k].control_inputs) == 0 + and len(gd[k].control_outputs) == 0 + and gd[k].op != "Placeholder" + ): + empty_nodes.append(k) + + for k in empty_nodes: + del gd[k] diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/functionalize_loops.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/functionalize_loops.py similarity index 50% rename from coremltools/converters/nnssa/frontend/tensorflow/graph_pass/functionalize_loops.py rename to coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/functionalize_loops.py index 8c8dd8083..01e870b80 100644 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/functionalize_loops.py +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/functionalize_loops.py @@ -1,252 +1,40 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ from ..parsed_tf_node import ParsedTFNode -from ....commons.basic_graph_ops import * -from ....nnssa import SSAFunction - - -class FindAllDownstreamTerminals(object): - # Find all nodes matching a particular function - # which is downstream reachable from a set of nodes. 
- def __init__(self, fn): - self.result = [] - self.fn = fn - self.memo = {} - - def visit(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - - if node.name in self.memo: - return self - self.memo[node.name] = 1 - - if self.fn(node): - self.result.append(node.name) - return self - - for i in node.outputs: - self.visit(g, g[i]) - - return self - - def visit_many(self, g, nodes): - for i in nodes: - self.visit(g, i) - return self - - def get_result(self): - return self.result - - -class FindAllReachableNodes(object): - # Find all nodes reachable from a set of nodes which satisfy a criteria - def __init__(self, fn): - self.result = [] - self.fn = fn - self.memo = {} - - def visit(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - - if node.name in self.memo: - return self - self.memo[node.name] = 1 - - if self.fn(node): - self.result.append(node.name) - - for i in node.outputs: - self.visit(g, g[i]) - - for i in node.inputs: - self.visit(g, g[i]) - - return self - - def visit_many(self, g, nodes): - for i in nodes: - self.visit(g, i) - return self - - def get_result(self): - return self.result - - -class FindImmediateUpstreamNodes(object): - # Find all nodes matching a particular function which is immediately above a set of nodes - def __init__(self, fn): - self.result = [] - self.fn = fn - - def visit(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - - for i in node.inputs: - if self.fn(g[i]): - self.result.append(i) - - return self - - def visit_many(self, g, nodes): - for i in nodes: - self.visit(g, i) - return self - - def get_result(self): - return self.result - - -class FindImmediateDownstreamNodes(object): - # Find all nodes matching a particular function which is immediately above a set of nodes - def __init__(self, fn): - self.result = [] - self.fn = fn - - def visit(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - - for i in node.outputs: - if self.fn(g[i]): - self.result.append(i) - - return self - - def visit_many(self, g, nodes): - for i in nodes: - self.visit(g, i) - self.result = list(set(self.result)) - return self - - def get_result(self): - return self.result - - -class FindAllUpstreamTerminals(object): - # Find all nodes matching a particular function - # which is upstream reachable from a set of nodes. 
- def __init__(self, fn, control_dependencies=False): - self.result = [] - self.fn = fn - self.control_dependencies = control_dependencies - self.memo = {} - - def visit(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - - if node.name in self.memo: - return self - self.memo[node.name] = 1 - - if self.fn(node): - self.result.append(node.name) - return self - - for i in node.inputs: - self.visit(g, g[i]) - if self.control_dependencies: - for i in node.control_inputs: - self.visit(g, g[i]) - return self - - def visit_many(self, g, nodes): - for i in nodes: - self.visit(g, i) - self.result = list(set(self.result)) - return self - - def get_result(self): - return self.result - - -class FindSubgraph(object): - # Find all nodes between a set of sources and a set of terminals - # Sources are not returned, but reached terminals are returned - def __init__(self, terminal_nodes): - self.memo = {} - self.terminal = terminal_nodes - - def visit_impl(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - - if node.name in self.terminal: - self.memo[node.name] = True - return True - - if node.name in self.memo: - return self.memo[node.name] - - # add self to memo first otherwise cycles will not terminate - self.memo[node.name] = None - reachable = None - all_unreachable = True - for i in node.outputs + node.control_outputs: - visit_result = self.visit_impl(g, g[i]) - if visit_result == True: - reachable = True - if visit_result != False: - all_unreachable = False - - if reachable: - self.memo[node.name] = reachable - elif all_unreachable: - self.memo[node.name] = False - else: - self.memo[node.name] = None - - return reachable - - def visit(self, g, node): - self.visit_impl(g, node) - while (True): - if None in iter(self.memo.values()): - revisit = [k for k, v in self.memo.items() if v is None] - self.memo = {k: v for k, v in self.memo.items() if v is not None} - for n in revisit: - self.visit_impl(g, n) - else: - break - return self - - def visit_many(self, g, nodes): - for node in nodes: - self.visit_impl(g, node) - while (True): - if None in iter(self.memo.values()): - revisit = [k for k, v in self.memo.items() if v is None] - self.memo = {k: v for k, v in self.memo.items() if v is not None} - for n in revisit: - self.visit_impl(g, n) - else: - break - return self - - def get_result(self): - return [k for k, v in self.memo.items() if v] +from ..basic_graph_ops import * # pylint: disable=unused-wildcard-import,wildcard-import +from ..tfssa import SSAFunction +from .visitors import ( + FindAllReachableNodes, + FindImmediateDownstreamNodes, + FindImmediateUpstreamNodes, + FindSubgraph, +) +import logging class FunctionalizeLoops(object): """ - Turns while loops in Tensorflow dataflow graph into the functional form: + Turns while loops in a TensorFlow dataflow graph into the functional form: while(cond_function, body_function) Usage: - Given a graph in NNSSA (the NetworkEnsemble defined in network.py) form: + Given a graph in tfssa (the NetworkEnsemble defined in network.py) form: This will functionalize *ONE* loop in the main function. f = FunctionalizeLoops() - ret = f.functionalize_loops(self, nnssa, "main") + ret = f.functionalize_loops(tfssa, "main") if ret is True, one loop has been functionalized, and the new functions - added to nnssa. If False, there is no loop to functionalize. + added to tfssa. If False, there is no loop to functionalize. Generally, repeated calls to this will be necessary to catch all loops. 
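+
+    The module-level functionalize_loops(tfssa) helper at the bottom of this
+    file repeats this process until no loop remains.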
@@ -257,53 +45,85 @@ def __init__(self): self.exits = None self.merges = None self.enters = None + self.constant_enters = None self.switches = None self.subgraph = None self.loopcond = None - pass + self.is_constant = None + self.next_iterations = None + self.cond = None + self.body = None def _search(self, g, node): if not isinstance(node, ParsedTFNode): node = g[node] + # we look for NextIteration nodes - if node.op == "Enter": - frame_name = node.attr['frame_name'] - print("Fixing frame name: %s" % (frame_name)) - # find all the enter args - # this is basically the enter frame - # functionalize_control_flow.cc:FunctionalizeControlFlow (1160-1196) - self.enters = [k for k, v in g.items() if v.attr.get('frame_name', '') == frame_name] - self.is_constant = [bool(g[n].attr.get('is_constant', False)) for n in self.enters] - self.merges = FindImmediateDownstreamNodes(lambda x: x.op == "Merge").visit_many( - g, self.enters).get_result() - self.next_iterations = FindImmediateUpstreamNodes( - lambda x: x.op == "NextIteration").visit_many(g, self.merges).get_result() - self.switches = FindImmediateDownstreamNodes(lambda x: x.op == "Switch").visit_many( - g, self.merges).get_result() - self.exits = FindImmediateDownstreamNodes(lambda x: x.op == "Exit").visit_many( - g, self.switches).get_result() - self.loopcond = list( - set( - FindImmediateUpstreamNodes(lambda x: x.op == "LoopCond").visit_many( - g, self.switches).get_result())) - - self.subgraph = FindSubgraph(self.exits).visit_many(g, self.enters).get_result() - self.cond = FindSubgraph(self.switches).visit_many(g, self.merges).get_result() - self.body = FindSubgraph([node.name] + self.exits).visit_many( - g, self.switches).get_result() - # drop merges and switches from cond and body - self.cond = [ - i for i in self.cond if i not in (self.merges + self.switches + self.enters) - ] - self.body = [i for i in self.body if i not in ([node.name] + self.switches) - ] + [node.name] + self.switches + self.merges + self.enters - - # ok. we can now rebuild. 
- else: - pass + assert node.op == "Enter" + + frame_name = node.attr["frame_name"] + logging.debug("Fixing frame name: %s", frame_name) + # find all the enter args + # this is basically the enter frame + # functionalize_control_flow.cc:FunctionalizeControlFlow (1160-1196) + self.enters = [ + k for k, v in g.items() if v.attr.get("frame_name", "") == frame_name + ] + self.is_constant = [ + bool(g[n].attr.get("is_constant", False)) for n in self.enters + ] + self.merges = ( + FindImmediateDownstreamNodes(lambda x: x.op == "Merge") + .visit_many(g, self.enters) + .get_result() + ) + self.next_iterations = ( + FindImmediateUpstreamNodes(lambda x: x.op == "NextIteration") + .visit_many(g, self.merges) + .get_result() + ) + self.switches = ( + FindImmediateDownstreamNodes(lambda x: x.op == "Switch") + .visit_many(g, self.merges) + .get_result() + ) + self.exits = ( + FindImmediateDownstreamNodes(lambda x: x.op == "Exit") + .visit_many(g, self.switches) + .get_result() + ) + self.loopcond = list( + set( + FindImmediateUpstreamNodes(lambda x: x.op == "LoopCond") + .visit_many(g, self.switches) + .get_result() + ) + ) + + self.subgraph = FindSubgraph(self.exits).visit_many(g, self.enters).get_result() + self.cond = FindSubgraph(self.switches).visit_many(g, self.merges).get_result() + self.body = ( + FindSubgraph([node.name] + self.exits) + .visit_many(g, self.switches) + .get_result() + ) + # drop merges and switches from cond and body + self.cond = [ + i for i in self.cond if i not in (self.merges + self.switches + self.enters) + ] + self.body = ( + [i for i in self.body if i not in ([node.name] + self.switches)] + + [node.name] + + self.switches + + self.merges + + self.enters + ) + + # ok. we can now rebuild. def _fix_graph_invariants(self, g): import copy + check = lambda x: x is not None and len(x) > 0 check(self.exits) check(self.merges) @@ -312,15 +132,15 @@ def _fix_graph_invariants(self, g): check(self.subgraph) check(self.cond) check(self.loopcond) - assert (len(self.loopcond) == 1) + assert len(self.loopcond) == 1 # maintain the invariant of a unique Enter node per argument # functionalize_control_flow.cc:FunctionalizeLoop (295) for i in copy.copy(self.enters): node = g[i] - assert (len(node.outputs) > 0) - assert (len(node.inputs) == 1) - assert (len(node.control_inputs) == 0) - assert (len(node.control_outputs) == 0) + assert len(node.outputs) > 0 + assert len(node.inputs) == 1 + assert len(node.control_inputs) == 0 + assert len(node.control_outputs) == 0 if len(node.outputs) == 1: continue node_output_copy = copy.copy(node.outputs) @@ -331,7 +151,7 @@ def _fix_graph_invariants(self, g): new_enter_node.outputs = [] new_enter_node.name = node.name + "/trsplit%d" % (j) g[new_enter_node.name] = new_enter_node - print("splitting %s" % (node.name)) + logging.debug("splitting %s", node.name) # connect the new node enter_output = node_output_copy[j] disconnect_edge(g, node.name, enter_output) @@ -340,9 +160,9 @@ def _fix_graph_invariants(self, g): # insert into graph self.enters.append(new_enter_node.name) - def functionalize_loops(self, nnssa, function_to_functionalize): - g = nnssa.functions[function_to_functionalize].graph - loopni = [a for a in g if g[a].op == 'Enter'] + def functionalize_loops(self, tfssa, function_to_functionalize): + g = tfssa.functions[function_to_functionalize].graph + loopni = [a for a in g if g[a].op == "Enter"] if len(loopni) == 0: return False self._search(g, loopni[0]) @@ -350,21 +170,30 @@ def functionalize_loops(self, nnssa, function_to_functionalize): 
self.constant_enters = [ self.enters[i] for i in range(len(self.enters)) if self.is_constant[i] ] - self.enters = [self.enters[i] for i in range(len(self.enters)) if not self.is_constant[i]] + self.enters = [ + self.enters[i] for i in range(len(self.enters)) if not self.is_constant[i] + ] self._fix_graph_invariants(g) # for each enter node, find the corresponding downstream merge node enter_corresponding_merge = [ - FindImmediateDownstreamNodes(lambda x: x.op == "Merge").visit(g, enter).get_result()[0] + FindImmediateDownstreamNodes(lambda x: x.op == "Merge") + .visit(g, enter) + .get_result()[0] for enter in self.enters ] merge_corresponding_ni = [ - FindImmediateUpstreamNodes(lambda x: x.op == "NextIteration").visit( - g, merge).get_result()[0] for merge in enter_corresponding_merge + FindImmediateUpstreamNodes(lambda x: x.op == "NextIteration") + .visit(g, merge) + .get_result()[0] + for merge in enter_corresponding_merge ] switch_corresponding_merge = [] for merge in enter_corresponding_merge: - switch_after_merge = FindImmediateDownstreamNodes(lambda x: x.op == "Switch").visit( - g, merge).get_result() + switch_after_merge = ( + FindImmediateDownstreamNodes(lambda x: x.op == "Switch") + .visit(g, merge) + .get_result() + ) if len(switch_after_merge) > 0: switch_corresponding_merge.append(switch_after_merge[0]) else: @@ -374,7 +203,7 @@ def functionalize_loops(self, nnssa, function_to_functionalize): # constructed later on new_switch_node = ParsedTFNode() new_switch_node.op = "Switch" - new_switch_node.name = nnssa._find_free_name("fake_switch_") + new_switch_node.name = tfssa._find_free_name("fake_switch_") g[new_switch_node.name] = new_switch_node connect_edge(g, merge, new_switch_node.name) connect_edge(g, self.loopcond[0], new_switch_node.name) @@ -382,21 +211,24 @@ def functionalize_loops(self, nnssa, function_to_functionalize): exit_corresponding_switch = [] for switch in switch_corresponding_merge: - res = FindImmediateDownstreamNodes(lambda x: x.op == "Exit").visit(g, - switch).get_result() + res = ( + FindImmediateDownstreamNodes(lambda x: x.op == "Exit") + .visit(g, switch) + .get_result() + ) if len(res) > 0: exit_corresponding_switch.append(res[0]) else: new_exit_node = ParsedTFNode() new_exit_node.op = "Exit" - new_exit_node.name = nnssa._find_free_name("fake_exit_") + new_exit_node.name = tfssa._find_free_name("fake_exit_") g[new_exit_node.name] = new_exit_node connect_edge(g, switch, new_exit_node.name) exit_corresponding_switch.append(new_exit_node.name) while_loop = ParsedTFNode() while_loop.op = "while" - while_loop.name = nnssa._find_free_name("while_") + while_loop.name = tfssa._find_free_name("while_") g[while_loop.name] = while_loop # Build the Loop Condition @@ -406,7 +238,7 @@ def functionalize_loops(self, nnssa, function_to_functionalize): # terminated with LoopCond make_inputs = ParsedTFNode() make_inputs.op = "make_tuple" - make_inputs.name = nnssa._find_free_name("make_input_") + make_inputs.name = tfssa._find_free_name("make_input_") g[make_inputs.name] = make_inputs for enter in self.enters: replace_dest(g, g[enter].inputs[0], enter, make_inputs.name) @@ -420,7 +252,7 @@ def functionalize_loops(self, nnssa, function_to_functionalize): # build the cond function cond_body = ParsedTFNode() cond_body.op = "function_entry" - cond_body.name = nnssa._find_free_name("cond_function_") + cond_body.name = tfssa._find_free_name("cond_function_") cond_body.inputs = [] g[cond_body.name] = cond_body for merge_idx in range(len(enter_corresponding_merge)): @@ -452,7 +284,7 
@@ def functionalize_loops(self, nnssa, function_to_functionalize): # build the body function body = ParsedTFNode() body.op = "function_entry" - body.name = nnssa._find_free_name("body_function_") + body.name = tfssa._find_free_name("body_function_") body.inputs = [] g[body.name] = body for switch_idx in range(len(switch_corresponding_merge)): @@ -474,7 +306,7 @@ def functionalize_loops(self, nnssa, function_to_functionalize): # terminated with LoopCond make_outputs = ParsedTFNode() make_outputs.op = "make_tuple" - make_outputs.name = nnssa._find_free_name("make_output_") + make_outputs.name = tfssa._find_free_name("make_output_") g[make_outputs.name] = make_outputs for ni in merge_corresponding_ni: connect_edge(g, g[ni].inputs[0], make_outputs.name) @@ -485,8 +317,12 @@ def functionalize_loops(self, nnssa, function_to_functionalize): body_connected = False for output in list(g[enter].outputs): if output not in self.cond and output not in self.body: - cond_intersection = FindSubgraph(self.cond).visit(g, output).get_result() - body_intersection = FindSubgraph(self.body).visit(g, output).get_result() + cond_intersection = ( + FindSubgraph(self.cond).visit(g, output).get_result() + ) + body_intersection = ( + FindSubgraph(self.body).visit(g, output).get_result() + ) if len(cond_intersection) > 0: cond_intersection.append(output) self.cond += cond_intersection @@ -495,7 +331,7 @@ def functionalize_loops(self, nnssa, function_to_functionalize): self.body += body_intersection get_tuple = ParsedTFNode() get_tuple.op = "get_tuple" - get_tuple.name = nnssa._find_free_name("get_tuple_const_") + get_tuple.name = tfssa._find_free_name("get_tuple_const_") get_tuple.attr = {"index": idx + constant_base_index} g[get_tuple.name] = get_tuple @@ -508,21 +344,21 @@ def functionalize_loops(self, nnssa, function_to_functionalize): # body must accept and return everything get_tuple = ParsedTFNode() get_tuple.op = "get_tuple" - get_tuple.name = nnssa._find_free_name("get_tuple_const_") + get_tuple.name = tfssa._find_free_name("get_tuple_const_") get_tuple.attr = {"index": idx + constant_base_index} g[get_tuple.name] = get_tuple connect_edge(g, body.name, get_tuple.name) connect_edge(g, get_tuple.name, make_outputs.name) - assert (len(g[make_outputs.name].inputs) == len(g[make_inputs.name].inputs)) + assert len(g[make_outputs.name].inputs) == len(g[make_inputs.name].inputs) output_return = ParsedTFNode() output_return.op = "return" - output_return.name = nnssa._find_free_name("body_return_") + output_return.name = tfssa._find_free_name("body_return_") g[output_return.name] = output_return connect_edge(g, make_outputs.name, output_return.name) - while_loop.attr['cond_function'] = cond_body.name - while_loop.attr['body_function'] = body.name + while_loop.attr["cond_function"] = cond_body.name + while_loop.attr["body_function"] = body.name for i in self.enters: delete_node(g, i) for i in self.next_iterations: @@ -534,44 +370,69 @@ def functionalize_loops(self, nnssa, function_to_functionalize): exit_node = exit_corresponding_switch[i] g[exit_node].op = "get_tuple" g[exit_node].attr = {"index": i} - cond_function = FindSubgraph(self.loopcond[0]).visit(g, cond_body.name).get_result() + cond_function = ( + FindSubgraph(self.loopcond[0]).visit(g, cond_body.name).get_result() + ) cond_function = set(cond_function + [self.loopcond[0], cond_body.name]) - body_function = FindSubgraph(output_return.name).visit(g, body.name).get_result() + body_function = ( + FindSubgraph(output_return.name).visit(g, body.name).get_result() + 
) body_function = set(body_function + [body.name, output_return.name]) + # trace input constants associated with the cond_graph + # and the body_graph. These constants can only have one consumer + # for now. Any more and we will either need to associate + # it as an argument, or split the constant. - cond_constants = FindImmediateUpstreamNodes(lambda x: x.op == "Const").visit_many( - g, cond_function).get_result() - body_constants = FindImmediateUpstreamNodes(lambda x: x.op == "Const").visit_many( - g, body_function).get_result() - #for const_node in cond_constants + body_constants: + cond_constants = ( + FindImmediateUpstreamNodes(lambda x: x.op == "Const") + .visit_many(g, cond_function) + .get_result() + ) + body_constants = ( + FindImmediateUpstreamNodes(lambda x: x.op == "Const") + .visit_many(g, body_function) + .get_result() + ) + # for const_node in cond_constants + body_constants: # assert(len(g[const_node].outputs) == 1) cond_function = cond_function.union(set(cond_constants)) body_function = body_function.union(set(body_constants)) - downstream_cond = FindAllReachableNodes(lambda x: True).visit_many( - g, cond_function).get_result() + downstream_cond = ( + FindAllReachableNodes(lambda x: True) + .visit_many(g, cond_function) + .get_result() + ) downstream_cond = set(downstream_cond) - cond_function if len(downstream_cond) > 0: - print("Disconnecting unused variables in condition function ", downstream_cond) + logging.debug( + "Disconnecting unused variables in condition function %s", + downstream_cond, + ) for i in downstream_cond: delete_node(g, i) - downstream_body = FindAllReachableNodes(lambda x: True).visit_many( - g, body_function).get_result() + downstream_body = ( + FindAllReachableNodes(lambda x: True) + .visit_many(g, body_function) + .get_result() + ) downstream_body = set(downstream_body) - body_function if len(downstream_body) > 0: - print("Disconnecting unused variables in body function ", downstream_body) + logging.debug( + "Disconnecting unused variables in body function %s", downstream_body + ) for i in downstream_body: delete_node(g, i) cond_graph = {k: v for k, v in g.items() if k in cond_function} body_graph = {k: v for k, v in g.items() if k in body_function} - g = {k: v for k, v in g.items() if k not in cond_function and k not in body_function} + g = { + k: v + for k, v in g.items() + if k not in cond_function and k not in body_function + } # localize control dependencies # In the main graph, reattach the control dependency to the while op for k, v in g.items(): @@ -595,21 +456,21 @@ def functionalize_loops(self, tfssa, function_to_functionalize): for idx in range(len(v.control_outputs) - 1, -1, -1): if v.control_outputs[idx] not in graph: v.control_outputs.pop(idx) - nnssa.functions[function_to_functionalize] = SSAFunction(g) - nnssa.add_function(cond_body.name, SSAFunction(cond_graph)) - nnssa.add_function(body.name, SSAFunction(body_graph)) + tfssa.functions[function_to_functionalize] = SSAFunction(g) + tfssa.add_function(cond_body.name, SSAFunction(cond_graph)) + tfssa.add_function(body.name, SSAFunction(body_graph)) return True -def functionalize_loops(ssa): +def functionalize_loops(tfssa): """ - Functionalize all loops in an SSA + Functionalize all loops in a tfssa """ done = False - while (done == False): + while not done: done = True - for f in list(ssa.functions.keys()): + for f in list(tfssa.functions.keys()): functionalize = FunctionalizeLoops() - ret = functionalize.functionalize_loops(ssa, f) - if ret == True: + ret = 
functionalize.functionalize_loops(tfssa, f) + if ret: + done = False diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py new file mode 100644 index 000000000..2bd58fd23 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/fuse_dilation_conv.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import numpy as np + +from coremltools.converters.mil.mil import types +from ..parsed_tf_node import ParsedTFNode +from ..basic_graph_ops import replace_source, delete_node + + +def _try_same(input_h, input_w, W_h, W_w, dilation_factor, padding, crop): + base_paddings = [0] * 4 + + dilated_W_h = dilation_factor[0] * (W_h - 1) + 1 + dilated_W_w = dilation_factor[1] * (W_w - 1) + 1 + + base_paddings[0] = (dilated_W_h - 1) // 2 + base_paddings[1] = dilated_W_h - 1 - (dilated_W_h - 1) // 2 + base_paddings[2] = (dilated_W_w - 1) // 2 + base_paddings[3] = dilated_W_w - 1 - (dilated_W_w - 1) // 2 + + pad_start_h = base_paddings[0] + pad_start_w = base_paddings[2] + orig_pad_end_h = base_paddings[1] + orig_pad_end_w = base_paddings[3] + full_input_h = input_h + pad_start_h + orig_pad_end_h + full_input_w = input_w + pad_start_w + orig_pad_end_w + pad_end_extra_h = ( + dilation_factor[0] - full_input_h % dilation_factor[0] + ) % dilation_factor[0] + pad_end_extra_w = ( + dilation_factor[1] - full_input_w % dilation_factor[1] + ) % dilation_factor[1] + pad_end_h = orig_pad_end_h + pad_end_extra_h + pad_end_w = orig_pad_end_w + pad_end_extra_w + + return ( + padding[0] == pad_start_h + and padding[1] == pad_end_h + and padding[2] == pad_start_w + and padding[3] == pad_end_w + and crop[0] == 0 + and crop[1] == pad_end_extra_h + and crop[2] == 0 + and crop[3] == pad_end_extra_w + ) + + +def _pattern_match_and_rewrite(gddict, conv_op): + node = gddict[conv_op] + channel_first = node.attr["data_format"].startswith("NC") + + if len(node.inputs) == 0 or len(node.outputs) == 0: + return + + prev_node = gddict[node.inputs[0]] + next_node = gddict[node.outputs[0]] + + expand_node = None + squeeze_node = None + # Check for Conv1D cases + if prev_node.op == "ExpandDims": + # Every Conv1D has an ExpandDims/Squeeze pair around it. + if next_node.op != "Squeeze": + return + + expand_node = prev_node + squeeze_node = next_node + + if len(prev_node.inputs) == 0 or len(next_node.outputs) == 0: + return + prev_node = gddict[prev_node.inputs[0]] + next_node = gddict[next_node.outputs[0]] + + # Check if Conv1D/Conv2D is surrounded by SpaceToBatchND and BatchToSpaceND + if prev_node.op != "SpaceToBatchND" or next_node.op != "BatchToSpaceND": + return + else: + stb_node = prev_node + bts_node = next_node + + dilation_node = gddict[stb_node.inputs[1]] + if dilation_node.value is None: + return + dilation_factor = dilation_node.value.val + if gddict[bts_node.inputs[1]].value is None or np.any( + dilation_factor != gddict[bts_node.inputs[1]].value.val + ): + # If the SpaceToBatchND and BatchToSpaceND factors don't match, we do not fuse. 
+        return
+
+    padding_node = gddict[stb_node.inputs[2]]
+    if padding_node.value is None:
+        return
+    padding_val = padding_node.value.val.flatten()
+
+    crop_node = gddict[bts_node.inputs[2]]
+    if crop_node.value is None:
+        return
+    crop_val = crop_node.value.val.flatten()
+
+    if expand_node:
+        dilation_factor = [1] + list(dilation_factor)
+        padding_val = [0, 0] + list(padding_val)
+        crop_val = [0, 0] + list(crop_val)
+    # Trying to invert the logic TF uses to generate padding/cropping values for
+    # SpaceToBatchND and BatchToSpaceND under the different Conv2D padding modes.
+    # Logic extracted from TF's builder at:
+    # tensorflow/python/ops/nn_ops.py and tensorflow/python/ops/array_ops.py
+    is_same = False
+    if np.any(padding_val != 0):
+        input_shape = gddict[stb_node.inputs[0]].attr.get("_output_shapes", None)
+        if input_shape is None:
+            input_shape = gddict[stb_node.inputs[0]].attr.get("shape", None)
+        else:
+            input_shape = input_shape[0]
+        W_node = gddict[node.inputs[1]]
+        W_shape = None if W_node.op != "Const" else W_node.datatype.get_shape()
+        if input_shape is None or W_shape is None:
+            return
+        W_h, W_w = W_shape[0], W_shape[1]
+        HW = input_shape[2:] if channel_first else input_shape[1:-1]
+        if expand_node:
+            HW = [1] + list(HW)
+        is_same = _try_same(
+            HW[0], HW[1], W_h, W_w, dilation_factor, padding_val, crop_val
+        )
+
+    # Re-wiring the nodes to skip SpaceToBatchND.
+    # We change BatchToSpaceND to Identity since it might be a terminal op.
+    deleted_nodes = set()
+    if expand_node:
+        replace_source(gddict, stb_node, expand_node, stb_node.inputs[0])
+    else:
+        replace_source(gddict, stb_node, node, stb_node.inputs[0])
+
+    bts_node.op = "Identity"
+    bts_node.attr = {}
+
+    deleted_nodes.update(stb_node.inputs[1:])
+    deleted_nodes.update([stb_node.name])
+    deleted_nodes.update(bts_node.inputs[1:])
+
+    # Rewrite dilation attribute for (Depthwise)Conv2D
+    dilation_val = (
+        [1, 1] + list(dilation_factor)
+        if node.attr["data_format"] == "NCHW"
+        else [1] + list(dilation_factor) + [1]
+    )
+    node.attr["dilations"] = dilation_val
+    # Rewrite padding attribute for (Depthwise)Conv2D.
+    # This is because TF always plugs in VALID padding for a Conv2D that
+    # follows a SpaceToBatchND. If the original Conv2D used SAME padding, TF
+    # would have inserted the padding automatically, so we restore SAME here.
+    if is_same:
+        node.attr["padding"] = "SAME"

+    # Removing stale attributes for nodes.
+    if expand_node and "_output_shapes" in expand_node.attr:
+        del expand_node.attr["_output_shapes"]
+    if squeeze_node and "_output_shapes" in squeeze_node.attr:
+        del squeeze_node.attr["_output_shapes"]
+    if "_output_shapes" in node.attr:
+        del node.attr["_output_shapes"]
+    if expand_node and "shape" in expand_node.attr:
+        del expand_node.attr["shape"]
+    if squeeze_node and "shape" in squeeze_node.attr:
+        del squeeze_node.attr["shape"]
+    if "shape" in node.attr:
+        del node.attr["shape"]
+
+    for d in deleted_nodes:
+        delete_node(gddict, d)
+
+
+def _fuse_dilation_conv(gddict):
+    """
+    A dilated convolution in older TensorFlow versions might not be fused into the
+    Conv2D or DepthwiseConv2D op, but represented with the following format:
+
+        SpaceToBatchND -> (Depthwise)Conv2D -> BatchToSpaceND
+
+    We try to fuse it back into (Depthwise)Conv2D with the dilation parameter
+    set in attribute.
+    There are several patterns that exist in TensorFlow for breaking up dilated
+    convolutions.
We detect the following patterns:
+
+        SpaceToBatchND -> ExpandDims -> Conv2D -> Squeeze -> BatchToSpaceND
+
+        SpaceToBatchND -> Conv2D -> BatchToSpaceND
+
+    The first case appears when Conv1D is used: TF expands/squeezes the inputs
+    to conform to the Conv2D pattern.
+    The second case is a basic Conv2D pattern.
+
+    """
+    for name in list(gddict.keys()):
+        if name not in gddict:
+            # Node might have been removed from graph during fusion.
+            continue
+        node = gddict[name]
+        if node.op in {"Conv2D", "DepthwiseConv2dNative"}:
+            _pattern_match_and_rewrite(gddict, name)
+
+
+def fuse_dilation_conv(tfssa):
+    """
+    TensorFlow decomposes Depthwise Convolution with dilation into:
+
+        SpaceToBatchND ---> Conv2D/DepthwiseConv2D ---> BatchToSpaceND
+
+    We identify such patterns and use Conv2D/DepthwiseConv2D to represent them.
+    """
+    for f in tfssa.functions.keys():
+        _fuse_dilation_conv(tfssa.functions[f].graph)
diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/insert_get_tuple.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py
similarity index 66%
rename from coremltools/converters/nnssa/frontend/tensorflow/graph_pass/insert_get_tuple.py
rename to coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py
index e6adc57a0..c10af592d 100644
--- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/insert_get_tuple.py
+++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/insert_get_tuple.py
@@ -1,4 +1,10 @@
 # -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
 from __future__ import print_function as _
 from __future__ import division as _
 from __future__ import absolute_import as _
@@ -8,7 +14,7 @@
 def insert_get_tuple(gddict):
     """
-    Tensorflow uses input "nodename:i" to denote "get tuple i" from "nodename".
+    TensorFlow uses input "nodename:i" to denote "get tuple i" from "nodename".
Here we split it so that: node1:i -> node2 @@ -28,7 +34,6 @@ def insert_get_tuple(gddict): node1 -> node2 """ - import copy retdict = {} get_tuple_op_var_index = 1 @@ -45,17 +50,24 @@ def make_op(input_node, index, new_node_name, gto_make_op_cache): inserted_op_name = new_node_name inserted_op = ParsedTFNode() inserted_op.name = inserted_op_name - inserted_op.op = 'get_tuple' + inserted_op.op = "get_tuple" inserted_op.inputs = [input_node] - inserted_op.attr['index'] = index + inserted_op.attr["index"] = index inserted_ops[inserted_op_name] = inserted_op gto_make_op_cache[cache_key] = inserted_op return inserted_op exclusions = [ - 'Switch', 'Enter', 'Exit', 'Merge', 'LoopCond', 'NextIteration', 'TensorArrayV3', 'Const' + "Switch", + "Enter", + "Exit", + "Merge", + "LoopCond", + "NextIteration", + "TensorArrayV3", + "Const", ] - inclusions = ['Split', 'SplitV', 'LSTMBlockCell'] + inclusions = ["Split", "SplitV", "LSTMBlockCell"] gto_make_op_cache = {} for name in list(gddict.keys()): new_node = ParsedTFNode() @@ -68,14 +80,20 @@ def make_op(input_node, index, new_node_name, gto_make_op_cache): input_node = new_node.inputs[idx] input_index = 0 - if ('_output_shapes' in gddict[input_node].attr and \ - len(gddict[input_node].attr['_output_shapes']) > 1 and \ - gddict[input_node].op not in exclusions) or \ - (gddict[input_node].op in inclusions): - get_tuple_node_name = 'gto_%s' % (get_tuple_op_var_index) + if ( + "_output_shapes" in gddict[input_node].attr + and len(gddict[input_node].attr["_output_shapes"]) > 1 + and gddict[input_node].op not in exclusions + ) or (gddict[input_node].op in inclusions): + get_tuple_node_name = "gto_%s" % (get_tuple_op_var_index) new_inputs.append( - make_op(input_node, int(input_index), get_tuple_node_name, - gto_make_op_cache).name) + make_op( + input_node, + int(input_index), + get_tuple_node_name, + gto_make_op_cache, + ).name + ) get_tuple_op_var_index += 1 else: new_inputs.append(new_node.inputs[idx]) diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/tensor_array_transform.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py similarity index 90% rename from coremltools/converters/nnssa/frontend/tensorflow/graph_pass/tensor_array_transform.py rename to coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py index dca541587..17f67d1ba 100644 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/tensor_array_transform.py +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/tensor_array_transform.py @@ -1,9 +1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -from ....commons import builtins -from ....commons.basic_graph_ops import delete_node + # A TensorArray is essentially a runtime vector with # @@ -59,7 +64,7 @@ def tensor_array_resource_removal(gd): # this should be called *BEFORE* introduction of tuples, # and before output edges are added (for simplicity) for k, node in gd.items(): - if node.op.startswith('TensorArray') and node.op != 'TensorArrayV3': + if node.op.startswith("TensorArray") and node.op != "TensorArrayV3": # generally the resource edge is the first edge # input is resource, indices, flow # output is generally flow @@ -74,6 +79,6 @@ def tensor_array_resource_removal(gd): else: input_node = node.inputs[i] input_index = 0 - if gd[input_node].op == 'TensorArrayV3': + if gd[input_node].op == "TensorArrayV3": if input_index == 1: node.inputs[i] = "%s" % input_node diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/variable_node_transform.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py similarity index 65% rename from coremltools/converters/nnssa/frontend/tensorflow/graph_pass/variable_node_transform.py rename to coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py index 4e64797c0..f5f6c83c1 100644 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/variable_node_transform.py +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/variable_node_transform.py @@ -1,8 +1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -from ....commons.basic_graph_ops import disconnect_vertex_ins, delete_node +from ..basic_graph_ops import disconnect_vertex_ins, delete_node # Variable nodes are not horribly complicated. 
@@ -31,12 +37,15 @@ # - We transform Variable to a function attribute # - We transform Assign ops to just "set_global" with attribute variable:VariableName # - We transform Read ops to just "get_global" with attribute variable:VariableName -def remove_variable_node_impl(fn, ssa): - variables = [var for var in fn.graph.values() if var.op == 'VariableV2'] - assigns = [assign for assign in fn.graph.values() if assign.op == 'Assign'] +def remove_variable_node_impl(fn, tfssa): + variables = [var for var in fn.graph.values() if var.op == "VariableV2"] + assigns = [assign for assign in fn.graph.values() if assign.op == "Assign"] reads = [ - read for read in fn.graph.values() if read.op == 'Identity' and len(read.inputs) == 1 - and fn.graph[read.inputs[0]].op == 'VariableV2' + read + for read in fn.graph.values() + if read.op == "Identity" + and len(read.inputs) == 1 + and fn.graph[read.inputs[0]].op == "VariableV2" ] # find the variable initial values @@ -46,19 +55,22 @@ def remove_variable_node_impl(fn, ssa): v.parse_from_attr() variable_values[v.name] = v.datatype() for node in fn.graph.values(): - if node.op == 'Assign' and node.inputs[0] == v.name and node.inputs[ - 1] == v.name + "/initial_value": + if ( + node.op == "Assign" + and node.inputs[0] == v.name + and node.inputs[1] == v.name + "/initial_value" + ): variable_values[v.name] = fn.graph[node.inputs[1]].value additional_nodes_to_delete += [node.name, node.inputs[1]] for r in reads: - r.op = 'get_global' - r.attr['variable'] = r.inputs[0] + r.op = "get_global" + r.attr["variable"] = r.inputs[0] disconnect_vertex_ins(fn.graph, r.name) # transform writes to set_global for r in assigns: - r.op = 'set_global' - r.attr['variable'] = r.inputs[0] + r.op = "set_global" + r.attr["variable"] = r.inputs[0] for var in variables: delete_node(fn.graph, var.name) @@ -67,12 +79,12 @@ def remove_variable_node_impl(fn, ssa): delete_node(fn.graph, node) for k, v in variable_values.items(): - ssa.variables[k] = v + tfssa.variables[k] = v -def remove_variable_nodes(ssa): +def remove_variable_nodes(tfssa): """ This should be performed after constant propagation pass. """ - for v in ssa.functions.values(): - remove_variable_node_impl(v, ssa) + for v in tfssa.functions.values(): + remove_variable_node_impl(v, tfssa) diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/visitors.py b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/visitors.py new file mode 100644 index 000000000..7790a855a --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_graph_pass/visitors.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from ..parsed_tf_node import ParsedTFNode + + +class FindAllDownstreamTerminals(object): + # Find all nodes matching a particular function + # which is downstream reachable from a set of nodes. 
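+    #
+    # A minimal usage sketch (hypothetical graph dict `g` of ParsedTFNode and
+    # a starting node name "while/Enter"; names are illustrative only):
+    #
+    #     exits = (
+    #         FindAllDownstreamTerminals(lambda n: n.op == "Exit")
+    #         .visit_many(g, ["while/Enter"])
+    #         .get_result()
+    #     )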
+    def __init__(self, fn):
+        self.result = []
+        self.fn = fn
+        self.memo = {}
+
+    def visit(self, g, node):
+        if not isinstance(node, ParsedTFNode):
+            node = g[node]
+
+        if node.name in self.memo:
+            return self
+        self.memo[node.name] = 1
+
+        if self.fn(node):
+            self.result.append(node.name)
+            return self
+
+        for i in node.outputs:
+            self.visit(g, g[i])
+
+        return self
+
+    def visit_many(self, g, nodes):
+        for i in nodes:
+            self.visit(g, i)
+        return self
+
+    def get_result(self):
+        return self.result
+
+
+class FindAllReachableNodes(object):
+    # Find all nodes reachable from a set of nodes which satisfy a criterion
+    def __init__(self, fn):
+        self.result = []
+        self.fn = fn
+        self.memo = {}
+
+    def visit(self, g, node):
+        if not isinstance(node, ParsedTFNode):
+            node = g[node]
+
+        if node.name in self.memo:
+            return self
+        self.memo[node.name] = 1
+
+        if self.fn(node):
+            self.result.append(node.name)
+
+        for i in node.outputs:
+            self.visit(g, g[i])
+
+        for i in node.inputs:
+            self.visit(g, g[i])
+
+        return self
+
+    def visit_many(self, g, nodes):
+        for i in nodes:
+            self.visit(g, i)
+        return self
+
+    def get_result(self):
+        return self.result
+
+
+class FindImmediateUpstreamNodes(object):
+    # Find all nodes matching a particular predicate which are immediately above a set of nodes
+    def __init__(self, fn):
+        self.result = []
+        self.fn = fn
+
+    def visit(self, g, node):
+        if not isinstance(node, ParsedTFNode):
+            node = g[node]
+
+        for i in node.inputs:
+            if self.fn(g[i]):
+                self.result.append(i)
+
+        return self
+
+    def visit_many(self, g, nodes):
+        for i in nodes:
+            self.visit(g, i)
+        return self
+
+    def get_result(self):
+        return self.result
+
+
+class FindImmediateDownstreamNodes(object):
+    # Find all nodes matching a particular predicate which are immediately below a set of nodes
+    def __init__(self, fn):
+        self.result = []
+        self.fn = fn
+
+    def visit(self, g, node):
+        if not isinstance(node, ParsedTFNode):
+            node = g[node]
+
+        for i in node.outputs:
+            if self.fn(g[i]):
+                self.result.append(i)
+
+        return self
+
+    def visit_many(self, g, nodes):
+        for i in nodes:
+            self.visit(g, i)
+        self.result = list(set(self.result))
+        return self
+
+    def get_result(self):
+        return self.result
+
+
+class FindAllUpstreamTerminals(object):
+    # Find the "upstream frontier" of nodes passing some predicate.
+    # In other words, perform a pre-order traversal of a node and its inputs, collecting all nodes
+    # passing a given predicate as we go along. Terminate the search along a given branch as soon
+    # as a node is collected.
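+    #
+    # A minimal usage sketch (hypothetical graph dict `g`, collecting the
+    # frontier of Const nodes above a node named "mul"; names illustrative):
+    #
+    #     consts = (
+    #         FindAllUpstreamTerminals(lambda n: n.op == "Const")
+    #         .visit_many(g, ["mul"])
+    #         .get_result()
+    #     )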
+ def __init__(self, fn, control_dependencies=False): + self.result = [] + self.fn = fn + self.control_dependencies = control_dependencies + self.memo = {} + + def visit(self, g, node): + if not isinstance(node, ParsedTFNode): + node = g[node] + + if node.name in self.memo: + return self + self.memo[node.name] = 1 + + if self.fn(node): + self.result.append(node.name) + return self + + for i in node.inputs: + self.visit(g, g[i]) + if self.control_dependencies: + for i in node.control_inputs: + self.visit(g, g[i]) + return self + + def visit_many(self, g, nodes): + for i in nodes: + self.visit(g, i) + self.result = list(set(self.result)) + return self + + def get_result(self): + return self.result + + +class FindSubgraph(object): + # Find all nodes between a set of sources and a set of terminals + # Sources are not returned, but reached terminals are returned + def __init__(self, terminal_nodes): + self.memo = {} + self.terminal = terminal_nodes + + def visit_impl(self, g, node): + if not isinstance(node, ParsedTFNode): + node = g[node] + + if node.name in self.terminal: + self.memo[node.name] = True + return True + + if node.name in self.memo: + return self.memo[node.name] + + # add self to memo first otherwise cycles will not terminate + self.memo[node.name] = None + reachable = None + all_unreachable = True + for i in node.outputs + node.control_outputs: + visit_result = self.visit_impl(g, g[i]) + if visit_result == True: # pylint: disable=singleton-comparison + reachable = True + if visit_result != False: # pylint: disable=singleton-comparison + all_unreachable = False + + if reachable: + self.memo[node.name] = reachable + elif all_unreachable: + self.memo[node.name] = False + else: + self.memo[node.name] = None + + return reachable + + def visit(self, g, node): + self.visit_impl(g, node) + while True: + if None in iter(self.memo.values()): + revisit = [k for k, v in self.memo.items() if v is None] + self.memo = {k: v for k, v in self.memo.items() if v is not None} + for n in revisit: + self.visit_impl(g, n) + else: + break + return self + + def visit_many(self, g, nodes): + for node in nodes: + self.visit_impl(g, node) + while True: + if None in iter(self.memo.values()): + revisit = [k for k, v in self.memo.items() if v is None] + self.memo = {k: v for k, v in self.memo.items() if v is not None} + for n in revisit: + self.visit_impl(g, n) + else: + break + return self + + def get_result(self): + return [k for k, v in self.memo.items() if v] diff --git a/coremltools/converters/mil/frontend/tensorflow/tf_op_registry.py b/coremltools/converters/mil/frontend/tensorflow/tf_op_registry.py new file mode 100644 index 000000000..9b5d48a72 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow/tf_op_registry.py @@ -0,0 +1,47 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +_TF_OPS_REGISTRY = {} + + +def register_tf_op(_func=None, tf_alias=None, override=False): + """ + Registration routine for TensorFlow operators + _func: (TF conversion function) [Default=None] + TF conversion function to register + + tf_alias: (List of string) [Default=None] + All other TF operators that should also be mapped to + current conversion routine. + e.g. Sort aliased with SortV1, SortV2 + All provided alias operators must not be registered previously. 
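+        A hypothetical registration sketch (op and alias names here are
+        illustrative only; the conversion body is elided):
+
+            @register_tf_op(tf_alias=["SortV2"])
+            def Sort(context, node):
+                ...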
+ + override: (Boolean) [Default=False] + If True, overrides earlier registration i.e. specified + operator and alias will start pointing to current conversion + function. + Otherwise, duplicate registration will error out. + """ + + def func_wrapper(func): + f_name = func.__name__ + + if not override and f_name in _TF_OPS_REGISTRY: + raise ValueError("TF op {} already registered.".format(f_name)) + _TF_OPS_REGISTRY[f_name] = func + # If tf_alias is provided, then all the functions mentioned as aliased + # are mapped to current function + if tf_alias is not None: + for name in tf_alias: + if not override and name in _TF_OPS_REGISTRY: + msg = "TF op alias {} already registered." + raise ValueError(msg.format(name)) + _TF_OPS_REGISTRY[name] = func + return func + + if _func is None: + # decorator called without argument + return func_wrapper + return func_wrapper(_func) diff --git a/coremltools/converters/nnssa/nnssa.py b/coremltools/converters/mil/frontend/tensorflow/tfssa.py similarity index 55% rename from coremltools/converters/nnssa/nnssa.py rename to coremltools/converters/mil/frontend/tensorflow/tfssa.py index 6af8ce53d..dde3aa947 100644 --- a/coremltools/converters/nnssa/nnssa.py +++ b/coremltools/converters/mil/frontend/tensorflow/tfssa.py @@ -1,17 +1,25 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ import copy +import logging -from .commons import builtins -from .commons.dot_visitor import DotVisitor -from .commons.basic_graph_ops import check_connections, const_determined_nodes +from coremltools.converters.mil.mil import types +from .basic_graph_ops import check_connections, const_determined_nodes +from .dot_visitor import DotVisitor +from .naming_utils import escape_fn_name class ParsedNode(object): """ - Node class for the SSA graph. + Node class for the tfssa graph. 
name: The name of the node (str)
     op: The operation represented by the node (str)
@@ -23,9 +31,17 @@ class ParsedNode(object):
     outputs: The list of nodes which consume the result of this node (list[str])
     control_outputs: The list of nodes which have to be executed after this node (list[str])
     """
+
     __slots__ = [
-        'name', 'op', 'datatype', 'value', 'inputs', 'control_inputs', 'outputs', 'control_outputs',
-        'attr'
+        "name",
+        "op",
+        "datatype",
+        "value",
+        "inputs",
+        "control_inputs",
+        "outputs",
+        "control_outputs",
+        "attr",
    ]

    def __init__(self):
@@ -40,53 +56,71 @@ def __init__(self):
        self.attr = {}

    def __copy__(self):
-        ret = ParsedNode()
-        ret.name = self.name
-        ret.op = self.op
-        ret.datatype = self.datatype
-        ret.value = copy.deepcopy(self.value)
-        ret.inputs = self.inputs[:]
-        ret.control_inputs = self.control_inputs[:]
-        ret.outputs = self.outputs[:]
-        ret.control_outputs = self.control_outputs[:]
-        ret.attr = {k: copy.deepcopy(v) for k, v in self.attr.items()}
-        return ret
+        return self._copy_impl(ParsedNode())
+
+    def _copy_impl(self, dest):
+        dest.name = self.name
+        dest.op = self.op
+        dest.datatype = self.datatype
+        dest.value = copy.deepcopy(self.value)
+        dest.inputs = self.inputs[:]
+        dest.control_inputs = self.control_inputs[:]
+        dest.outputs = self.outputs[:]
+        dest.control_outputs = self.control_outputs[:]
+        dest.attr = {k: copy.deepcopy(v) for k, v in self.attr.items()}
+        return dest

    def copy(self):
        return self.__copy__()


 class SSAFunction(object):
-    __slots__ = ["graph", "inputs", "input_types", "outputs", "output_types"]
+    __slots__ = ["graph", "inputs", "input_types", "outputs", "output_types", "ret"]

-    def __init__(self, gdict={}):
+    def __init__(self, gdict=None, inputs=None, outputs=None, ret=None):
+        if gdict is None:
+            gdict = {}
        self.graph = gdict
-        self.inputs = []
-        self.outputs = []
+        self.inputs = [] if inputs is None else inputs
+        self.outputs = [] if outputs is None else outputs
        self.input_types = []
        self.output_types = []
+
+        # ret is a mapping from the output arg names from `signature` to the
+        # outputs from `node_def` that should be returned by the function.
+        # Only used in TF2 for getting indices when generating get_tuple ops
+        # for control flow ops, because the mapping between the sub-graph's
+        # outputs and the control flow node's outputs is defined in the `ret`
+        # dict. See usages in
+        # tf_graph_pass: rewrite_control_flow_functions for details.
+        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/function.proto
+        self.ret = [] if ret is None else ret
+
        check_connections(gdict)
-        self.find_inputs_and_outputs()
+
+        # Respect the TF inputs/outputs if given; otherwise, infer them from
+        # the graph. In the current implementation, TF1 always infers from the
+        # graph. TF2, on the other hand, respects the inputs/outputs provided.
+        if len(self.inputs) == 0 or len(self.outputs) == 0:
+            self.find_inputs_and_outputs()
+        else:
+            self.inputs, self.outputs = inputs, outputs
+            self.filter_inputs_and_outputs()

    def find_inputs_and_outputs(self):
        # solve for input and output vars
-        self.inputs = []
-        self.outputs = []
-        self.input_types = []
-        self.output_types = []
        sorted_keys = sorted(self.graph.keys())

        # we use function entry and exit points if available
        # otherwise we find graph entry and exit points
-        enters = [n.name for n in self.graph.values() if 'entry' in n.op]
-        exits = [n.name for n in self.graph.values() if n.op == 'return']
+        # TODO: op name should be fixed here.
+        # Remove wrappers that are used for old tfssa
+        enters = [
+            n.name for n in self.graph.values() if ("entry" in n.op or "Entry" in n.op)
+        ]
+        exits = [n.name for n in self.graph.values() if n.op in ("Return", "return")]
        if len(enters) > 0 or len(exits) > 0:
-            try:
-                assert (len(enters) > 0)
-                assert (len(exits) > 0)
-            except:
-                import pdb
-                pdb.set_trace()
+            assert len(enters) > 0
+            assert len(exits) > 0
            self.inputs = enters
            self.input_types = [self.graph[v].datatype for v in self.inputs]
            self.outputs = exits
@@ -94,24 +128,58 @@ def find_inputs_and_outputs(self):
        else:
            for k in sorted_keys:
                v = self.graph[k]
-                if len(v.inputs) == 0 and v.op not in ['Const', 'get_global', 'NoOp']:
+                if len(v.inputs) == 0 and v.op not in ["Const", "get_global", "NoOp"]:
                    self.inputs.append(k)
                    self.input_types.append(v.datatype)
                elif len(v.inputs) != 0 and v.op == "Placeholder":
                    assert len(v.inputs) == 1, "This is not a PlaceholderWithDefault!"
                    self.inputs.append(k)
                    self.input_types.append(v.datatype)
-                if len(v.outputs) == 0 and v.op != "set_global":
+                if (
+                    len(v.outputs) == 0
+                    and len(v.control_outputs) == 0
+                    and v.op != "set_global"
+                ):
                    self.outputs.append(k)
                    self.output_types.append(v.datatype)

+    def filter_inputs_and_outputs(self):
+        """
+        Eliminate invalid input/output nodes in the given list. Should only be
+        invoked if self.inputs and self.outputs are both provided and we
+        want to respect those when adding SSAFunctions. Only needed for TF2 for
+        now because of the need to parse multiple functions in the graph. TF1
+        only has one "main" function.
+        """
+        filtered_inputs = []
+        filtered_outputs = []
+        for k in self.inputs:
+            if k not in self.graph.keys():
+                continue
+            v = self.graph[k]
+            if len(v.inputs) == 0 and v.op not in {"Const", "get_global", "NoOp"}:
+                filtered_inputs.append(k)
+                self.input_types.append(v.datatype)
+            elif len(v.inputs) != 0 and v.op == "Placeholder":
+                assert len(v.inputs) == 1, "This is not a PlaceholderWithDefault!"
+                filtered_inputs.append(k)
+                self.input_types.append(v.datatype)
+        for k in self.outputs:
+            if k not in self.graph.keys():
+                continue
+            v = self.graph[k]
+            filtered_outputs.append(k)
+            self.output_types.append(v.datatype)
+        self.inputs, self.outputs = filtered_inputs, filtered_outputs
+
    def __copy__(self):
        ret = SSAFunction()
        ret.inputs = self.inputs[:]
        ret.input_types = self.input_types[:]
        ret.outputs = self.outputs[:]
        ret.output_types = self.output_types[:]
-        ret.graph = {k: copy.copy(v) for k, v in self.graph.items()}
+        ret.graph = {k: copy.deepcopy(v) for k, v in self.graph.items()}
+
        return ret

    def copy(self):
@@ -127,23 +195,31 @@ def __init__(self, instance=None):
        self.global_resource = {}

        if isinstance(instance, NetworkEnsemble):
-            self.functions = {k: copy.copy(v) for k, v in instance.functions.items()}
-            self.variables = {k: copy.copy(v) for k, v in instance.variables.items()}
-            self.global_resource = {k: copy.copy(v) for k, v in instance.global_resource.items()}
+            self.functions = instance.functions
+            self.variables = instance.variables
+            self.global_resource = instance.global_resource
+        elif instance is not None:
+            raise ValueError(
+                "Instance type {} not compatible with NetworkEnsemble".format(
+                    type(instance)
+                )
+            )

    def rename_function(self, src_func, tgt_func):
        """
        Renames the function with function name (src_func) to (tgt_func)
        """
        if src_func not in self.functions:
-            print("Couldn't find function name (%s)." 
% (src_func)) + logging.warning("Couldn't find function name (%s).", src_func) return if tgt_func in self.functions: - print("(%s) already exists in some function name." % (tgt_func)) + logging.warning("(%s) already exists in some function name.", tgt_func) return self.functions[tgt_func] = self.functions.pop(src_func) - print("Successfully changed function name from (%s) to (%s)" % (src_func, tgt_func)) + logging.debug( + "Successfully changed function name from (%s) to (%s)", src_func, tgt_func + ) def rename_node(self, src_node, tgt_node): """ @@ -153,60 +229,66 @@ def rename_node(self, src_node, tgt_node): """ in_ssa = False success = None - for func, ssa in self.functions.items(): - if src_node in ssa.graph: + for func, tfssa in self.functions.items(): + if src_node in tfssa.graph: in_ssa = True - if tgt_node in ssa.graph: - print("(%s) already exists in function (%s)." % (tgt_node, func)) + if tgt_node in tfssa.graph: + logging.warning( + "(%s) already exists in function (%s).", tgt_node, func + ) break success = func - ssa.graph[tgt_node] = ssa.graph.pop(src_node) + tfssa.graph[tgt_node] = tfssa.graph.pop(src_node) # Replace other nodes' output dependency - for inp in ssa.graph[tgt_node].inputs: - for idx, out in enumerate(ssa.graph[inp].outputs): + for inp in tfssa.graph[tgt_node].inputs: + for idx, out in enumerate(tfssa.graph[inp].outputs): if out == src_node: - ssa.graph[inp].outputs[idx] = tgt_node + tfssa.graph[inp].outputs[idx] = tgt_node break # Replace other nodes' control output dependency - for c_inp in ssa.graph[tgt_node].control_inputs: - for idx, c_out in enumerate(ssa.graph[c_inp].control_outputs): + for c_inp in tfssa.graph[tgt_node].control_inputs: + for idx, c_out in enumerate(tfssa.graph[c_inp].control_outputs): if c_out == src_node: - ssa.graph[c_inp].control_outputs[idx] = tgt_node + tfssa.graph[c_inp].control_outputs[idx] = tgt_node break # Replace other nodes' input dependency - for out in ssa.graph[tgt_node].outputs: - for idx, inp in enumerate(ssa.graph[out].inputs): + for out in tfssa.graph[tgt_node].outputs: + for idx, inp in enumerate(tfssa.graph[out].inputs): if inp == src_node: - ssa.graph[out].inputs[idx] = tgt_node + tfssa.graph[out].inputs[idx] = tgt_node break # Replace other nodes' control input dependency - for c_out in ssa.graph[tgt_node].control_outputs: - for idx, c_inp in enumerate(ssa.graph[c_out].control_inputs): + for c_out in tfssa.graph[tgt_node].control_outputs: + for idx, c_inp in enumerate(tfssa.graph[c_out].control_inputs): if c_inp == src_node: - ssa.graph[c_out].control_inputs[idx] = tgt_node + tfssa.graph[c_out].control_inputs[idx] = tgt_node break break if not in_ssa: - print("Couldn't find (%s) in any functions" % (src_node)) + logging.warning("Couldn't find (%s) in any functions", src_node) if success is not None: - print("Changed (%s) to (%s) in function (%s)" % (src_node, tgt_node, success)) + logging.debug( + "Changed (%s) to (%s) in function (%s)", src_node, tgt_node, success + ) def extract_subgraph(self, outputs, target_inputs=None, name=""): - """ - Given a list of outputs, determine which nodes are needed for - producing the outputs and build a new SSAFunction in the original - NetworkEnsemble that would produce the target outputs. - The function name for the new function would be concatenating all - outputs together unless specified. + """Add a new SSAFunction to the current NetworkEnsemble to produce the given outputs. + + Args: + outputs: The outputs the new function must produce. 
+            target_inputs: Optional list of node names to treat as inputs of
+                the new function; target inputs that are reached are converted
+                to Placeholder ops in the extracted graph.
+            name: The name of the new function to create. If unspecified, a name will be generated
+                by joining output names.
+        Returns:
+            The name of the new function.
        """
        if not isinstance(outputs, list):
-            print("Feed a list of output names for subgraph extraction.\nArgument is not a list.")
-            return
+            raise TypeError("Expected a list of output names for subgraph extraction")

        if name == "":
            outputs.sort()
-            name = "_".join(outputs)
+            name = escape_fn_name("_".join(outputs))

        if target_inputs is None:
            target_inputs = []
@@ -215,8 +297,11 @@ def DFS_inputs(graph, node, vis):
            vis.add(node)
            if node in target_inputs:
                return [node]
-            if (len(graph[node].inputs) == 0 and
-                    len(graph[node].control_inputs) == 0 and graph[node].op != "Const"):
+            if (
+                len(graph[node].inputs) == 0
+                and len(graph[node].control_inputs) == 0
+                and graph[node].op != "Const"
+            ):
                return [node]
            inputs = []
            for i in graph[node].inputs + graph[node].control_inputs:
@@ -264,7 +349,9 @@ def DFS_set_globals(graph, node, vis):
            if new_k in target_inputs:
                gdict[new_k].op = "Placeholder"
            gdict[new_k].inputs = [inp for inp in new_v.inputs if inp in incl_nodes]
-            gdict[new_k].outputs = [out for out in new_v.outputs if out in incl_nodes]
+            gdict[new_k].outputs = [
+                out for out in new_v.outputs if out in incl_nodes
+            ]
            gdict[new_k].control_inputs = [
                inp for inp in new_v.control_inputs if inp in incl_nodes
            ]
@@ -303,13 +390,14 @@ def DFS_set_globals(graph, node, vis):
            gdict[output] = output_node

        self.functions[name] = SSAFunction(gdict)
+        return name

    def delete_subgraph(self, name):
        """
        Delete the SSAfunction with function_name.
        """
        if name not in self.functions:
-            print("(%s) not in NetworkEnsemble" % (name))
+            logging.warning("(%s) not in NetworkEnsemble", name)
            return
        del self.functions[name]

@@ -321,7 +409,9 @@ def __str__(self):
        for func, v in self.functions.items():
            if func.startswith("body_function_") or func.startswith("f_body_function_"):
                continue
-            elif func.startswith("cond_function_") or func.startswith("f_cond_function_"):
+            elif func.startswith("cond_function_") or func.startswith(
+                "f_cond_function_"
+            ):
                continue

            ret += "Input Function Name: %s\n" % (func)
@@ -330,12 +420,14 @@ def __str__(self):
                ret += "  %s\n" % (inp)
            ret += "  Outputs:\n"
            for out in v.outputs:
-                if out.startswith('fake_exit_'):
+                if out.startswith("fake_exit_"):
                    continue
                ret += "  %s\n" % (out)
        return ret

-    def get_dot_string(self, name_and_op_style=False, annotation=False, highlight_debug_nodes=[]):
+    def get_dot_string(
+        self, name_and_op_style=False, annotation=False, highlight_debug_nodes=None
+    ):
        """
        Return the dot string that can be used to show the whole graph
        with dot. By default, the graph contains op and type.
If @@ -360,54 +452,66 @@ def get_dot_string(self, name_and_op_style=False, annotation=False, highlight_de >>> graphviz.Source(network.get_dot_string()).view() """ + if highlight_debug_nodes is None: + highlight_debug_nodes = [] function_names = sorted(self.functions.keys()) - dotstring = 'digraph g {\n' + \ - '\tcompound=true;\n' + dotstring = "digraph g {\n" + "\tcompound=true;\n" # find all tensor nodes with unknown sizes ctr = 0 for k in function_names: const_nodes = const_determined_nodes(self.functions[k].graph) unknown_sized_tensor_ops = [] for v, n in self.functions[k].graph.items(): - if n.datatype is None or (n.datatype is not None and \ - builtins.is_tensor(n.datatype) and \ - (len(n.datatype.get_shape()) == 0 or -1 in n.datatype.get_shape())): + if n.datatype is None or ( + n.datatype is not None + and types.is_tensor(n.datatype) + and ( + len(n.datatype.get_shape()) == 0 or -1 in n.datatype.get_shape() + ) + ): unknown_sized_tensor_ops.append(v) if n.op in highlight_debug_nodes: highlight_debug_nodes.append(v) v = self.functions[k] vis = DotVisitor(annotation) - vis.highlight_nodes(v.inputs, 'yellow') \ - .highlight_nodes(const_nodes, 'azure2') \ - .highlight_nodes(v.outputs,'goldenrod2') \ - .highlight_nodes(unknown_sized_tensor_ops,'cyan2') + vis.highlight_nodes(v.inputs, "yellow").highlight_nodes( + const_nodes, "azure2" + ).highlight_nodes(v.outputs, "goldenrod2").highlight_nodes( + unknown_sized_tensor_ops, "cyan2" + ) if len(highlight_debug_nodes) > 0: - vis.highlight_nodes(highlight_debug_nodes, 'green') + vis.highlight_nodes(highlight_debug_nodes, "green") if name_and_op_style: - vis.labeller(lambda n: n.name + ': ' + n.op) + vis.labeller(lambda n: n.name + " (" + n.op + ")") - res = vis.visit_all( - v.graph, - nodename_prefix=str(ctr)).get_result('subgraph', 'cluster_' + k.replace('/', '_')) - dotstring += '\n'.join('\t' + r for r in res.split('\n')) + "\n" + res = vis.visit_all(v.graph, nodename_prefix=str(ctr)).get_result( + "subgraph", "cluster_" + k.replace("/", "_") + ) + dotstring += "\n".join("\t" + r for r in res.split("\n")) + "\n" ctr += 1 dotstring += "}" return dotstring - def add_function_with_prefix(self, fprefix, ssa): - assert (isinstance(ssa, SSAFunction)) + def add_function_with_prefix(self, fprefix, tfssa): + assert isinstance(tfssa, SSAFunction) s = 0 while fprefix + str(s) in self.functions: s += 1 - self.functions[fprefix + str(s)] = ssa + self.functions[fprefix + str(s)] = tfssa - def add_function(self, f, ssa): - self.functions[f] = ssa + def add_function(self, f, tfssa): + self.functions[f] = tfssa def __copy__(self): - import copy + ret = self.__class__() + ret.functions = self.functions + ret.variables = self.variables + ret.global_resource = self.global_resource + return ret + + def __deepcopy__(self, memo): ret = self.__class__() ret.functions = {k: copy.copy(v) for k, v in self.functions.items()} ret.variables = {k: copy.copy(v) for k, v in self.variables.items()} @@ -419,7 +523,7 @@ def copy(self): def _find_free_name(self, prefix): idx = 0 - while (True): + while True: name = prefix + str(idx) found = False for v in self.functions.values(): @@ -441,8 +545,11 @@ def get_image_format(self): for name in graph: node = graph[name] - if node.attr.get('data_format', None) == 'NHWC' or node.attr.get('data_format') == 'NHWC_format_inserted': - return 'NHWC' - elif node.attr.get('data_format', None) == 'NCHW': - return 'NCHW' + if ( + node.attr.get("data_format", None) == "NHWC" + or node.attr.get("data_format") == "NHWC_format_inserted" + ): + 
return "NHWC"
+            elif node.attr.get("data_format", None) == "NCHW":
+                return "NCHW"
    return None
diff --git a/coremltools/converters/mil/frontend/tensorflow2/__init__.py b/coremltools/converters/mil/frontend/tensorflow2/__init__.py
new file mode 100644
index 000000000..4cca3ab2a
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from ....._deps import _HAS_TF_2
+
+if _HAS_TF_2:
+    from .ops import *  # register all TF2 ops
+    from coremltools.converters.mil.frontend.tensorflow.tf_op_registry import (
+        register_tf_op,
+    )
diff --git a/coremltools/converters/mil/frontend/tensorflow2/converter.py b/coremltools/converters/mil/frontend/tensorflow2/converter.py
new file mode 100644
index 000000000..a459102d4
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/converter.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from coremltools.converters.mil.frontend.tensorflow.converter import TFConverter
+from coremltools.converters.mil.frontend.tensorflow.basic_graph_ops import (
+    simple_topsort,
+)
+
+from .ssa_passes.tf_passes import tensorflow_passes as tensorflow2_passes
+
+
+class TF2Converter(TFConverter):
+    def __init__(self, tf_ssa, inputs=None, outputs=None, **kwargs):
+        TFConverter.__init__(self, tf_ssa, inputs, outputs, **kwargs)
+
+        # Overwrite tensorflow_passes.
+        # TF 2.x uses a different set of graph passes.
+        self.tensorflow_passes = tensorflow2_passes
+
+    def _get_stack(self, tfssa, root="main"):
+        """
+        Overwrite TFConverter._get_stack() as TF2 generates different sub-graphs.
+        """
+
+        # We're trying to get an order of how to loop through the graphs.
+        # This is NOT necessarily a DAG.
+        dep = {x: [] for x in tfssa.functions}
+        for fname in tfssa.functions:
+            for node in tfssa.functions[fname].graph.values():
+                func_x, func_y = None, None
+
+                if node.op in {"StatelessIf", "If"}:
+                    func_x = node.attr.get("then_branch")
+                    func_y = node.attr.get("else_branch")
+                elif node.op in {"StatelessWhile", "While"}:
+                    func_x = node.attr.get("body")
+                    func_y = node.attr.get("cond")
+
+                if func_x and fname not in dep[func_x]:
+                    dep[func_x].append(fname)
+                if func_y and fname not in dep[func_y]:
+                    dep[func_y].append(fname)
+
+        assert len(dep[root]) == 0
+        graph_stack = simple_topsort(dep)
+
+        return graph_stack
diff --git a/coremltools/converters/mil/frontend/tensorflow2/load.py b/coremltools/converters/mil/frontend/tensorflow2/load.py
new file mode 100644
index 000000000..1856bafc3
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/load.py
@@ -0,0 +1,317 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import absolute_import as _
+from __future__ import division as _
+from __future__ import print_function as _
+
+import logging as _logging
+import os.path as _os_path
+
+from six import string_types as _string_types
+from tqdm import tqdm as _tqdm
+import tensorflow as _tf
+
+from tensorflow.python.framework import dtypes as _dtypes
+from tensorflow.python.framework.convert_to_constants import (
+    convert_variables_to_constants_v2 as _convert_variables_to_constants_v2,
+)
+from tensorflow.python.framework.function_def_to_graph import (
+    function_def_to_graph as _function_def_to_graph,
+)
+from tensorflow.python.keras.saving import saving_utils as _saving_utils
+
+from tensorflow.lite.python.util import (
+    run_graph_optimizations as _run_graph_optimizations,
+)
+from tensorflow.lite.python.util import get_grappler_config as _get_grappler_config
+
+from .converter import TF2Converter
+from coremltools.converters.mil.frontend.tensorflow.basic_graph_ops import fill_outputs
+from coremltools.converters.mil.frontend.tensorflow.tf_graph_pass import (
+    constant_propagation,
+    remove_variable_nodes,
+    tensor_array_resource_removal,
+    insert_get_tuple,
+    delete_disconnected_nodes,
+    fuse_dilation_conv,
+)
+from coremltools.converters.mil.frontend.tensorflow2.tf_graph_pass import (
+    flatten_sub_graph_namespaces,
+    rewrite_control_flow_functions,
+)
+from coremltools.converters.mil.frontend.tensorflow.tfssa import (
+    NetworkEnsemble,
+    SSAFunction,
+)
+from coremltools.converters.mil.frontend.tensorflow.parsed_tf_node import ParsedTFNode
+from coremltools.converters.mil.frontend.tensorflow.load import TFLoader
+
+
+class TF2Loader(TFLoader):
+    def __init__(self, model, debug=False, **kwargs):
+        """
+        TensorFlow 2.x model loader.
+
+        Parameters
+        ----------
+        model: Model created with TensorFlow 2.x
+            One of the following model formats:
+            - TensorFlow tf.keras.Model object or HDF5 (.h5) file path
+            - TensorFlow SavedModel directory path
+            - TensorFlow list of concrete function(s)
+        debug: bool, optional. Defaults to False.
+            This flag should generally be False except for debugging purposes
+            for diagnosing conversion errors. Setting this flag to True will
+            cause graph pass errors to be ignored, forcefully returning a
+            NetworkEnsemble object.
+        kwargs: dict(str, Any), optional
+            Dictionary of additional arguments.
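+
+        Examples
+        --------
+        A minimal sketch (assuming a small tf.keras model; in typical use the
+        loader is driven by the public converter API rather than constructed
+        directly)::
+
+            import tensorflow as tf
+
+            model = tf.keras.Sequential(
+                [tf.keras.layers.Dense(8, input_shape=(4,))]
+            )
+            loader = TF2Loader(model)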
+        """
+        TFLoader.__init__(self, model, debug, **kwargs)
+
+    def _graph_def_from_model(self, outputs=None):
+        """Overwrites TFLoader._graph_def_from_model()"""
+        msg = (
+            "Expected model format: [SavedModel | [concrete_function] | "
+            "tf.keras.Model | .h5], got {}"
+        )
+        if (
+            isinstance(self.model, list)
+            or isinstance(self.model, _tf.keras.Model)
+            or isinstance(self.model, _string_types)
+        ):
+            cfs = []
+            if isinstance(self.model, list):
+                cfs = self.model
+            if isinstance(self.model, _tf.keras.Model):
+                cfs = self._concrete_fn_from_tf_keras_or_h5(self.model)
+            elif isinstance(self.model, _string_types):
+                if not _os_path.exists(self.model):
+                    raise ValueError(
+                        'Input model "{}" does not exist'.format(self.model)
+                    )
+                elif _os_path.isfile(self.model) and self.model.endswith(".h5"):
+                    cfs = self._concrete_fn_from_tf_keras_or_h5(self.model)
+                elif _os_path.isdir(self.model):
+                    saved_model = _tf.saved_model.load(self.model)
+                    sv = saved_model.signatures.values()
+                    cfs = sv if isinstance(sv, list) else list(sv)
+                else:
+                    raise NotImplementedError(msg.format(self.model))
+
+            graph_def = self._graph_def_from_concrete_fn(cfs)
+            return self.extract_sub_graph(graph_def, outputs)
+        else:
+            raise NotImplementedError(msg.format(self.model))
+
+    def _tf_ssa_from_graph_def(self, fn_name="main"):
+        """Overwrites TFLoader._tf_ssa_from_graph_def()"""
+        with _tf.Graph().as_default() as tf_graph:
+            _tf.graph_util.import_graph_def(self._graph_def, name="")
+
+        # sub-graphs' input shapes are required for extracting sub-graphs
+        sg_input_shapes = self._populate_sub_graph_input_shapes(
+            tf_graph, tf_graph._functions
+        )
+
+        # get graph_dict and sub-graphs' inputs / outputs
+        graph_dict, inputs, outputs, ret = self._dict_from_graph_def(
+            tf_graph, fn_name, sg_input_shapes
+        )
+
+        tf_ssa = NetworkEnsemble()
+        for name, graph in graph_dict.items():
+            tensor_array_resource_removal(graph)
+            graph = insert_get_tuple(graph)
+            graph = fill_outputs(graph)
+            if name == "main":  # skip for sub-graphs, as an input can also be an output
+                delete_disconnected_nodes(graph)
+            tf_ssa.functions[name] = SSAFunction(
+                graph, inputs=inputs[name], outputs=outputs[name], ret=ret[name]
+            )
+
+        return tf_ssa
+
+    def _program_from_tf_ssa(self):
+        # Notes:
+        # - "flatten_sub_graph_namespaces" should run after
+        #   "constant_propagation", as it changes node names that the constant
+        #   propagation pass relies on to perform session.run(); TF cannot
+        #   resolve the renamed nodes.
+        tf_passes = [
+            # delete_asserts,  # FIXME: rdar://62472804
+            constant_propagation,
+            rewrite_control_flow_functions,
+            flatten_sub_graph_namespaces,
+            remove_variable_nodes,
+            fuse_dilation_conv,
+        ]
+
+        if self.debug:
+            for tf_pass in _tqdm(
+                tf_passes, desc="Running TensorFlow Graph Passes", unit=" passes"
+            ):
+                try:
+                    tf_pass(self._tf_ssa)
+                except Exception as e:
+                    _logging.exception('Exception in pass "{}": {}'.format(tf_pass, e))
+                    _logging.info("Ignoring exception and continuing to next pass")
+
+        else:
+            for tf_pass in _tqdm(
+                tf_passes, desc="Running TensorFlow Graph Passes", unit=" passes"
+            ):
+                tf_pass(self._tf_ssa)
+
+        if self.debug:
+            import graphviz
+
+            dot_string = self._tf_ssa.get_dot_string(
+                annotation=True, name_and_op_style=True, highlight_debug_nodes=[]
+            )
+            graphviz.Source(dot_string).view(
+                filename="/tmp/ssa_after_tf_passes", cleanup=True
+            )
+
+        converter = TF2Converter(self._tf_ssa, **self.kwargs)
+        return converter.convert()
+
+    def _populate_sub_graph_input_shapes(self, graph, graph_fns):
+        """
+        Populate function (sub-graph) input shapes from control flow op's inputs.
+        Note that the functions (sub-graphs) are not nested but the control flow
+        ops are nested. The input shapes are used to extract sub-graphs from the
+        parent graph (as the input of function_def_to_graph).
+
+        Parameters
+        ----------
+        graph: tf.Graph
+            TensorFlow graph.
+        graph_fns: list of graph functions.
+            List of TensorFlow graph functions.
+
+        Returns
+        -------
+        sg_input_shapes: dict(str: list)
+            Dictionary of function (sub-graph) name and input shape pairs.
+        """
+        sg_input_shapes = {}
+        sub_graphs = []
+        for op in graph.get_operations():
+            if op.type not in {"StatelessIf", "If", "StatelessWhile", "While"}:
+                continue
+
+            sg1, sg2 = None, None
+            if op.type in {"StatelessIf", "If"}:
+                sg1 = op.get_attr("then_branch").name
+                sg2 = op.get_attr("else_branch").name
+            if op.type in {"StatelessWhile", "While"}:
+                sg1 = op.get_attr("cond").name
+                sg2 = op.get_attr("body").name
+
+            # memorize input shapes for sub-graph conversions
+            op_input_shapes = [i.get_shape() for i in op.inputs]
+            sg_input_shapes.update({sg1: op_input_shapes, sg2: op_input_shapes})
+            sub_graphs += [sg1, sg2]
+
+        for name in sub_graphs:
+            sg = graph_fns.get(name)
+            fn_def = sg.definition
+            op_input_shapes = sg_input_shapes[name]
+            op_input_shapes = op_input_shapes[-len(fn_def.signature.input_arg) :]
+            fn_graph = _function_def_to_graph(fn_def, input_shapes=op_input_shapes)
+            sg_input_shapes.update(
+                self._populate_sub_graph_input_shapes(fn_graph, graph_fns)
+            )
+
+        return sg_input_shapes
+
+    @staticmethod
+    def _dict_from_graph_def(graph, fn_name="main", sg_input_shapes=None):
+        """
+        Load a tf.Graph and transform it into a dictionary of ParsedTFNodes.
+        The graph potentially contains multiple functions; in such cases, the
+        functions (sub-graphs) are resolved recursively.
+
+        Parameters
+        ----------
+        graph: tf.Graph
+            TensorFlow graph.
+        fn_name: str, optional, defaults to 'main'
+            Function name of the graph.
+        sg_input_shapes: dict(str: list)
+            Dictionary of name and input shapes for functions / sub-graphs.
+
+        Returns
+        -------
+        dict(str: dict(str: ParsedTFNode))
+            Dictionary of function name and dictionary of node name and
+            ParsedTFNode object.
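+
+        For illustration (a hypothetical graph with one Placeholder "x"
+        feeding one Identity "out" and no sub-functions), the four returned
+        values would look roughly like::
+
+            ({"main": {"x": <ParsedTFNode>, "out": <ParsedTFNode>}},
+             {"main": []}, {"main": []}, {"main": {}})
+
+        Inputs/outputs are only populated here for sub-functions; for "main"
+        they are inferred later when the SSAFunction is built.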
+        """
+        graph_dict = {fn_name: {}}
+        graph_inputs = {fn_name: []}
+        graph_outputs = {fn_name: []}
+        graph_ret = {fn_name: {}}
+
+        for op in graph.get_operations():
+            graph_dict[fn_name].update({op.name: ParsedTFNode(op.node_def)})
+
+        for name, sg in graph._functions.items():
+            sg_def = sg.definition
+            input_shapes = sg_input_shapes[name]
+            input_shapes = input_shapes[-len(sg_def.signature.input_arg) :]
+            fn_graph = _function_def_to_graph(sg_def, input_shapes=input_shapes)
+
+            graph_dict.update(
+                TF2Loader._dict_from_graph_def(fn_graph, name, sg_input_shapes)[0]
+            )
+            graph_inputs.update({name: [t.name.split(":")[0] for t in fn_graph.inputs]})
+            graph_outputs.update(
+                {name: [t.name.split(":")[0] for t in fn_graph.outputs]}
+            )
+
+            # ret is a mapping from the output arg names from `signature` to the
+            # outputs from `node_def` that should be returned by the function.
+            sg_def_ret = sg_def.ret
+            sg_def_ret["identity_0"] = sg_def_ret.pop("identity")
+            graph_ret.update({name: sg_def_ret})
+
+        return graph_dict, graph_inputs, graph_outputs, graph_ret
+
+    @staticmethod
+    def _concrete_fn_from_tf_keras_or_h5(keras_model):
+        if isinstance(keras_model, _tf.keras.Model):
+            input_signature = _saving_utils.model_input_signature(
+                keras_model, keep_original_batch_size=True
+            )
+            fn = _saving_utils.trace_model_call(keras_model, input_signature)
+        else:
+            keras_model = _tf.keras.models.load_model(keras_model)
+            input_signature = _saving_utils.model_input_signature(
+                keras_model, keep_original_batch_size=True
+            )
+            fn = _saving_utils.trace_model_call(keras_model, input_signature)
+        return [fn.get_concrete_function()]
+
+    @staticmethod
+    def _graph_def_from_concrete_fn(cfs):
+        if len(cfs) != 1:
+            raise NotImplementedError("Only a single concrete function is supported.")
+
+        frozen_fn = _convert_variables_to_constants_v2(cfs[0], lower_control_flow=False)
+        graph_def = frozen_fn.graph.as_graph_def(add_shapes=True)
+
+        # run Grappler's constant folding and dependency optimization passes
+        fn_inputs = [t for t in frozen_fn.inputs if t.dtype != _dtypes.resource]
+        graph_def = _run_graph_optimizations(
+            graph_def,
+            fn_inputs,
+            frozen_fn.outputs,
+            config=_get_grappler_config(["constfold", "dependency"]),
+            graph=frozen_fn.graph,
+        )
+        return graph_def
diff --git a/coremltools/converters/mil/frontend/tensorflow2/ops.py b/coremltools/converters/mil/frontend/tensorflow2/ops.py
new file mode 100644
index 000000000..bee29ce87
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/ops.py
@@ -0,0 +1,201 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import numpy as _np
+
+from coremltools.converters.mil.frontend.tensorflow.ops import (
+    _transpose_NHWC_to_NCHW,
+    _transpose_NCHW_to_NHWC,
+)
+
+# TF 2.x now imports and registers all TF 1.x ops against the new registry
+# (separated from TF 1.x registry).
Overwriting may be needed in case the op
+# semantics differ between TF 1.x and TF 2.x.
+from coremltools.converters.mil.frontend.tensorflow.ops import *
+from coremltools.converters.mil.frontend.tensorflow.dialect_ops import *
+
+
+@register_tf_op(override=True)
+def FusedBatchNormV3(context, node):
+    # Get attributes
+    data_format = node.attr.get("data_format", "NHWC")
+    epsilon = node.attr.get("epsilon", None)
+
+    # Get inputs
+    x = context[node.inputs[0]]
+    scale = context[node.inputs[1]]
+    offset = context[node.inputs[2]]
+    mean = context[node.inputs[3]]
+    variance = context[node.inputs[4]]
+    if data_format == "NHWC":
+        # TF's FusedBatchNorm is only for 4D inputs
+        x = _transpose_NHWC_to_NCHW(x)
+        x = mb.batch_norm(
+            x=x, mean=mean, variance=variance, gamma=scale, beta=offset, epsilon=epsilon
+        )
+        x = _transpose_NCHW_to_NHWC(x, node.name)
+    else:
+        x = mb.batch_norm(
+            x=x,
+            mean=mean,
+            variance=variance,
+            gamma=scale,
+            beta=offset,
+            epsilon=epsilon,
+            name=node.name,
+        )
+    # Inference-only batch norm does not have meaningful outputs for
+    # batch_mean, batch_variance etc.
+    context.add(node.name, x)
+
+
+@register_tf_op(tf_alias=["If"], override=True)
+def StatelessIf(context, node):
+    pred = context[node.inputs[0]][0]
+    then_graph = context.get_graph(node.attr.get("then_branch"))
+    else_graph = context.get_graph(node.attr.get("else_branch"))
+
+    def then_fn():
+        context.stack_func_inputs(context[node.inputs[0]])
+        then_output_var = convert_graph(context, then_graph)
+        context.unstack_func_inputs()
+        return then_output_var
+
+    def else_fn():
+        context.stack_func_inputs(context[node.inputs[0]])
+        else_output_var = convert_graph(context, else_graph)
+        context.unstack_func_inputs()
+        return else_output_var
+
+    x = mb.cond(pred=pred, _true_fn=then_fn, _false_fn=else_fn, name=node.name)
+
+    # wraps x as a tuple for the get_tuple that always follows the cond node.
+    x = (x,) if not isinstance(x, (tuple, list)) else x
+
+    context.add(node.name, x)
+
+
+@register_tf_op(tf_alias=["While"], override=True)
+def StatelessWhile(context, node):
+    # inputs are loop_counter, max_iterations, [loop_vars]
+    loop_vars = context[node.inputs[0]][2:]
+
+    cond_graph = context.get_graph(node.attr.get("cond"))
+    body_graph = context.get_graph(node.attr.get("body"))
+
+    def cond(*loop_vars):
+        context.stack_func_inputs(loop_vars)
+        cond_output_vars = convert_graph(context, cond_graph)
+        context.unstack_func_inputs()
+        return cond_output_vars
+
+    def body(*loop_vars):
+        context.stack_func_inputs(loop_vars)
+        body_output_vars = convert_graph(context, body_graph)
+        context.unstack_func_inputs()
+        return body_output_vars
+
+    x = mb.while_loop(_cond=cond, _body=body, loop_vars=loop_vars, name=node.name)
+
+    # wraps x as a tuple for the get_tuple that always follows the while node.
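+    # (For example, a loop over a single tensor yields a single Var here,
+    # while the get_tuple emitted for the While node expects a sequence it can
+    # index into, hence the wrap below.)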
+ x = (x,) if not isinstance(x, (tuple, list)) else x + + context.add(node.name, x) + + +@register_tf_op +def TensorListFromTensor(context, node): + value = context[node.inputs[0]] + element_shape = context[node.inputs[1]] + element_dtype = node.attr.get("element_dtype") + dtype_str = types.builtin_to_string(element_dtype) + + length = mb.shape(x=value) + length = mb.slice_by_index(x=length, begin=[0], end=[1], squeeze_mask=[True]) + + if element_shape is not None and all(_np.atleast_1d(element_shape.val) != -1): + ls = mb.make_list(init_length=length, elem_shape=element_shape, dtype=dtype_str) + else: + ls = mb.tf_make_list(init_length=length, dtype=dtype_str) + + indices = mb.range_1d(end=length, start=0, step=1) + ls = mb.list_scatter(ls=ls, indices=indices, value=value, name=node.name) + context.add(node.name, ls) + + +@register_tf_op +def TensorListGather(context, node): + ls = context[node.inputs[0]] + indices = context[node.inputs[1]] + tensor = mb.list_gather(ls=ls, indices=indices, name=node.name) + context.add(node.name, tensor) + + +@register_tf_op +def TensorListGetItem(context, node): + ls = context[node.inputs[0]] + index = context[node.inputs[1]] + new_ls = mb.list_read(ls=ls, index=index, name=node.name) + context.add(node.name, new_ls) + + +@register_tf_op +def TensorListLength(context, node): + ls = context[node.inputs[0]] + length = mb.list_length(ls=ls, name=node.name) + context.add(node.name, length) + + +@register_tf_op +def TensorListResize(context, node): + # skip here as the list will be dynamically resized when + # necessary in downstream list_write or list_scatter ops + Identity(context, node) + + +@register_tf_op +def TensorListReserve(context, node): + element_shape = context[node.inputs[0]] + num_elements = context[node.inputs[1]] + element_dtype = node.attr.get("element_dtype") + dtype = types.builtin_to_string(element_dtype) + + if element_shape is not None and all(_np.atleast_1d(element_shape.val) != -1): + ls = mb.make_list( + init_length=num_elements, + elem_shape=element_shape, + dtype=dtype, + name=node.name, + ) + else: + ls = mb.tf_make_list(init_length=num_elements, dtype=dtype, name=node.name) + context.add(node.name, ls) + + +@register_tf_op +def TensorListScatterIntoExistingList(context, node): + ls = context[node.inputs[0]] + value = context[node.inputs[1]] + indices = context[node.inputs[2]] + ls = mb.list_scatter(ls=ls, indices=indices, value=value, name=node.name) + context.add(node.name, ls) + + +@register_tf_op +def TensorListSetItem(context, node): + ls = context[node.inputs[0]] + index = context[node.inputs[1]] + value = context[node.inputs[2]] + new_ls = mb.list_write(ls=ls, index=index, value=value, name=node.name) + context.add(node.name, new_ls) + + +@register_tf_op +def TensorListStack(context, node): + ls = context[node.inputs[0]] + length = mb.list_length(ls=ls) + indices = mb.range_1d(end=length, start=0, step=1) + x = mb.list_gather(ls=ls, indices=indices, name=node.name) + context.add(node.name, x) diff --git a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/__init__.py b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/__init__.py new file mode 100644 index 000000000..a659bb2d3 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+# Import all passes in this dir
+from os.path import dirname, basename, isfile, join
+import glob
+
+excluded_files = [
+    "__init__.py",
+    "tf_passes.py",
+]
+modules = glob.glob(join(dirname(__file__), "*.py"))
+pass_modules = [
+    basename(f)[:-3]
+    for f in modules
+    if isfile(f)
+    and basename(f)[:1] != "_"  # Follow Python convention to hide _* files.
+    and basename(f)[:4] != "test"
+    and basename(f) not in excluded_files
+]
+__all__ = pass_modules
+
+from . import *  # import everything in __all__
diff --git a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/remove_vacuous_cond.py b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/remove_vacuous_cond.py
new file mode 100644
index 000000000..daae15119
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/remove_vacuous_cond.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+import logging
+
+
+def remove_vacuous_cond_block(block):
+    num_changes = 0
+    for op in list(block.operations):
+        for b in op.blocks:
+            num_changes += remove_vacuous_cond_block(b)
+
+        if op.op_type != "cond":
+            continue
+
+        then_ops = op.blocks[0].operations
+        else_ops = op.blocks[1].operations
+
+        if len(then_ops) > 1 or len(else_ops) > 1:
+            continue
+
+        # Pattern 1: a dynamic-length TensorList generates this pattern. See
+        # the conversion functions of the TensorList* ops for details. TF2's
+        # graph contains a tf.cond op with 2 sub-graphs. The condition is
+        # either a `less_equal` or a `greater_equal` op. One sub-graph
+        # contains only an identity op forwarding the original TensorList;
+        # the other contains a TensorListResize op that generates a new
+        # TensorList. In the backend, however, the list length is handled
+        # dynamically in list_write/list_scatter, so the entire tf.cond and
+        # its sub-graphs can be removed.
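+        # Schematically: %pred = less_equal(list_length(%ls), %size) guards a
+        # cond whose blocks are empty after conversion, so the cond's output
+        # can be replaced with %ls directly.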
+        if len(then_ops) == 0 and len(else_ops) == 0:
+            if op.pred.op.op_type not in {"less_equal", "greater_equal"}:
+                continue
+
+            # cond op must have pred
+            pred_x = op.pred.op.x.op
+            pred_y = op.pred.op.y.op
+
+            if pred_x is None and pred_y is None:
+                continue
+
+            if op.pred.op.op_type == "less_equal":
+                if pred_x.op_type != "list_length":
+                    continue
+                new_var = pred_x.ls
+
+            else:  # op.pred.op.op_type == 'greater_equal':
+                if pred_y.op_type != "list_length":
+                    continue
+                new_var = pred_y.ls
+
+            with block:
+                op.enclosing_block.replace_uses_of_var_after_op(
+                    anchor_op=op, old_var=op.outputs[0], new_var=new_var
+                )
+                block.remove_ops([op])  # rely on DCE to remove extra cond inputs
+                num_changes += 1
+
+        # Pattern 2: both the then and else branches contain exactly 1 identity op
+        if len(then_ops) == 1 and len(else_ops) == 1:
+            if then_ops[0].op_type != "identity" or else_ops[0].op_type != "identity":
+                continue
+            if then_ops[0].x != else_ops[0].x:
+                continue
+
+            with block:
+                new_var = mb.identity(x=then_ops[0].x, before_op=op, name=op.name)
+                op.enclosing_block.replace_uses_of_var_after_op(
+                    anchor_op=op, old_var=op.outputs[0], new_var=new_var
+                )
+                block.remove_ops([op])  # rely on DCE to remove extra cond inputs
+                num_changes += 1
+
+    return num_changes
+
+
+@register_pass(namespace="tensorflow2")
+def remove_vacuous_cond(prog):
+    """
+    Remove a cond op and its sub-graphs when both the then and else branches
+    produce an identity. One example use case is the TensorListReverse op: in
+    Core ML, we dynamically resize in write operations, and thus both branches
+    of the cond op will be a skip (identity) op.
+
+    Given:
+
+    main(%a: (1, bool),
+         %b: (2, 3, fp32)) {
+      block0() {
+        %squeeze_0: (bool) = squeeze(x=%a, name="squeeze_0")
+        %cond_0: (2, 3, fp32) = cond(pred=%squeeze_0, name="cond_0")
+          cond_0_true() {
+            %identity_0: (2, 3, fp32) = identity(x=%b, name="identity_0")
+          } -> (%identity_0)
+          cond_0_false() {
+            %identity_1: (2, 3, fp32) = identity(x=%b, name="identity_1")
+          } -> (%identity_1)
+      } -> (%cond_0)
+    }
+
+    Result:
+
+    main(%a: (1, bool),
+         %b: (2, 3, fp32)) {
+      block0() {
+        %squeeze_0: (bool) = squeeze(x=%a, name="squeeze_0")
+        %cond_0: (2, 3, fp32) = identity(x=%b, name="cond_0")
+      } -> (%cond_0)
+    }
+    """
+    for f_name, f in prog.functions.items():
+        num_changes = remove_vacuous_cond_block(f)
+        msg = "remove_vacuous_cond: changed {} ops in function '{}'"
+        logging.info(msg.format(num_changes, f_name))
diff --git a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py
new file mode 100644
index 000000000..5cb4a2289
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/test_v2_passes.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.testing_utils import ( + assert_op_count_match, + assert_model_is_valid, + assert_same_output_names, +) +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +from coremltools.converters.mil.mil import types +import copy + +import numpy as np + +np.random.seed(1984) +validate_model = True + + +def test_remove_vacuous_cond(): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(1,), dtype=types.bool), + mb.TensorSpec(shape=(2, 3)), + ] + ) + def prog(a, b): + def then_branch(): + return mb.identity(x=b) + + def else_branch(): + return mb.identity(x=b) + + pred = mb.squeeze(x=a) + return mb.cond(pred=pred, _true_fn=then_branch, _false_fn=else_branch) + + cond_op = prog.find_ops(op_type="cond", exactly_one=True)[0] + original_cond_op_name = cond_op.name + assert len(cond_op.blocks[0].operations) == 1 + assert len(cond_op.blocks[1].operations) == 1 + assert cond_op.blocks[0].operations[0].op_type == "identity" + assert cond_op.blocks[1].operations[0].op_type == "identity" + + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["tensorflow2::remove_vacuous_cond"](prog) + assert_same_output_names(prev_prog, prog) + + cond_op = prog.find_ops(op_type="cond") + assert len(cond_op) == 0 + identity_op = prog.find_ops(prefix=original_cond_op_name, exactly_one=True)[0] + assert identity_op.op_type == "identity" + + if validate_model: + assert_model_is_valid(prog, {"a": (1,), "b": (2, 3)}) diff --git a/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py new file mode 100644 index 000000000..57f30828a --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/ssa_passes/tf_passes.py @@ -0,0 +1,29 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +import logging + + +def tensorflow_passes(prog): + passes = [ + "common::dead_code_elimination", + "common::loop_invariant_elimination", + # tensorflow2::remove_vacuous_cond should come before + # tensorflow::backfill_make_list_elem_type. + "tensorflow2::remove_vacuous_cond", + "tensorflow::backfill_make_list_elem_type", + # DCE to reduce tf_lstm_block outputs and allow lstm_rewrite to + # ssa lstm + "common::dead_code_elimination", + ] + + prog.validate() + for p in passes: + logging.info('Performing passes for TensorFlow 2.x frontend: "{}"'.format(p)) + PASS_REGISTRY[p](prog) + prog.validate() + + logging.debug("Program after TensorFlow 2.x frontend passes:\n{}".format(prog)) diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/__init__.py b/coremltools/converters/mil/frontend/tensorflow2/test/__init__.py new file mode 100644 index 000000000..61aafff42 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/test/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py new file mode 100644 index 000000000..2cf7c6723 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_load.py @@ -0,0 +1,201 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import os +import shutil +import tempfile + +import pytest +import coremltools.converters as converter +from coremltools.converters.mil.input_types import TensorType +from coremltools.converters.mil.frontend.tensorflow.test.testing_utils import ( + get_tf_keras_io_names, +) +from coremltools.converters.mil.frontend.tensorflow.test import ( + testing_utils as tf_testing_utils, +) +from coremltools.converters.mil.frontend.tensorflow2.test.testing_utils import ( + make_tf2_graph, + run_compare_tf2, +) + +tf = pytest.importorskip("tensorflow", minversion="2.1.0") + +# ----------------------------------------------------------------------------- +# Overwrite utilities to enable different conversion / compare method +tf_testing_utils.frontend = "tensorflow" +tf_testing_utils.make_tf_graph = make_tf2_graph +tf_testing_utils.run_compare_tf = run_compare_tf2 + +# ----------------------------------------------------------------------------- +# Import TF 2.x-compatible TF 1.x test cases +from coremltools.converters.mil.frontend.tensorflow.test.test_load import ( + frontend, + TestTf1ModelInputsOutputs as TestTf2ModelInputsOutputs, +) + + +class TestTf2ModelFormats: + def setup(self): + self.saved_model_dir = tempfile.mkdtemp() + _, self.model_path_h5 = tempfile.mkstemp( + suffix=".h5", prefix=self.saved_model_dir + ) + _, self.model_path_pb = tempfile.mkstemp( + suffix=".pb", prefix=self.saved_model_dir + ) + + def teardown(self): + if os.path.exists(self.saved_model_dir): + shutil.rmtree(self.saved_model_dir) + + def test_keras_model(self): + keras_model = tf.keras.Sequential( + [tf.keras.layers.ReLU(input_shape=(4, 5), batch_size=3)] + ) + input_names, output_names = get_tf_keras_io_names(keras_model) + mlmodel = converter.convert( + keras_model, + inputs=[TensorType(input_names[0], (3, 4, 5))], + outputs=["Identity"], + source=frontend, + ) + assert mlmodel is not None + + def test_keras_saved_model_file(self): + keras_model = tf.keras.Sequential( + [ + tf.keras.layers.Flatten(input_shape=(28, 28), batch_size=1), + tf.keras.layers.Dense(10, activation=tf.nn.relu), + ] + ) + keras_model.save(self.saved_model_dir, save_format="tf") + mlmodel = converter.convert( + self.saved_model_dir, outputs="Identity", source=frontend + ) + assert mlmodel is not None + + def test_keras_h5_file(self): + keras_model = tf.keras.Sequential( + [tf.keras.layers.ReLU(input_shape=(4, 5), batch_size=3)] + ) + input_names, output_names = get_tf_keras_io_names(keras_model) + keras_model.save(self.model_path_h5, save_format="h5") + mlmodel = converter.convert( + self.model_path_h5, + inputs=[TensorType(input_names[0], (3, 4, 5))], + outputs=["Identity"], + source=frontend, + ) + assert mlmodel is not None + + def test_concrete_function_list_from_tf_low_level_api(self): + root = tf.train.Checkpoint() + root.v1 = tf.Variable(3.0) + root.v2 = 
tf.Variable(2.0) + root.f = tf.function(lambda x: root.v1 * root.v2 * x) + + input_data = tf.constant(1.0, shape=[1, 1]) + to_save = root.f.get_concrete_function(input_data) + tf.saved_model.save(root, self.saved_model_dir, to_save) + + tf_model = tf.saved_model.load(self.saved_model_dir) + concrete_func = tf_model.signatures[ + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] + mlmodel = converter.convert( + [concrete_func], outputs="Identity", source=frontend + ) + assert mlmodel is not None + + def test_saved_model_list_from_tf_function(self): + class build_model(tf.Module): + @tf.function( + input_signature=[tf.TensorSpec(shape=[3, 4, 5], dtype=tf.float32)] + ) + def __call__(self, x): + return tf.nn.relu(x) + + model = build_model() + tf.saved_model.save(model, self.saved_model_dir) + mlmodel = converter.convert( + self.saved_model_dir, outputs=["Identity"], source=frontend + ) + assert mlmodel is not None + + def test_concrete_function_list_from_tf_function(self): + class build_model(tf.Module): + @tf.function( + input_signature=[tf.TensorSpec(shape=[3, 4, 5], dtype=tf.float32)] + ) + def __call__(self, x): + return tf.nn.relu(x) + + model = build_model() + concrete_func = model.__call__.get_concrete_function() + mlmodel = converter.convert( + [concrete_func], outputs=["Identity"], source=frontend + ) + assert mlmodel is not None + + def test_model_metadata(self): + keras_model = tf.keras.Sequential( + [tf.keras.layers.ReLU(input_shape=(4, 5), batch_size=3)] + ) + input_names, output_names = get_tf_keras_io_names(keras_model) + mlmodel = converter.convert( + keras_model, + inputs=[TensorType(input_names[0], (3, 4, 5))], + outputs=["Identity"], + source=frontend, + ) + metadata_keys = mlmodel.get_spec().description.metadata.userDefined + assert "com.github.apple.coremltools.version" in metadata_keys + assert "com.github.apple.coremltools.source" in metadata_keys + assert "tensorflow==2." 
in metadata_keys["com.github.apple.coremltools.source"] + + def test_invalid_format_none(self): + with pytest.raises(NotImplementedError) as e: + converter.convert(None, source=frontend) + e.match(r"Expected model format: .* .h5") + + def test_invalid_format_invalid_extension(self): + _, invalid_filename = tempfile.mkstemp( + suffix=".invalid", prefix=self.saved_model_dir + ) + with pytest.raises(NotImplementedError) as e: + converter.convert(invalid_filename, source=frontend) + e.match(r"Expected model format: .* .h5") + + def test_invalid_format_multiple_concrete_functions(self): + class build_model(tf.Module): + @tf.function( + input_signature=[tf.TensorSpec(shape=[3, 4, 5], dtype=tf.float32)] + ) + def __call__(self, x): + return tf.nn.relu(x) + + model = build_model() + cf = model.__call__.get_concrete_function() + with pytest.raises(NotImplementedError) as e: + converter.convert([cf, cf, cf], source=frontend) + e.match(r"Only a single concrete function is supported") + + def test_invalid_converter_type(self): + with pytest.raises(ValueError) as e: + converter.convert(None, source="invalid") + + expected_msg = r'Unrecognized value of argument "source": .*' + e.match(expected_msg) + + with pytest.raises(NotImplementedError) as e: + converter.convert(None, convert_to="invalid", source=frontend) + e.match(r"Backend converter .* not implemented") + + def test_invalid_format_non_exist(self): + non_exist_filename = self.model_path_h5.replace(".h5", "_non_exist.h5") + with pytest.raises(ValueError) as e: + converter.convert(non_exist_filename, source=frontend) + e.match(r"Input model .* does not exist") diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops.py new file mode 100644 index 000000000..726137628 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops.py @@ -0,0 +1,485 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.frontend.tensorflow.test import ( + testing_utils as tf_testing_utils, +) +from coremltools.converters.mil.frontend.tensorflow2.test.testing_utils import ( + make_tf2_graph as make_tf_graph, + run_compare_tf2 as run_compare_tf, +) +from coremltools.converters.mil.testing_reqs import * + +tf = pytest.importorskip("tensorflow", minversion="2.1.0") + +backends = testing_reqs.backends + +# ----------------------------------------------------------------------------- +# Overwrite utilities to enable different conversion / compare method +tf_testing_utils.frontend = "TensorFlow2" +tf_testing_utils.make_tf_graph = make_tf_graph +tf_testing_utils.run_compare_tf = run_compare_tf + +# ----------------------------------------------------------------------------- +# Import TF 2.x-compatible TF 1.x test cases +from coremltools.converters.mil.frontend.tensorflow.test.test_custom_ops import ( + TestCompositeOp, +) +from coremltools.converters.mil.frontend.tensorflow.test.test_ops import ( + TestActivationElu, + TestActivationLeakyReLU, + TestActivationReLU, + TestActivationReLU6, + TestActivationSelu, + TestActivationSigmoid, + TestActivationSoftmax, + TestActivationSoftPlus, + TestActivationSoftSign, + TestAddN, + TestBroadcastTo, + TestBatchToSpaceND, + TestCond, + TestConcat, # Redirects to ConcatV2 in TF2 + TestConv, + TestConv3d, + TestDepthwiseConv, + TestElementWiseBinary, + TestIsFinite, + TestLinear, + TestNormalization, + TestPad, + TestPack, + TestPooling1d, + TestPooling2d, + TestPooling3d, + TestSeparableConv, + TestSpaceToBatchND, + TestTensorArray, + TestWhileLoop, + TestReshape, + TestSelect, + TestSlice, + TestZerosLike, +) + +del TestWhileLoop.test_nested_while_body # tf.function() error in TF2 + + +class TestNormalizationTF2: + @pytest.mark.parametrize( + "use_cpu_only, backend, epsilon", + itertools.product([True, False], backends, [1e-1, 1e-10]), + ) + def test_fused_batch_norm_v3(self, use_cpu_only, backend, epsilon): + input_shape = np.random.randint(low=1, high=6, size=4) + attr_shape = [list(input_shape)[-1]] + + m = random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0) + v = random_gen(shape=attr_shape, rand_min=0.0, rand_max=10.0) + o = random_gen(shape=attr_shape, rand_min=1.0, rand_max=10.0) + s = random_gen(shape=attr_shape, rand_min=-1.0, rand_max=1.0) + + @make_tf_graph([input_shape]) + def build_model(x): + return tf.raw_ops.FusedBatchNormV3( + x=x, + scale=s, + offset=o, + mean=m, + variance=v, + epsilon=epsilon, + is_training=False, + )[0] + + model, inputs, outputs = build_model + + input_values = [random_gen(shape=input_shape, rand_min=-100.0, rand_max=100.0)] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, + input_dict, + outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + +class TestElementWiseBinaryTF2: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False + ) + def test_add_v2(self, use_cpu_only, backend, rank): + x_shape = list(np.random.randint(low=2, high=6, size=rank)) + y_shape = x_shape[:] + for i in range(rank): + if np.random.randint(4) == 0: + y_shape[i] = 1 + if np.random.randint(2) == 0: + y_shape = [1] + y_shape + + if use_cpu_only: + dtype = 
np.float32 + else: + dtype = np.float16 + + @make_tf_graph([x_shape, y_shape]) + def build_model(x, y): + return tf.raw_ops.AddV2(x=x, y=y) + + model, inputs, outputs = build_model + + input_values = [ + np.random.randint(low=-1000, high=1000, size=x_shape).astype(dtype), + np.random.randint(low=-1000, high=1000, size=y_shape).astype(dtype), + ] + + input_dict = dict(zip(inputs, input_values)) + + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +# TestElementWiseBinaryTF1: overwrite parameters + +# Note: The data type is set as float16 when `use_cpu_only=False` +# which errors out in TF2 tests. TODO:62466374 +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank, mode", + itertools.product( + [True], # False + backends, + [rank for rank in range(1, 4)], + [ + "add", + "floor_div", + "floor_mod", + "maximum", + "minimum", + "mod", + "mul", + "pow", + "real_div", + "sub", + "squared_difference", + ], + ), +) +TestElementWiseBinary.test_binary.pytestmark[0] = mark + +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False +) +TestElementWiseBinary.test_equal.pytestmark[0] = mark + +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False +) +TestElementWiseBinary.test_greater.pytestmark[0] = mark + +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False +) +TestElementWiseBinary.test_greater_equal.pytestmark[0] = mark + +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False +) +TestElementWiseBinary.test_less.pytestmark[0] = mark + +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False +) +TestElementWiseBinary.test_less_equal.pytestmark[0] = mark + +mark = pytest.mark.parametrize( + "use_cpu_only, backend, rank", + itertools.product([True], backends, [rank for rank in range(1, 4)]), # False +) +TestElementWiseBinary.test_not_equal.pytestmark[0] = mark + + +class TestControlFlowFromAutoGraph: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_if_unary_const(self, use_cpu_only, backend): + @make_tf_graph([(1,)]) + def build_model(x): + if x > 0.5: + y = x - 0.5 + else: + y = x + 0.5 + return y + + model, inputs, outputs = build_model + input_values = [np.array([0.7], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_if_unary_double_if_positive_else_square(self, use_cpu_only, backend): + @make_tf_graph([(1,)]) + def build_model(x): + if x >= 0: + out = x + x + else: + out = x * x + return out + + model, inputs, outputs = build_model + input_values = [np.array([2], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_if_binary_add_if_else_mul(self, use_cpu_only, backend): + @make_tf_graph([(1,), (1,)]) 
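+    # AutoGraph lowers the tensor-dependent `if x > y` below into a tf.cond
+    # (StatelessIf) node with two branch functions, exercising the converter's
+    # cond path.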
+ def build_model(x, y): + if x > y: + out = x + x + else: + out = x * x + return out + + model, inputs, outputs = build_model + input_values = [ + np.array([3], dtype=np.float32), + np.array([7], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_square(self, use_cpu_only, backend): + @make_tf_graph([(1,)]) + def build_model(x): + i = 0 + while i < 10: + x *= 2 + i += 1 + return x + + model, inputs, outputs = build_model + input_values = [np.array([2.0], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_power(self, use_cpu_only, backend): + @make_tf_graph([(1,)]) + def build_model(x): + i = 0 + while i < 3: + x *= x + i += 1 + return x + + model, inputs, outputs = build_model + input_values = [np.array([2.0], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_while_loop_nested_body(self, use_cpu_only, backend): + @make_tf_graph([(1,)]) + def build_model(x): + i, j = 0, 10 + while i < j: + while 2 * i < i + 2: + i += 1 + x -= 1 + i += 2 + x *= 2 + return x + + model, inputs, outputs = build_model + input_values = [np.array([9.0], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + +class TestTensorList: + @pytest.mark.parametrize( + "use_cpu_only, backend, size_dynamic_shape", + itertools.product( + [True, False], + backends, + [(1, True, None), (1, True, (1,)), (2, False, (1,))], + ), + ) + def test_write_read_and_stack(self, use_cpu_only, backend, size_dynamic_shape): + size, dynamic_size, element_shape = size_dynamic_shape + + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + ta = tf.TensorArray( + tf.float32, + size=size, + dynamic_size=dynamic_size, + element_shape=element_shape, + ) + ta = ta.write(0, x) + ta = ta.write(1, y) + return ta.read(0), ta.read(1), ta.stack() + + model, inputs, outputs = build_model + input_values = [ + np.array([3.14], dtype=np.float32), + np.array([6.17], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, size_dynamic_shape", + itertools.product( + [True, False], + backends, + [(0, True, None), (1, True, (1,)), (3, False, (1,))], + ), + ) + def test_unstack_and_read(self, use_cpu_only, backend, size_dynamic_shape): + size, dynamic_size, element_shape = size_dynamic_shape + + @make_tf_graph([(3, 1)]) + def build_model(x): + ta = tf.TensorArray( + tf.float32, + size=size, + dynamic_size=dynamic_size, + element_shape=element_shape, + ) + ta = ta.unstack(x) + return ta.read(0), ta.read(1), ta.read(2) + + model, inputs, outputs = build_model + input_values = [np.array([[3.14], [6.17], [12.14]], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, 
input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, size_dynamic_shape", + itertools.product( + [True, False], + backends, + [(2, True, None), (1, True, (1,)), (3, False, (1,))], + ), + ) + def test_write_and_gather(self, use_cpu_only, backend, size_dynamic_shape): + size, dynamic_size, element_shape = size_dynamic_shape + + @make_tf_graph([(1,), (1,)]) + def build_model(x, y): + ta = tf.TensorArray( + tf.float32, + size=size, + dynamic_size=dynamic_size, + element_shape=element_shape, + ) + ta = ta.write(0, x) + ta = ta.write(1, y) + return ta.gather(indices=[0, 1]) + + model, inputs, outputs = build_model + input_values = [ + np.array([3.14], dtype=np.float32), + np.array([6.17], dtype=np.float32), + ] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, size_dynamic_shape", + itertools.product( + [True, False], + backends, + [(2, True, None), (1, True, (1,)), (3, False, (1,))], + ), + ) + def test_scatter_and_read(self, use_cpu_only, backend, size_dynamic_shape): + size, dynamic_size, element_shape = size_dynamic_shape + + @make_tf_graph([(3, 1)]) + def build_model(x): + ta = tf.TensorArray( + tf.float32, + size=size, + dynamic_size=dynamic_size, + element_shape=element_shape, + ) + ta = ta.scatter(indices=[0, 1, 2], value=x) + return ta.read(0), ta.read(1), ta.read(2) + + model, inputs, outputs = build_model + input_values = [np.array([[3.14], [6.17], [12.14]], dtype=np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, size_dynamic_shape", + itertools.product([True, False], backends, [(2, False, (None, 8))]), + ) + def test_partial_element_shape(self, use_cpu_only, backend, size_dynamic_shape): + size, dynamic_size, element_shape = size_dynamic_shape + + @make_tf_graph([(3, 1, 8)]) + def build_model(x): + ta = tf.TensorArray( + tf.float32, + size=size, + dynamic_size=dynamic_size, + element_shape=element_shape, + ) + ta = ta.scatter(indices=[0, 1, 2], value=x) + return ta.read(0), ta.read(1), ta.read(2) + + model, inputs, outputs = build_model + input_values = [np.random.rand(3, 1, 8).astype(np.float32)] + input_dict = dict(zip(inputs, input_values)) + run_compare_tf( + model, input_dict, outputs, use_cpu_only=use_cpu_only, backend=backend + ) diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py new file mode 100644 index 000000000..e45695577 --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/test/test_v2_ops_tf_keras.py @@ -0,0 +1,1240 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import random +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.frontend.tensorflow2.test.testing_utils import ( + run_compare_tf_keras, +) +from coremltools.converters.mil.testing_reqs import * + +backends = testing_reqs.backends + +tf = pytest.importorskip("tensorflow", minversion="2.1.0") + + +class TestActivation: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, op", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [ + tf.keras.layers.ELU, + tf.keras.layers.LeakyReLU, + tf.keras.layers.ReLU, + tf.keras.layers.PReLU, + tf.keras.layers.Softmax, + tf.keras.layers.ThresholdedReLU, + ], + ), + ) + def test_layer(self, use_cpu_only, backend, rank, op): + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential([op(batch_input_shape=shape)]) + run_compare_tf_keras( + model, + [random_gen(shape, -10, 10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, op", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [ + tf.keras.activations.elu, + tf.keras.activations.exponential, + tf.keras.activations.hard_sigmoid, + tf.keras.activations.linear, + tf.keras.activations.relu, + tf.keras.activations.selu, + tf.keras.activations.sigmoid, + tf.keras.activations.softmax, + tf.keras.activations.softplus, + tf.keras.activations.softsign, + tf.keras.activations.tanh, + ], + ), + ) + def test_activation(self, use_cpu_only, backend, rank, op): + kwargs = ( + {"atol": 1e-3, "rtol": 1e-4} + if op == tf.keras.activations.exponential and use_cpu_only is False + else {} + ) + if op == tf.keras.activations.softmax and rank == 1: + return # skip apply softmax to a tensor that is 1D + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [tf.keras.layers.Activation(op, batch_input_shape=shape)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, -10, 10)], + use_cpu_only=use_cpu_only, + backend=backend, + **kwargs + ) + + +class TestBinary: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, op", + itertools.product( + [True, False], + backends, + [rank for rank in range(2, 6)], + [ + tf.keras.layers.Add, + tf.keras.layers.Average, + tf.keras.layers.Subtract, + tf.keras.layers.Maximum, + tf.keras.layers.Minimum, + ], + ), + ) + def test(self, use_cpu_only, backend, rank, op): + shape = np.random.randint(low=1, high=6, size=rank) + input_x = tf.keras.layers.Input(batch_input_shape=tuple(shape)) + input_y = tf.keras.layers.Input(batch_input_shape=tuple(shape)) + out = op()([input_x, input_y]) + model = tf.keras.Model(inputs=[input_x, input_y], outputs=out) + run_compare_tf_keras( + model, + [random_gen(shape, -10, 10), random_gen(shape, -10, 10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axes, normalize", + itertools.product( + [True, False], + backends, + [rank for rank in range(2, 3)], + [-1,], + [True, False], + ), + ) + def test_dot(self, use_cpu_only, rank, backend, axes, normalize): + shape = np.random.randint(low=2, high=6, size=rank) + input_x = tf.keras.layers.Input(batch_input_shape=tuple(shape)) + input_y = tf.keras.layers.Input(batch_input_shape=tuple(shape)) + out = tf.keras.layers.Dot(axes=axes, 
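+            # normalize=True L2-normalizes both inputs along `axes` before the
+            # dot product, i.e. the layer outputs cosine proximity.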
normalize=normalize)([input_x, input_y]) + model = tf.keras.Model(inputs=[input_x, input_y], outputs=out) + run_compare_tf_keras( + model, + [random_gen(shape, -10, 10), random_gen(shape, -10, 10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestConcatenate: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axis", + itertools.product( + [True, False], backends, [rank for rank in range(5, 6)], [-1, -2], + ), + ) + def test(self, use_cpu_only, backend, rank, axis): + shape = np.random.randint(low=2, high=6, size=rank) + inputs = [] + for _ in range(2): + inputs.append(tf.keras.layers.Input(batch_input_shape=tuple(shape))) + out = tf.keras.layers.Concatenate(axis=axis)(inputs) + model = tf.keras.Model(inputs=inputs, outputs=out) + run_compare_tf_keras( + model, + [random_gen(shape), random_gen(shape)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestConvolution: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "op", + "padding", + "data_format", + "spatial_dim_and_ks", + "strides", + "dilations", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + [ + tf.keras.layers.Conv1D, + tf.keras.layers.Conv2D, + tf.keras.layers.Conv3D, + tf.keras.layers.LocallyConnected1D, + tf.keras.layers.LocallyConnected2D, + ], + ["same", "valid"], + ["channels_last"], + [(2, 4, 4, 2, 2, 2), (3, 7, 5, 1, 3, 2), (5, 12, 11, 2, 3, 4)], + [(1, 1, 1), (1, 2, 3), (1, 3, 2)], + [ + (1, 1, 1) + ], # rdar://62951360 (Enhance SpaceToBatchND op to support more dialation rate of Conv) + [1, 3], + ), + ) + def test_conv( + self, + use_cpu_only, + backend, + op, + padding, + data_format, + spatial_dim_and_ks, + strides, + dilations, + batch_size, + ): + s1, s2, s3, k1, k2, k3 = spatial_dim_and_ks + c_in, c_out = 2, 3 + input_shape = None + kernel_size = None + if op in {tf.keras.layers.Conv1D, tf.keras.layers.LocallyConnected1D}: + input_shape = (batch_size, s3, c_in) + kernel_size = k3 + strides = strides[2] + dilations = dilations[2] + elif op in {tf.keras.layers.Conv2D, tf.keras.layers.LocallyConnected2D}: + input_shape = (batch_size, s2, s3, c_in) + kernel_size = (k2, k3) + strides = (strides[1], strides[2]) + dilations = dilations[1:] + elif op == tf.keras.layers.Conv3D: + input_shape = (batch_size, s1, s2, s3, c_in) + kernel_size = (k1, k2, k3) + + if op in { + tf.keras.layers.LocallyConnected1D, + tf.keras.layers.LocallyConnected2D, + }: + if padding != "valid": + return # tf.keras only supports "valid" + model = tf.keras.Sequential( + [ + op( + batch_input_shape=input_shape, + filters=c_out, + kernel_size=kernel_size, + strides=strides, + padding=padding.upper(), + data_format=data_format, + ) + ] + ) + else: + model = tf.keras.Sequential( + [ + op( + batch_input_shape=input_shape, + filters=c_out, + kernel_size=kernel_size, + strides=strides, + padding=padding.upper(), + data_format=data_format, + dilation_rate=dilations, + ) + ] + ) + + run_compare_tf_keras( + model, + [random_gen(input_shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "op", + "padding", + "data_format", + "spatial_dim_and_ks", + "output_padding", + "strides", + "dilations", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + # TODO: rdar://63968613 ([deconv3d] Deconv_3d top_shapes_for_bottom_shapes does not sets output channel if output shape is provided) + [tf.keras.layers.Conv2DTranspose], # 
tf.keras.layers.Conv3DTranspose], + ["same", "valid"], + ["channels_last"], + [(7, 11, 12, 1, 2, 2), (9, 5, 7, 3, 3, 3)], + [(1, 1, 1)], + [(2, 2, 2), (2, 3, 3)], + [(1, 1, 1)], # Dilation > 1 not supported by TF + [1, 3], + ), + ) + def test_conv_transpose( + self, + use_cpu_only, + backend, + op, + padding, + data_format, + spatial_dim_and_ks, + output_padding, + strides, + dilations, + batch_size, + ): + s1, s2, s3, k1, k2, k3 = spatial_dim_and_ks + c_in, c_out = 2, 3 + input_shape = None + kernel_size = None + if op == tf.keras.layers.Conv2DTranspose: + input_shape = (batch_size, s2, s3, c_in) + kernel_size = (k2, k3) + strides = (strides[1], strides[2]) + dilations = dilations[1:] + output_padding = (output_padding[1], output_padding[2]) + elif op == tf.keras.layers.Conv3DTranspose: + input_shape = (batch_size, s1, s2, s3, c_in) + kernel_size = (k1, k2, k3) + + model = tf.keras.Sequential( + [ + op( + batch_input_shape=input_shape, + filters=c_out, + kernel_size=kernel_size, + strides=strides, + padding=padding.upper(), + output_padding=output_padding, + data_format=data_format, + dilation_rate=dilations, + ) + ] + ) + + run_compare_tf_keras( + model, + [random_gen(input_shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "op", + "padding", + "data_format", + "spatial_dim_and_ks", + "strides", + "dilations", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + [tf.keras.layers.DepthwiseConv2D], + ["same", "valid"], + ["channels_last"], + [(11, 12, 3, 2), (12, 11, 2, 3)], + [(1, 1), (2, 2)], + [(1, 1), (2, 2)], + [1, 3], + ), + ) + def test_depth_wise_conv( + self, + use_cpu_only, + backend, + op, + padding, + data_format, + spatial_dim_and_ks, + strides, + dilations, + batch_size, + ): + s1, s2, k1, k2 = spatial_dim_and_ks + c_in, c_out = 2, 6 + + if len(strides) != np.sum(strides) and len(dilations) != np.sum(dilations): + # TF produces incorrect output for non-one strides + dilations + return + + input_shape = (batch_size, s1, s2, c_in) + model = tf.keras.Sequential( + [ + op( + batch_input_shape=input_shape, + kernel_size=(k1, k2), + strides=strides, + padding=padding.upper(), + data_format=data_format, + dilation_rate=dilations, + ) + ] + ) + + run_compare_tf_keras( + model, + [random_gen(input_shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "op", + "padding", + "data_format", + "spatial_dim_and_ks", + "strides", + "dilations", + "batch_size", + ] + ), + itertools.product( + [True, False], + backends, + [tf.keras.layers.SeparableConv1D, tf.keras.layers.SeparableConv2D], + ["same", "valid"], + ["channels_last"], + [(14, 14, 2, 2), (11, 9, 3, 2), (12, 11, 2, 3)], + [(1, 1), (2, 2), (3, 3)], + [(1, 1)], + [1, 3], + ), + ) + def test_separable_conv( + self, + use_cpu_only, + backend, + op, + padding, + data_format, + spatial_dim_and_ks, + strides, + dilations, + batch_size, + ): + s1, s2, k1, k2 = spatial_dim_and_ks + c_in, c_out = 2, 3 + input_shape = None + kernel_size = None + if op == tf.keras.layers.SeparableConv1D: + input_shape = (batch_size, s2, c_in) + kernel_size = k2 + strides = strides[1] + dilations = dilations[1] + elif op == tf.keras.layers.SeparableConv2D: + input_shape = (batch_size, s1, s2, c_in) + kernel_size = (k1, k2) + + model = tf.keras.Sequential( + [ + op( + batch_input_shape=input_shape, + filters=c_out, + 
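+                    # `filters` is the channel count of the pointwise (1x1)
+                    # stage; the preceding depthwise stage runs per channel.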
kernel_size=kernel_size, + strides=strides, + padding=padding.upper(), + data_format=data_format, + dilation_rate=dilations, + ) + ] + ) + + run_compare_tf_keras( + model, + [random_gen(input_shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestCropping: + @pytest.mark.parametrize( + "use_cpu_only, backend, begin_end", + itertools.product( + [True, False], backends, [(0, 0), (1, 1), (1, 2), (2, 1), (2, 4), (3, 2)], + ), + ) + def test_cropping_1d(self, use_cpu_only, backend, begin_end): + shape = (1, 10, 3) + model = tf.keras.Sequential( + [tf.keras.layers.Cropping1D(batch_input_shape=shape, cropping=begin_end)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, begin_end1, begin_end2", + itertools.product( + [True, False], + backends, + [(0, 0), (1, 1), (1, 2), (2, 1), (2, 4)], + [(0, 0), (1, 1), (1, 2), (2, 1), (4, 2)], + ), + ) + def test_cropping_2d(self, use_cpu_only, backend, begin_end1, begin_end2): + shape = (1, 10, 10, 3) + model = tf.keras.Sequential( + [ + tf.keras.layers.Cropping2D( + batch_input_shape=shape, cropping=(begin_end1, begin_end2) + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, begin_end1, begin_end2, begin_end3", + itertools.product( + [True, False], + backends, + [(0, 0), (1, 1), (1, 2), (2, 1), (2, 4)], + [(0, 0), (1, 1), (1, 2), (2, 1), (4, 2)], + [(0, 0), (1, 1), (1, 2), (2, 1), (2, 4)], + ), + ) + def test_cropping_3d( + self, use_cpu_only, backend, begin_end1, begin_end2, begin_end3 + ): + shape = (1, 10, 10, 10, 3) + model = tf.keras.Sequential( + [ + tf.keras.layers.Cropping3D( + batch_input_shape=shape, + cropping=(begin_end1, begin_end2, begin_end3), + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestDense: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, units, activation, use_bias", + itertools.product( + [True, False], + backends, + [rank for rank in range(2, 6)], + [2, 4, 8], + [tf.nn.relu, tf.nn.softmax, tf.nn.swish], + [True, False], + ), + ) + def test(self, use_cpu_only, backend, rank, units, activation, use_bias): + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [ + tf.keras.layers.Dense( + batch_input_shape=shape, + units=units, + activation=activation, + use_bias=use_bias, + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestEmbedding: + @pytest.mark.xfail(reason="rdar://63414784") + @pytest.mark.parametrize( + "use_cpu_only, backend, dims, batch_size, input_length", + itertools.product( + [True, False], + backends, + [(4, 1), (8, 3), (16, 5), (32, 7), (64, 9)], + [1, 3, 5], + [2, 4, 10], + ), + ) + def test(self, use_cpu_only, backend, dims, batch_size, input_length): + # input shape: 2D tensor (batch_size, input_length) + # output shape: 3D tensor (batch_size, input_length, output_dim) + shape = (batch_size, input_length) + model = tf.keras.Sequential( + [ + tf.keras.layers.Embedding( + batch_input_shape=shape, + input_dim=dims[0], + output_dim=dims[1], + input_length=input_length, + ) + ] + ) + run_compare_tf_keras( + model, + 
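+            # Embedding inputs are row indices into the (input_dim, output_dim)
+            # lookup table, hence the non-negative range bounded by dims[0].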
[random_gen(shape, rand_min=0, rand_max=dims[0])], + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-3, + rtol=1e-4, + ) + + +class TestFlatten: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, data_format", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + ["channels_last", "channels_first"], + ), + ) + def test(self, use_cpu_only, backend, rank, data_format): + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [tf.keras.layers.Flatten(batch_input_shape=shape, data_format=data_format,)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestLambda: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, function", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [ + lambda x: x + x, + lambda x: x * 3.14 - 1.0, + lambda x: np.sqrt(4) + x, + lambda x: tf.math.abs(x), + ], + ), + ) + def test_unary(self, use_cpu_only, backend, rank, function): + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [tf.keras.layers.Lambda(batch_input_shape=shape, function=function,)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-5, rand_max=5)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestNormalization: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axis, momentum, epsilon", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [0, -1], + [0.99, 0.85], + [1e-2, 1e-5], + ), + ) + def test_batch_normalization( + self, use_cpu_only, backend, rank, axis, momentum, epsilon + ): + shape = np.random.randint(low=2, high=5, size=rank) + model = tf.keras.Sequential( + [ + tf.keras.layers.BatchNormalization( + batch_input_shape=shape, + axis=axis, + momentum=momentum, + epsilon=epsilon, + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axis, momentum, epsilon", + itertools.product( + [True, False], backends, [(4, 1), (4, -3)], [0.99, 0.85], [1e-2, 1e-5], + ), + ) + def test_fused_batch_norm_v3( + self, use_cpu_only, backend, rank_and_axis, momentum, epsilon + ): + rank, axis = rank_and_axis + shape = np.random.randint(low=2, high=5, size=rank) + model = tf.keras.Sequential( + [ + tf.keras.layers.BatchNormalization( + batch_input_shape=shape, + axis=axis, + momentum=momentum, + epsilon=epsilon, + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axis, epsilon", + itertools.product( + [True, False], backends, [rank for rank in range(3, 4)], [-1,], [1e-10], + ), + ) + def test_layer_normalization(self, use_cpu_only, backend, rank, axis, epsilon): + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [ + tf.keras.layers.LayerNormalization( + batch_input_shape=shape, axis=axis, epsilon=epsilon, trainable=False + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-100, rand_max=100)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, axis, epsilon, center, scale", + itertools.product( + [True, False], + backends, + [rank for rank in range(4, 5)], + 
[-1], + [1e-3, 1e-5], + [True, False], + [True, False], + ), + ) + def test_instance_normalization( + self, use_cpu_only, backend, rank, axis, epsilon, center, scale + ): + tensorflow_addons = pytest.importorskip("tensorflow_addons") + from tensorflow_addons.layers import InstanceNormalization + + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [ + InstanceNormalization( + batch_input_shape=shape, + axis=axis, + epsilon=epsilon, + center=center, + scale=scale, + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-3, + rtol=1e-4, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, groups, axis, epsilon, center, scale", + itertools.product( + [True, False], + backends, + [rank for rank in range(4, 5)], + [1, 2, 3], + [-1], + [1e-3, 1e-5], + [True, False], + [True, False], + ), + ) + def test_group_normalization( + self, use_cpu_only, backend, rank, groups, axis, epsilon, center, scale + ): + tensorflow_addons = pytest.importorskip("tensorflow_addons") + from tensorflow_addons.layers import GroupNormalization + + shape = np.random.randint(low=2, high=6, size=rank) + shape[-1] = shape[-1] * groups # groups must be a multiple of channels + model = tf.keras.Sequential( + [ + GroupNormalization( + batch_input_shape=shape, + groups=groups, + axis=axis, + epsilon=epsilon, + center=center, + scale=scale, + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-3, + rtol=1e-4, + ) + + +class TestPadding: + @pytest.mark.parametrize( + "use_cpu_only, backend, op, data_format, padding", + itertools.product( + [True, False], + backends, + [ + tf.keras.layers.ZeroPadding1D, + tf.keras.layers.ZeroPadding2D, + tf.keras.layers.ZeroPadding3D, + ], + ["channels_first", "channels_last"], + [(1, 1, 1), (2, 2, 2), (3, 3, 3), (1, 3, 4), (2, 3, 5)], + ), + ) + def test(self, use_cpu_only, backend, op, data_format, padding): + shape = None + kwargs = {} + if op == tf.keras.layers.ZeroPadding1D: + padding = padding[-1] + shape = np.random.randint(low=2, high=6, size=3) + elif op == tf.keras.layers.ZeroPadding2D: + padding = padding[1:] + kwargs = {"data_format": data_format} + shape = np.random.randint(low=2, high=6, size=4) + elif op == tf.keras.layers.ZeroPadding3D: + kwargs = {"data_format": data_format} + shape = np.random.randint(low=2, high=6, size=5) + model = tf.keras.Sequential( + [op(batch_input_shape=shape, padding=padding, **kwargs)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestPermute: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_perm", + itertools.product( + [True, False], + backends, + [ + (rank, perm) + for rank in range(3, 6) + for perm in list(itertools.permutations(range(rank)[1:])) + ], + ), + ) + def test(self, use_cpu_only, backend, rank_and_perm): + rank, perm = rank_and_perm + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [tf.keras.layers.Permute(batch_input_shape=shape, dims=perm)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestPooling: + @pytest.mark.parametrize( + "use_cpu_only, backend, op, data_format", + itertools.product( + [True, False], + backends, + [ + 
tf.keras.layers.GlobalAveragePooling1D, + tf.keras.layers.GlobalAveragePooling2D, + tf.keras.layers.GlobalAveragePooling3D, + tf.keras.layers.GlobalMaxPool1D, + tf.keras.layers.GlobalMaxPool2D, + tf.keras.layers.GlobalMaxPool3D, + ], + ["channels_first", "channels_last"], + ), + ) + def test_global_pooling(self, use_cpu_only, backend, op, data_format): + shape = None + if op in { + tf.keras.layers.GlobalAveragePooling1D, + tf.keras.layers.GlobalMaxPool1D, + }: + shape = np.random.randint(low=2, high=6, size=3) + elif op in { + tf.keras.layers.GlobalAveragePooling2D, + tf.keras.layers.GlobalMaxPool2D, + }: + shape = np.random.randint(low=2, high=6, size=4) + elif op in { + tf.keras.layers.GlobalAveragePooling3D, + tf.keras.layers.GlobalMaxPool3D, + }: + shape = np.random.randint(low=2, high=6, size=5) + model = tf.keras.Sequential( + [op(batch_input_shape=shape, data_format=data_format)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, op, data_format, pool_size", + itertools.product( + [True, False], + backends, + [ + tf.keras.layers.AveragePooling1D, + tf.keras.layers.AveragePooling2D, + tf.keras.layers.AveragePooling3D, + tf.keras.layers.MaxPool1D, + tf.keras.layers.MaxPool2D, + tf.keras.layers.MaxPool3D, + ], + ["channels_first", "channels_last"], + [(2, 2, 1), (2, 3, 2), (1, 2, 3)], + ), + ) + def test_pooling(self, use_cpu_only, backend, op, data_format, pool_size): + shape = None + if op in {tf.keras.layers.AveragePooling1D, tf.keras.layers.MaxPool1D}: + shape = np.random.randint(low=3, high=9, size=3) + pool_size = pool_size[2] + elif op in {tf.keras.layers.AveragePooling2D, tf.keras.layers.MaxPool2D}: + if data_format == "channels_first": + return # AvgPoolingOp only supports NHWC on CPU + shape = np.random.randint(low=3, high=9, size=4) + pool_size = pool_size[1:] + elif op in {tf.keras.layers.AveragePooling3D, tf.keras.layers.MaxPool3D}: + shape = np.random.randint(low=3, high=9, size=5) + model = tf.keras.Sequential( + [op(batch_input_shape=shape, pool_size=pool_size, data_format=data_format)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestRecurrent: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, units, activation, " + "recurrent_activation, use_bias, return_sequences", + itertools.product( + [True, False], + backends, + [rank for rank in range(3, 4)], + [1, 2, 5], + [None, tf.nn.tanh, tf.nn.softmax], + [None, tf.nn.sigmoid, tf.nn.relu], + [True, False], + [True, False], + ), + ) + def test_lstm( + self, + use_cpu_only, + backend, + rank, + units, + activation, + recurrent_activation, + use_bias, + return_sequences, + ): + shape = np.random.randint(low=1, high=5, size=rank) + model = tf.keras.Sequential( + [ + tf.keras.layers.LSTM( + batch_input_shape=shape, + units=units, + activation=activation, + recurrent_activation=recurrent_activation, + use_bias=use_bias, + return_sequences=return_sequences, + ), + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_lstmcell(self, use_cpu_only, backend): + shape = np.random.randint(low=1, high=5, size=3) + model = tf.keras.Sequential( + [ + tf.keras.layers.RNN( + 
batch_input_shape=shape, cell=tf.keras.layers.LSTMCell(units=3) + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_lstm_time_distributed_dense(self, use_cpu_only, backend): + shape = list(np.random.randint(low=1, high=5, size=3)) + k_in = tf.keras.layers.Input(batch_size=shape[0], shape=shape[1:]) + lstm = tf.keras.layers.LSTM(units=32, return_sequences=True)(k_in) + k_out = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))(lstm) + model = tf.keras.Model(inputs=k_in, outputs=k_out) + + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-1, rand_max=1)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestRepeatVector: + @pytest.mark.parametrize( + "use_cpu_only, backend, n", + itertools.product([True, False], backends, [2, 3, 5, 7],), + ) + def test(self, use_cpu_only, backend, n): + # input shape 2D tensor (batch size, features) + # output shape 3D tensor (batch size, n, features) + shape = np.random.randint(low=1, high=5, size=2) + model = tf.keras.Sequential( + [tf.keras.layers.RepeatVector(batch_input_shape=shape, n=n)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestReshape: + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, infer_shape", + itertools.product( + [True, False], backends, [rank for rank in range(1, 6)], [True, False], + ), + ) + def test(self, use_cpu_only, backend, rank, infer_shape): + shape = np.random.randint(low=2, high=5, size=rank) + # target shape does not include the batch dimension + target_shape = random.sample(list(shape[1:]), len(shape[1:])) + if len(target_shape) > 0 and infer_shape: + target_shape[-1] = -1 + model = tf.keras.Sequential( + [ + tf.keras.layers.Reshape( + batch_input_shape=shape, target_shape=target_shape + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSkips: + # ops in this class should be ignored / pass-through during conversion + + @pytest.mark.parametrize( + "use_cpu_only, backend, skip_op", + itertools.product( + [True, False], + backends, + [ + tf.keras.layers.Dropout, + tf.keras.layers.AlphaDropout, + tf.keras.layers.GaussianDropout, + tf.keras.layers.SpatialDropout1D, + tf.keras.layers.SpatialDropout2D, + tf.keras.layers.SpatialDropout3D, + ], + ), + ) + def test_skip_dropout(self, use_cpu_only, backend, skip_op): + shape = np.random.randint(low=1, high=5, size=5) + if skip_op == tf.keras.layers.SpatialDropout1D: + shape = shape[:3] + elif skip_op == tf.keras.layers.SpatialDropout2D: + shape = shape[:4] + model = tf.keras.Sequential([skip_op(batch_input_shape=shape, rate=0.5)]) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_skip_noise(self, use_cpu_only, backend): + shape = np.random.randint(low=1, high=5, size=5) + model = tf.keras.Sequential( + [ + # GaussianNoise should do nothing in inference mode + tf.keras.layers.GaussianNoise(batch_input_shape=shape, stddev=0.5) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + 
use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, l1, l2", + itertools.product( + [True, False], + backends, + [rank for rank in range(5, 6)], + [0.0, 0.5, 1.0], + [0.0, 0.5, 1.0], + ), + ) + def test_skip_regularization(self, use_cpu_only, backend, rank, l1, l2): + shape = np.random.randint(low=2, high=6, size=rank) + model = tf.keras.Sequential( + [ + tf.keras.layers.ActivityRegularization( + batch_input_shape=shape, l1=l1, l2=l2 + ) + ] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestUpSampling: + @pytest.mark.parametrize( + "use_cpu_only, backend, op, upsample_factor, data_format, interpolation", + itertools.product( + [True, False], + backends, + [ + tf.keras.layers.UpSampling1D, + tf.keras.layers.UpSampling2D, + tf.keras.layers.UpSampling3D, + ], + [(2, 2, 1), (4, 3, 2), (1, 2, 3)], + ["channels_first", "channels_last"], + ["nearest", "bilinear"], + ), + ) + def test( + self, use_cpu_only, backend, op, upsample_factor, data_format, interpolation + ): + kwargs = {} + shape = None + if op == tf.keras.layers.UpSampling1D: + shape = np.random.randint(low=2, high=6, size=3) + upsample_factor = upsample_factor[2] + elif op == tf.keras.layers.UpSampling2D: + kwargs = {"data_format": data_format, "interpolation": interpolation} + shape = np.random.randint(low=2, high=6, size=4) + upsample_factor = (upsample_factor[1], upsample_factor[2]) + elif op == tf.keras.layers.UpSampling3D: + kwargs = {"data_format": data_format} + shape = np.random.randint(low=2, high=6, size=5) + + model = tf.keras.Sequential( + [op(batch_input_shape=shape, size=upsample_factor, **kwargs)] + ) + run_compare_tf_keras( + model, + [random_gen(shape, rand_min=-10, rand_max=10)], + use_cpu_only=use_cpu_only, + backend=backend, + ) diff --git a/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py b/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py new file mode 100644 index 000000000..51d33f2ac --- /dev/null +++ b/coremltools/converters/mil/frontend/tensorflow2/test/testing_utils.py @@ -0,0 +1,197 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters import convert +import pytest + +tf = pytest.importorskip("tensorflow", minversion="2.1.0") +from coremltools.converters.mil.frontend.tensorflow.test.testing_utils import ( + get_tf_node_names, +) + +from coremltools.converters.mil.input_types import TensorType +from coremltools.converters.mil.testing_utils import compare_shapes, compare_backend +from tensorflow.python.framework import dtypes + + +def make_tf2_graph(input_types): + """ + Decorator to help construct TensorFlow 2.x model. + + Parameters + ---------- + input_types: list of tuple + List of input types. E.g. [(3, 224, 224, tf.int32)] represent 1 input, + with shape (3, 224, 224), and the expected data type is tf.int32. The + dtype is optional, in case it's missing, tf.float32 will be used. 
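+        For example, [(3, 224, 224, tf.int32), (1, 8)] would declare two
+        inputs, the second being a float32 tensor of shape (1, 8). (The
+        shapes here are illustrative only.)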
+ + Returns + ------- + list of ConcreteFunction, list of str, list of str + """ + + def wrapper(ops): + class TensorFlowModule(tf.Module): + input_signature = [] + for input_type in input_types: + if len(input_type) > 0 and isinstance(input_type[-1], dtypes.DType): + shape, dtype = input_type[:-1], input_type[-1] + else: + shape, dtype = input_type, tf.float32 + input_signature.append(tf.TensorSpec(shape=shape, dtype=dtype)) + + @tf.function(input_signature=input_signature) + def __call__(self, *args): + return ops(*args) + + module = TensorFlowModule() + concrete_func = module.__call__.get_concrete_function() + inputs = get_tf_node_names( + [t.name for t in concrete_func.inputs if t.dtype != dtypes.resource], + mode="input", + ) + outputs = get_tf_node_names( + [t.name for t in concrete_func.outputs], mode="output" + ) + return [concrete_func], inputs, outputs + + return wrapper + + +def run_compare_tf2( + model, + input_dict, + output_names, + use_cpu_only=False, + frontend_only=False, + frontend="tensorflow", + backend="nn_proto", + debug=False, + atol=1e-04, + rtol=1e-05, +): + """ + Parameters + ---------- + model: list of tf.ConcreteFunction + List of TensorFlow 2.x concrete functions. + input_dict: dict of (str, np.array) + Dict of name and value pairs representing inputs. + output_names: list of str + List of output node names. + use_cpu_only: bool + If true, use CPU only for prediction, otherwise, use GPU also. + frontend_only: bool + If true, skip the prediction call, only validate conversion. + frontend: str + Frontend to convert from. + backend: str + Backend to convert to. + debug: bool + If true, print verbose information and plot intermediate graphs. + atol: float + The absolute tolerance parameter. + rtol: float + The relative tolerance parameter. + """ + inputs = [] + cf_inputs = [t for t in model[0].inputs if t.dtype != dtypes.resource] + for t in cf_inputs: + name = get_tf_node_names(t.name)[0] + inputs.append(TensorType(name=name, shape=list(t.get_shape()))) + outputs = [] + for t in output_names: + name = get_tf_node_names(t)[0] + outputs.append(name) + + # get TensorFlow 2.x output as reference and run comparision + tf_input_values = [tf.constant(t) for t in input_dict.values()] + tf_outputs = model[0](*tf_input_values) + if isinstance(tf_outputs, (tuple, list)): + ref = [t.numpy() for t in tf_outputs] + else: + ref = [tf_outputs.numpy()] + expected_outputs = {n: v for n, v in zip(outputs, ref)} + + proto = convert( + model, + source=frontend, + inputs=inputs, + outputs=outputs, + convert_to=backend, + debug=debug, + ).get_spec() + + if frontend_only: + return + + compare_backend( + proto, + input_dict, + expected_outputs, + use_cpu_only, + atol=atol, + rtol=rtol, + also_compare_shapes=True, + ) + + return proto + + +def run_compare_tf_keras( + model, + input_values, + use_cpu_only=False, + frontend_only=False, + frontend="tensorflow", + backend="nn_proto", + atol=1e-04, + rtol=1e-05, +): + """ + Parameters + ---------- + model: TensorFlow 2.x model + TensorFlow 2.x model annotated with @tf.function. + input_values: list of np.array + List of input values in the same order as the input signature. + use_cpu_only: bool + If true, use CPU only for prediction, otherwise, use GPU also. + frontend_only: bool + If true, skip the prediction call, only validate conversion. + frontend: str + Frontend to convert from. + backend: str + Backend to convert to. + atol: float + The absolute tolerance parameter. + rtol: float + The relative tolerance parameter. 
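+
+    Returns
+    -------
+    proto
+        The converted model's protobuf spec (the result of
+        convert(...).get_spec()), or None when frontend_only is set.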
+    """
+
+    proto = convert(model, source=frontend, convert_to=backend).get_spec()
+
+    # assumes conversion preserves the i/o names
+    inputs = sorted([str(i.name) for i in proto.description.input])
+    outputs = [str(o.name) for o in proto.description.output]
+
+    if frontend_only:
+        return
+
+    # get tf.keras model output as reference and run comparison
+    ref = [model(input_values).numpy()]
+    expected_outputs = {n: v for n, v in zip(outputs, ref)}
+    input_key_values = {n: v for n, v in zip(inputs, input_values)}
+    compare_backend(
+        proto,
+        input_key_values,
+        expected_outputs,
+        use_cpu_only,
+        atol=atol,
+        rtol=rtol,
+        also_compare_shapes=True,
+    )
+
+    return proto
diff --git a/coremltools/converters/mil/frontend/tensorflow2/tf_graph_pass/__init__.py b/coremltools/converters/mil/frontend/tensorflow2/tf_graph_pass/__init__.py
new file mode 100644
index 000000000..3a0cf529d
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/tf_graph_pass/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from .rewrite_control_flow_functions import *
diff --git a/coremltools/converters/mil/frontend/tensorflow2/tf_graph_pass/rewrite_control_flow_functions.py b/coremltools/converters/mil/frontend/tensorflow2/tf_graph_pass/rewrite_control_flow_functions.py
new file mode 100644
index 000000000..304d64dec
--- /dev/null
+++ b/coremltools/converters/mil/frontend/tensorflow2/tf_graph_pass/rewrite_control_flow_functions.py
@@ -0,0 +1,552 @@
+# -*- coding: utf-8 -*-

+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+import logging
+from coremltools.converters.mil.frontend.tensorflow.parsed_tf_node import ParsedTFNode
+from coremltools.converters.mil.frontend.tensorflow.basic_graph_ops import (
+    disconnect_edge,
+    connect_edge,
+    delete_node,
+    replace_node,
+    replace_dest,
+    connect_edge_at_index,
+)
+
+
+def _rename_node_in_fn(node, new_name, fn):
+    """
+    Rename a node and all of its connections.
+
+    Parameters
+    ----------
+    node: ParsedTFNode
+        Node to rename.
+    new_name: str
+        New name of the node.
+    fn: SSAFunction
+        Function that contains graph to operate on.
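+
+    Both data edges (inputs / outputs) and control edges (control_inputs /
+    control_outputs) of every neighboring node are updated, and the node is
+    re-keyed in the function's graph dict.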
+    """
+    old_name = node.name
+    node.name = new_name
+    for i in node.inputs:
+        idx = fn.graph[i].outputs.index(old_name)
+        fn.graph[i].outputs[idx] = new_name
+        if old_name in fn.graph[i].control_outputs:
+            idx = fn.graph[i].control_outputs.index(old_name)
+            fn.graph[i].control_outputs[idx] = new_name
+
+    for o in node.outputs:
+        idx = fn.graph[o].inputs.index(old_name)
+        fn.graph[o].inputs[idx] = new_name
+        if old_name in fn.graph[o].control_inputs:
+            idx = fn.graph[o].control_inputs.index(old_name)
+            fn.graph[o].control_inputs[idx] = new_name
+
+    for i in node.control_inputs:
+        if old_name in fn.graph[i].control_outputs:
+            idx = fn.graph[i].control_outputs.index(old_name)
+            fn.graph[i].control_outputs[idx] = new_name
+
+    for o in node.control_outputs:
+        if old_name in fn.graph[o].control_inputs:
+            idx = fn.graph[o].control_inputs.index(old_name)
+            fn.graph[o].control_inputs[idx] = new_name
+
+    fn.graph[new_name] = fn.graph.pop(old_name)
+
+
+def _flatten_sub_graph_namespaces(tf_ssa, fn_name):
+    """
+    A pass to flatten namespaces for the sub-graphs of control flow ops such
+    as while_loop. For example, a while_loop has two sub-graphs, "cond" and
+    "body", and every node in those sub-graphs will be prefixed with the
+    sub-graph's name. This pass is required for converting control flow v2
+    ops (enabled by default in TensorFlow 2.0+), as the original sub-graphs
+    may contain duplicated names.
+
+    Parameters
+    ----------
+    tf_ssa: NetworkEnsemble
+        An object that contains multiple functions / sub-graphs.
+    fn_name: str
+        Name of the function / sub-graph to operate on.
+    """
+    count = 0
+    fn = tf_ssa.functions.get(fn_name)
+    for name, node in fn.graph.copy().items():
+        if node.op not in {"StatelessWhile", "While", "StatelessIf", "If"}:
+            continue
+
+        if node.op in {"StatelessWhile", "While"}:
+            sub_fn_names = [node.attr.get("cond"), node.attr.get("body")]
+        else:
+            sub_fn_names = [node.attr.get("then_branch"), node.attr.get("else_branch")]
+
+        for sf_name in sub_fn_names:
+            sf = tf_ssa.functions.get(sf_name)
+            prefix = "{}/{}".format(node.name, sf_name)
+
+            for old_name, n in sf.graph.copy().items():
+                _rename_node_in_fn(n, "{}/{}".format(prefix, old_name), sf)
+                count += 1
+
+            ios = set(sf.inputs + sf.outputs)
+            io_name_mappings = {n: "{}/{}".format(prefix, n) for n in ios}
+            sf.inputs = [io_name_mappings[n] for n in sf.inputs]
+            sf.outputs = [io_name_mappings[n] for n in sf.outputs]
+            _flatten_sub_graph_namespaces(tf_ssa, sf_name)
+
+    msg = "flatten_sub_graph_namespaces: {} nodes renamed in '{}'"
+    logging.info(msg.format(count, fn_name))
+
+
+def _insert_op(fn, op, name, attr=None):
+    """
+    Create a node with given attributes, then insert it into the graph of the
+    given function.
+
+    Parameters
+    ----------
+    fn: SSAFunction
+        Function that contains graph to operate on.
+    op: str
+        Type of the operation for the new node.
+    name: str
+        Name of the new node.
+    attr: dict or None (optional)
+        Attributes of the new node.
+
+    Returns
+    -------
+    node: ParsedTFNode
+        New node object.
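+
+    Examples
+    --------
+    Creating a get_tuple node, in the same way _insert_get_tuple below does
+    (the name here is illustrative):
+
+        node = _insert_op(fn, op="get_tuple", name="get_tuple/0/x", attr={"index": 0})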
+ """ + node = ParsedTFNode() + node.op = op + node.name = name + if attr is not None: + node.attr = attr + fn.graph[node.name] = node + return node + + +def _insert_function_entry(fn): + return _insert_op(fn=fn, op="function_entry", name="entry") + + +def _insert_return(fn): + return _insert_op(fn=fn, op="return", name="return") + + +def _insert_make_tuple(fn, name=None): + name = "make_tuple" if name is None else name + return _insert_op(fn=fn, op="make_tuple", name=name) + + +def _insert_get_tuple(fn, name, idx): + return _insert_op(fn=fn, op="get_tuple", name=name, attr={"index": idx}) + + +def _rewrite_cond_functions(tf_ssa, fn): + r""" + Rewrite tf.cond's sub-graphs with get_tuple, make_tuple, function_entry and + return ops. This rewrite is required in order to convert functional form + control flow v2 nodes 'StatelessIf' and 'If'. + + Parameters + ---------- + tf_ssa: NetworkEnsemble + An object that contains multiple functions / sub-graphs. + fn: SSAFunction + Function that contains graph to operate on. + + Examples + -------- + + Input: + + Before pass "main" graph: + + [const/greater/y] ---------\ + [placeholder/args_0] -> [greater] -> [if] -> [identity] + \------------------/ \--> [identity] + [placeholder/args_1] ----------------/ + + Before pass "then" graph: + + [const/sub/y] ---------------\ + [placeholder/sub_args_0] -> [sub] + [placeholder/sub_args_1] -> [identity] + + Before pass "else" graph: + + [const/add/y] ---------------\ + [placeholder/add_args_0] -> [add] + + [const/mul/y] ---------------\ + [placeholder/add_args_1] -> [mul] + + Output: + + After pass "main" graph: + + [const/greater/y] ---------\ + [placeholder/args_0] -> [greater] -> [make_tuple] -> [if] -> [get_tuple] -> [identity] + \---------------------/ \--> [get_tuple] -> [identity] + [placeholder/args_1] -------------------/ + + After pass "then" graph: + + [const/sub/y] ---------------\ + [entry] -> [get_tuple] -> [placeholder/sub_args_0] -> [sub] -> [make_tuple] -> [return] + -> [get_tuple] -> [placeholder/sub_args_1] -----------------/ + + After pass "else" graph: + + [const/add/y] ---------------\ + [entry] -> [get_tuple] -> [placeholder/add_args_0] -> [add] -> [make_tuple] -> [return] + -> [get_tuple] -> [placeholder/add_args_1] -> [mul] --------/ + [const/mul/y] ---------------/ + + """ + for cond_name, cond_node in fn.graph.copy().items(): + if cond_node.op not in {"StatelessIf", "If"}: + continue + + then_fn_name = cond_node.attr.get("then_branch") + else_fn_name = cond_node.attr.get("else_branch") + + msg = "Rewriting '{}' ({}) sub-graphs: then '{}', else '{}'" + logging.info( + msg.format(cond_node.name, cond_node.op, then_fn_name, else_fn_name) + ) + + then_fn = tf_ssa.functions.get(then_fn_name) + else_fn = tf_ssa.functions.get(else_fn_name) + + # insert function entry nodes + then_entry = _insert_function_entry(then_fn) + else_entry = _insert_function_entry(else_fn) + + # pack node inputs to a single tuple + cond_input = _insert_make_tuple(fn, "make_tuple/{}".format(cond_name)) + for ci in cond_node.inputs: + disconnect_edge(fn.graph, ci, cond_node.name) + connect_edge(fn.graph, ci, cond_input) + connect_edge(fn.graph, cond_input, cond_node.name) + + # unpack node outputs to multiple get_tuples + for i, co in enumerate(cond_node.outputs): + # utilize FunctionDef's ret to make sure function outputs and + # node outputs order matches when multiple outputs are there. + # Fallback to use original cond_node.outputs order if fails. 
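+            # For example, if a downstream node consumes "my_if:1" (output
+            # index 1 of a cond node named "my_if"), the sub-graph's
+            # FunctionDef maps ret key "identity_1" to the node producing
+            # that value, and its position in the sub-graph's outputs gives
+            # the index. ("my_if" is an illustrative name.)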
+ o_original = fn.graph[co].original_node + if o_original: + c_input = [n for n in o_original.input if str(n).startswith(cond_name)][ + 0 + ] + c_index = c_input.split(":")[-1] if ":" in c_input else 0 + mapped_name = then_fn.ret["identity_{}".format(c_index)].split(":")[0] + if mapped_name in then_fn.outputs: + idx = then_fn.outputs.index(mapped_name) + else: # in else_fn.outputs + idx = else_fn.outputs.index(mapped_name) + else: + idx = i + + cond_output = _insert_get_tuple( + fn, "get_tuple/{}/{}".format(idx, cond_name), idx + ) + edge_idx = fn.graph[co].inputs.index(cond_node.name) + replace_dest(fn.graph, cond_node, co, cond_output) + connect_edge_at_index(fn.graph, cond_output, co, edge_idx) + + # fetch inputs using get_tuple for then branch + for i, ti in enumerate(then_fn.inputs): + then_input = _insert_get_tuple( + then_fn, "get_tuple/{}/{}".format(i, ti), i + 1 + ) + connect_edge(then_fn.graph, then_entry, then_input) + replace_node(then_fn.graph, ti, then_input) + delete_node(then_fn.graph, ti) + + # fetch inputs using get_tuple for else branch + for i, ei in enumerate(else_fn.inputs): + else_input = _insert_get_tuple( + else_fn, "get_tuple/{}/{}".format(i, ei), i + 1 + ) + connect_edge(else_fn.graph, else_entry, else_input) + replace_node(else_fn.graph, ei, else_input) + delete_node(else_fn.graph, ei) + + # returns a tuple of value(s) as output for then branch + then_output = _insert_make_tuple(then_fn) + for to in then_fn.outputs: + if to not in then_fn.graph.keys(): + # from identity, map back to get_tuple node + to = "get_tuple/{}/{}".format(then_fn.inputs.index(to), to) + connect_edge(then_fn.graph, to, then_output.name) + + then_return = _insert_return(then_fn) + connect_edge(then_fn.graph, then_output.name, then_return.name) + + # returns a tuple of value(s) as output for else branch + else_output = _insert_make_tuple(else_fn) + for eo in else_fn.outputs: + if eo not in else_fn.graph.keys(): + # from identity, map back to get_tuple node + eo = "get_tuple/{}/{}".format(else_fn.inputs.index(eo), eo) + connect_edge(else_fn.graph, eo, else_output.name) + + else_return = _insert_return(else_fn) + connect_edge(else_fn.graph, else_output.name, else_return.name) + + +def _eliminate_loop_cond_nodes(tf_ssa, fn): + """ + Eliminate loop condition nodes, such as loop_counters, max_iterations from + the cond sub-graph and body sub-graph of tf.while_loop. + + Parameters + ---------- + tf_ssa: NetworkEnsemble + An object that contains multiple functions / sub-graphs. + fn: SSAFunction + Function that contains graph to operate on. 
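+
+    TensorFlow's control flow v2 lowering passes the loop counter and the
+    maximum-iterations tensor as the first two loop variables of a while op;
+    they carry no user data, so both sub-graphs can drop them.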
+ + Examples + -------- + + Input: + + Before pass "main" graph: + + [while/maximum_iterations] -----\ + [while/loop_counter] -------> [while] --> [identity] + [placeholder/args_0] ----------/ + + Before pass "cond" graph: + + [const/mean] -------\ + [placeholder] --> [mean] --> [greater] + [const/greater/y] --------------/ + + [while_maximum_iterations], [while_loop_counter] (not connected) + + Before pass "body" graph: + + [const/sub/y] ------\ + [placeholder] ---> [sub] + + [const/add/y] ------------\ + [while_loop_counter] --> [add] + + [while_maximum_iterations] (not connected) + + Output: + + After pass "main" graph: + + [placeholder/args_0] --> [while] --> [identity] + + After pass "cond" graph: + + [const/mean] -------\ + [placeholder] --> [mean] --> [greater] + [const/greater/y] --------------/ + + After pass "body" graph: + + [const/sub/y] ------\ + [placeholder] ---> [sub] + """ + for name, node in fn.graph.copy().items(): + if node.op not in {"StatelessWhile", "While"}: + continue + + cond_fn = tf_ssa.functions.get(node.attr.get("cond")) + body_fn = tf_ssa.functions.get(node.attr.get("body")) + + cond_lc_nodes = {cond_fn.inputs.pop(0), cond_fn.inputs.pop(0)} + logging.info("Removing {} from cond graph".format(cond_lc_nodes)) + for n in cond_lc_nodes: + delete_node(cond_fn.graph, n) + + body_lc_nodes = {body_fn.inputs.pop(0), body_fn.inputs.pop(0)} + q = list(body_lc_nodes) + + # delete entire sub-fn + while len(q) > 0: + n = body_fn.graph[q.pop(0)] + for o in n.outputs: + if o not in body_lc_nodes: + q.append(o) + body_lc_nodes.add(o) + for i in body_fn.graph[o].inputs: + if i not in body_lc_nodes: + q.append(i) + body_lc_nodes.add(i) + + # remove if in outputs + for n in body_lc_nodes: + if n in body_fn.outputs: + msg = "Removing '{}' ({}) from body fn outputs" + logging.info(msg.format(n, body_fn.graph[n].op)) + body_fn.outputs.remove(n) + + logging.info("Removing {} from body graph".format(body_lc_nodes)) + for n in body_lc_nodes: + delete_node(body_fn.graph, n) + + +def _rewrite_while_loop_functions(tf_ssa, fn): + """ + Rewrite tf.while_loop's sub-graphs with get_tuple, make_tuple, + function_entry and return ops. This rewrite is required in order to convert + functional form control flow v2 nodes 'StatelessWhile' and 'While'. + + Parameters + ---------- + tf_ssa: NetworkEnsemble + An object that contains multiple functions / sub-graphs. + fn: SSAFunction + Function that contains graph to operate on. 
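+
+    This pass expects _eliminate_loop_cond_nodes to have already run, so the
+    loop-counter bookkeeping variables are gone from both sub-graphs.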
+ + Example + ------- + + Input: + + Before pass "main" graph: + + [placeholder/args_0] --> [while] --> [identity] + + Before pass "cond" graph: + + [const/mean] -------\ + [placeholder] --> [mean] --> [greater] + [const/greater/y] --------------/ + + Before pass "body" graph: + + [const/sub/y] ------\ + [placeholder] ---> [sub] + + Output: + + After pass "main" graph: + + [placeholder/args_0] --> [make_tuple] --> [while] --> [get_tuple] --> [identity] + + After pass "cond" graph: + + [const/mean] ------\ + [entry] -> [get_tuple] -> [placeholder] -> [mean] -> [greater] -> [make_tuple] -> [return] + [const/greater/y] ------------/ + + After pass "body" graph: + + [const/sub/y] ----\ + [entry] -> [get_tuple] -> [placeholder] -> [sub] -> [make_tuple] -> [return] + """ + for while_name, while_node in fn.graph.copy().items(): + if while_node.op not in {"StatelessWhile", "While"}: + continue + + cond_fn_name = while_node.attr.get("cond") + body_fn_name = while_node.attr.get("body") + + msg = "Rewriting '{}' ({}) sub-graphs: cond '{}', body '{}'" + logging.info( + msg.format(while_node.name, while_node.op, cond_fn_name, body_fn_name) + ) + + cond_fn = tf_ssa.functions.get(cond_fn_name) + body_fn = tf_ssa.functions.get(body_fn_name) + + # insert function entry nodes + cond_entry = _insert_function_entry(cond_fn) + body_entry = _insert_function_entry(body_fn) + + # pack node inputs to a single tuple + while_input_tuple = _insert_make_tuple(fn, "make_tuple/{}".format(while_name)) + for wi in while_node.inputs: + disconnect_edge(fn.graph, wi, while_node.name) + connect_edge(fn.graph, wi, while_input_tuple) + connect_edge(fn.graph, while_input_tuple, while_node.name) + + # unpack node outputs to multiple get_tuples + for i, wo in enumerate(while_node.outputs): + # utilize FunctionDef's ret to make sure function outputs and + # node outputs order matches when multiple outputs are there. 
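+            # Same mapping as in _rewrite_cond_functions: a consumer input
+            # "while_name:N" resolves through the body FunctionDef's ret key
+            # "identity_N" to a position in body_fn.outputs.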
+ o_original = fn.graph[wo].original_node + while_input = [ + n for n in o_original.input if str(n).startswith(while_name) + ][0] + while_index = while_input.split(":")[-1] + mapped_name = body_fn.ret["identity_{}".format(while_index)].split(":")[0] + idx = body_fn.outputs.index(mapped_name) + + loop_output = _insert_get_tuple( + fn, "get_tuple/{}/{}".format(idx, while_input), idx + ) + + edge_idx = fn.graph[wo].inputs.index(while_node.name) + replace_dest(fn.graph, while_node, wo, loop_output) + connect_edge_at_index(fn.graph, loop_output, wo, edge_idx) + + # fetch inputs using get_tuple for cond fn + for i, ci in enumerate(cond_fn.inputs): + cond_input = _insert_get_tuple(cond_fn, "get_tuple/{}/{}".format(i, ci), i) + connect_edge(cond_fn.graph, cond_entry, cond_input) + replace_node(cond_fn.graph, ci, cond_input) + delete_node(cond_fn.graph, ci) + + # fetch inputs using get_tuple for body fn + for i, bi in enumerate(body_fn.inputs): + new_name = "get_tuple/{}/{}".format(i, bi) + + if bi in body_fn.outputs: # input is also an output + body_fn.outputs[body_fn.outputs.index(bi)] = new_name + + body_input = _insert_get_tuple(body_fn, new_name, i) + + connect_edge(body_fn.graph, body_entry, body_input) + replace_node(body_fn.graph, bi, body_input) + delete_node(body_fn.graph, bi) + + # returns a tuple of value(s) as output for cond fn + cond_output = _insert_make_tuple(cond_fn) + for co in cond_fn.outputs: + connect_edge(cond_fn.graph, co, cond_output.name) + + cond_return = _insert_return(cond_fn) + connect_edge(cond_fn.graph, cond_output.name, cond_return.name) + + # returns a tuple of value(s) as output for body branch + body_output = _insert_make_tuple(body_fn) + + for bo in body_fn.outputs: + connect_edge(body_fn.graph, bo, body_output.name) + + body_return = _insert_return(body_fn) + connect_edge(body_fn.graph, body_output.name, body_return.name) + + +def rewrite_control_flow_functions(tf_ssa): + for fn_name, fn in tf_ssa.functions.items(): + _rewrite_cond_functions(tf_ssa, fn) + for fn_name, fn in tf_ssa.functions.items(): + _eliminate_loop_cond_nodes(tf_ssa, fn) + _rewrite_while_loop_functions(tf_ssa, fn) + + +def flatten_sub_graph_namespaces(tf_ssa): + _flatten_sub_graph_namespaces(tf_ssa, fn_name="main") diff --git a/coremltools/converters/mil/frontend/torch/__init__.py b/coremltools/converters/mil/frontend/torch/__init__.py new file mode 100644 index 000000000..d83660848 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from ....._deps import _HAS_TORCH + +register_torch_op = None + +if _HAS_TORCH: + from .load import load + from .torch_op_registry import register_torch_op diff --git a/coremltools/converters/mil/frontend/torch/converter.py b/coremltools/converters/mil/frontend/torch/converter.py new file mode 100644 index 000000000..8bf3b635b --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/converter.py @@ -0,0 +1,374 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+
+from six import string_types as _string_types
+import logging as _logging
+import torch as _torch
+
+from coremltools.converters.mil.input_types import InputType, ImageType
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil import (
+    Placeholder,
+    Function,
+    Program,
+    get_new_symbol,
+)
+from coremltools.converters.mil.mil import Var
+
+from .internal_graph import *
+from .ops import *
+from .torch_op_registry import _TORCH_OPS_REGISTRY
+
+torch_to_mil_types = {
+    _torch.float32: types.fp32,
+    _torch.float64: types.fp64,
+    _torch.int32: types.int32,
+    _torch.int64: types.int64,
+}
+
+mil_to_torch_types = {v: k for k, v in torch_to_mil_types.items()}
+
+
+class TranscriptionContext:
+    """Maintains a map from torch operations to their MIL values
+    while building the graph. Can be used to process subgraphs recursively
+    by pushing a new context when stepping into a subgraph and popping that
+    context when stepping out."""
+
+    def __init__(self, name=None):
+        self.name = name if name else ""
+        self._current_graph = [{}]
+
+    def add(self, ssa_var, torch_name=None):
+        """
+        Arguments:
+            ssa_var: Variable to add to the graph being constructed.
+            torch_name: Optional unique string identifier of the operation. If
+                omitted, it will use @ssa_var.name.
+        """
+        if torch_name is None:
+            torch_name = ssa_var.name
+        if torch_name in self._current_graph[-1]:
+            print("Torch var {} is added again.".format(torch_name))
+            return
+        self._current_graph[-1][torch_name] = ssa_var
+
+    def __getitem__(self, torch_name):
+        """ Look up a name in the context. Note that since nested blocks must be
+        able to access anything that was defined before them, we have to
+        search all contexts for a name, starting with the most local scope.
+        """
+        for idx in reversed(range(len(self._current_graph))):
+            current_graph = self._current_graph[idx]
+            if torch_name in current_graph:
+                return self._current_graph[idx][torch_name]
+        raise ValueError(
+            "Torch var {} not found in context {}".format(torch_name, self.name)
+        )
+
+    def push(self, inputs=None):
+        """
+        Add another frame to the context. Optionally provide a tuple of
+        (name list, Var list) to populate the new context frame.
+        """
+        self._current_graph.append({})
+
+        if inputs is not None:
+            if len(inputs[0]) != len(inputs[1]):
+                raise ValueError("name list and Var list must be the same length")
+            for name, var in zip(inputs[0], inputs[1]):
+                self.add(var, torch_name=name)
+
+    def pop(self):
+        """
+        Remove and discard the top context frame.
+        """
+        self._current_graph = self._current_graph[:-1]
+
+    def __str__(self):
+        _str = ""
+        for current_graph in reversed(self._current_graph):
+            __str = ""
+            for k, v in current_graph.items():
+                if hasattr(v, "shape_str"):
+                    shape_str = v.shape_str()
+                elif hasattr(v, "sym_shape"):
+                    shape_str = v.sym_shape()
+                else:
+                    shape_str = "None"
+                __str += "%{} : {}\n".format(k, shape_str)
+            _str += __str + "\n"
+        return _str
+
+    def __repr__(self):
+        return str(self)
+
+
+class TorchConverter:
+    """Class that handles conversion of PyTorch models represented in TorchScript
+    format to the MIL format.
+ + Models passed to the @TorchConverter go from: + TorchScript -> Expanded/Optimized Torch IR -> Internal Graph -> CoreML SSA + The internal graph representation was added to make testing easier. + + Arguments: + torchscript: torch.jit.ScriptModule object representing the model to convert. + inputs: Input values and optional names. See kwarg in load.py for full description. + outputs: Names of the graph's outputs. See kwarg in load.py for full description. + cut_at_symbols: A list of internal symbol name strings. Graph conversion will + terminate once these symbols have been generated. For debugging use + only. See kwarg in load.py. + """ + + def __init__( + self, torchscript, inputs, outputs=None, cut_at_symbols=None, + ): + assert isinstance(torchscript, _torch.jit.ScriptModule) + self.inputs = inputs + for idx, inp in enumerate(self.inputs): + if isinstance(inp, ImageType) and self.inputs[idx].channel_first is None: + self.inputs[idx].channel_first = True + self.torchscript = torchscript + self.output_names = outputs + self.context = TranscriptionContext() + raw_graph, params_dict = self._expand_and_optimize_ir(self.torchscript) + self.graph = InternalTorchIRGraph( + raw_graph, params_dict, self.inputs, cut_at_symbols + ) + self._flatten_graph_input_values() + self._flatten_graph_output_values() + + def _flatten_graph_input_values(self): + """ CoreML can't handle nested iterables of tensors, so we flatten the + inputs of any graph that expects them. + """ + new_graph_inputs = self.graph.inputs + all_new_nodes = [] + changed = True + notified = False + + while changed: + old_graph_inputs = new_graph_inputs + new_graph_inputs = OrderedDict() + new_nodes = [] + changed = False + for _input_name, _input_val in old_graph_inputs.items(): + if isinstance(_input_val, (tuple, list)): + changed = True + if not notified: + notified = True + _logging.warning( + "Tuple detected at graph input. This will be flattened in the converted model." + ) + # If this input to the graph is a tuple, we want to replace it + # with a flattened version and add an op to construct the tuple. + node_inputs = [] + for idx, item in enumerate(_input_val): + name = _input_name + "_{}".format(idx) + new_graph_inputs[name] = item + node_inputs.append(name) + new_nodes.append( + InternalTorchIRNode( + inputs=node_inputs, + outputs=[_input_name], + kind="tupleconstruct", + ) + ) + else: + # This input isn't a tuple, keep it as is. + new_graph_inputs[_input_name] = _input_val + all_new_nodes = new_nodes + all_new_nodes + self.graph.inputs = new_graph_inputs + self.graph.nodes = all_new_nodes + self.graph.nodes + self.inputs = [v for v in self.graph.inputs.values()] + + def _flatten_graph_output_values(self): + """ CoreML can't handle nested iterables of tensors, so we flatten the + outputs of any graph that produces them. + """ + node_names = [node.name for node in self.graph.nodes] + new_graph_outputs = self.graph.outputs + changed = True + notified = False + + while changed: + old_graph_outputs = new_graph_outputs + new_graph_outputs = [] + changed = False + for outp in old_graph_outputs: + # Find the node that generates this output var. + # It is possible to not find the output var in the list of node + # names since nodes are named after their first output. In that + # case, it means the output var comes from a node that returns + # multiple outputs, which means that node cannot be a construct op. 
+ try: + node_idx = node_names.index(outp) + except: + # @outp doesn't come from a construct op + new_graph_outputs.append(outp) + continue + if self.graph.nodes[node_idx].kind in [ + "tupleconstruct", + "listconstruct", + ]: + # Since this output came from a construct op, we can replace it + # with the inputs to the op. + new_graph_outputs.extend(self.graph.nodes[node_idx].inputs) + changed = True + if not notified: + notified = True + _logging.warning( + "Tuple detected at graph output. This will be flattened in the converted model." + ) + else: + new_graph_outputs.append(outp) + # Note: if we flattened outputs, there are likely to be construct ops + # that are no longer needed. These will be removed in a later DCE pass. + self.graph.outputs = new_graph_outputs + + @staticmethod + def _check_ops(graph): + """ Returns the set of ops in @graph that are implemented, and the set + for which no conversion function is registered. @graph can be + either InternalTorchIRGraph or InternalTorchIRBlock.""" + implemented_ops = set() + missing_ops = set() + for node in graph.nodes: + _add_op = _TORCH_OPS_REGISTRY.get(node.kind, None) + if _add_op is None: + missing_ops.add(node.kind) + else: + implemented_ops.add(node.kind) + for block in node.blocks: + _impl, _miss = TorchConverter._check_ops(block) + implemented_ops.update(_impl) + missing_ops.update(_miss) + return implemented_ops, missing_ops + + @staticmethod + def _create_placeholder(_input): + """Converts an InputType torch.Tensor into a Placeholder. + """ + shape = _input.shape.shape + dtype = _input.dtype + return mb.placeholder(shape, dtype=dtype) + + def check_ops(self): + """ Returns the set of ops in @self.graph that are implemented, and + the set for which no conversion function is registered.""" + return TorchConverter._check_ops(self.graph) + + def convert(self): + + _logging.info("Converting graph.") + + # This will hold the converted model. + prog = Program() + + # Construct placeholder for input to ssa function + # This is where input renaming occurs + ssa_func_inputs = OrderedDict() + for index, (name, spec) in enumerate(self.graph.inputs.items()): + placeholder = self._create_placeholder(spec) + # Set ssa function input name to user defined name if provided. + if spec.name is not None: + name = spec.name + self.inputs[index].name = name + ssa_func_inputs[name] = placeholder + prog.set_main_input_types(tuple(self.inputs)) + + # Initialize the SSA for conversion + with Function(ssa_func_inputs) as ssa_func: + + # Map internal @self.graph.inputs to user specified @ssa_func_inputs + # If @self.graph.inputs == @ssa_func_inputs this just adds the inputs + # to the context. 
+ for internal_name, users_name in zip( + self.graph.inputs.keys(), ssa_func_inputs.keys() + ): + self.context.add(ssa_func.inputs[users_name], torch_name=internal_name) + for name, val in self.graph.params.items(): + mode = decide_immediate_or_file(val) + const = mb.const(val=val, mode=mode, name=name) + self.context.add(const) + + # Add the rest of the operations + convert_nodes(self.context, self.graph) + + graph_outputs = [self.context[name] for name in self.graph.outputs] + # Output renaming occurs + if self.output_names: + for index, var in enumerate(graph_outputs): + output_rename = self.output_names[index] + var.name = output_rename + + ssa_func.set_outputs(graph_outputs) + prog.add_function("main", ssa_func) + + # TODO (sberardi): graph cleanup passes + # rdar://60177439 + return prog + + @staticmethod + def _expand_and_optimize_ir(torchscript): + """Given a torch.jit.ScriptModule, convert it to a optimized + torch._C.Graph and dict of model parameter's names to tensors. + """ + + # Recursively replaces all attribute accesses with the sub-graphs of + # those modules. The resulting graph will be self-contained and will + # not reference into other modules. Params will contain the "trainable" + # inputs to the graph. + graph, params = _torch._C._jit_pass_lower_graph( + torchscript.forward.graph, torchscript._c + ) + + # From PyTorch code: Inline function and method calls. + _torch._C._jit_pass_inline(graph) + # From PyTorch code: This inlines the forked section in the fork() + # callsite and replaces uses of the result of wait() calls with the + # values produced from the (now-inlined) forked section. + _torch._C._jit_pass_inline_fork_wait(graph) + # Starting from the return node, marks all nodes that feed into the + # output, as well as nodes with side effects. Any nodes not marked are + # eliminated. + _torch._C._jit_pass_dce(graph) + # From PyTorch code: checks well-formedness and invariants of graph. + _torch._C._jit_pass_lint(graph) + # From PyTorch code: remove all in-place ops and replace them with + # out-of-place equivalents. + # e.g. + # %foo = aten::add_(%foo, %n) + # becomes + # %foo.2 = aten::add(%foo, %n) + _torch._C._jit_pass_remove_inplace_ops(graph) + _torch._C._jit_pass_dce(graph) + _torch._C._jit_pass_lint(graph) + # Replaces a couple specific ops patterns (add, sub, mul, div, chunk). + _torch._C._jit_pass_canonicalize_ops(graph) + _torch._C._jit_pass_lint(graph) + # From PyTorch code: This pass catches all of the small, easy to catch + # peephole optimizations you might be interested in doing. + # Eliminate no-op 'expand' nodes + # Simplify x.t().t() to x + _torch._C._jit_pass_peephole(graph, addmm_fusion_enabled=False) + _torch._C._jit_pass_lint(graph) + # From PyTorch docs: Renumber the graph so that all structurally + # equivalent graphs have same numbers. + graph = _torch._C._jit_pass_canonicalize(graph) + _torch._C._jit_pass_lint(graph) + _torch._C._jit_pass_constant_propagation(graph) + # NOTE: Don't need another DCE, it's included in constant propagation. 
+ _torch._C._jit_pass_lint(graph) + + input_and_param_names = [val.debugName() for val in graph.inputs()] + param_names = input_and_param_names[len(input_and_param_names) - len(params) :] + params_dict = dict(zip(param_names, params)) + + return graph, params_dict diff --git a/coremltools/converters/mil/frontend/torch/internal_graph.py b/coremltools/converters/mil/frontend/torch/internal_graph.py new file mode 100644 index 000000000..22f32abd8 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/internal_graph.py @@ -0,0 +1,221 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from collections import OrderedDict +from itertools import islice + +import torch + + +def _make_ssa_name(name): + """Converts a symbol name (string) into an SSA name, by prepending '%'. + Only used for pretty printing the graph. + """ + return "%" + name + + +def _ssa_name_list(names): + """Take a list of symbol names (strings) and return them as SSA names. Only + used for pretty printing the graph. + """ + return [_make_ssa_name(x) for x in names] + + +class InternalTorchIRBlock: + """CoreML internal representation of a torch IR block. + + Arguments: + raw_block: The torch._C.Block to convert, or None. + nodes: If @raw_block is None, the list of InternalTorchIRNodes in the block + inputs: If @raw_block is None, the list of input symbols. + outputs: If @raw_block is None, the list of output symbols. + """ + + def __init__(self, raw_block=None, nodes=None, inputs=None, outputs=None): + self.nodes = [] + self.inputs = [] + self.outputs = [] + + if raw_block: + # Add nodes + for raw_node in raw_block.nodes(): + self.nodes.append(InternalTorchIRNode(raw_node)) + + # Add inputs + for inp in raw_block.inputs(): + self.inputs.append(inp.debugName()) + + # Add outputs + for outp in raw_block.outputs(): + self.outputs.append(outp.debugName()) + else: + self.nodes = nodes + self.inputs = inputs + self.outputs = outputs + + def __str__(self, indent=2): + indent_str = " " * indent + graph_str = "{}block({}):\n".format( + indent_str, ", ".join(_ssa_name_list(self.inputs)) + ) + graph_str += "{}\n".format(indent_str).join( + [x.__str__(indent=indent + 2) for x in self.nodes] + ) + graph_str += "\n{}return ({})".format( + indent_str, ", ".join(_ssa_name_list(self.outputs)) + ) + return graph_str + + def __repr__(self): + return str(self) + + +class InternalTorchIRNode: + """CoreML internal representation of a torch IR node. + Can construct itself from a provided torchIR node or manually constructed with + args for testing. + + See InternalTorchIRGraph for the motivation behind this structure. + + Arguments: + node: The torch._C.Node to convert, or None. + attr: If @node is not specified, the dict of named attributes. + inputs: If @node is not specified, the list of input symbols. + outputs: If @node is not specified, the list of output symbols. + kind: If @node is not specified, the kind (op) of the node. + blocks: If @node is not specified, the list of InternalTorchIRBlock. 
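+
+    Note that a node is named after its first output symbol.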
+ """ + + def __init__( + self, node=None, attr=None, inputs=None, outputs=None, kind=None, blocks=None, + ): + if node: + self.inputs = [_input.debugName() for _input in node.inputs()] + self.outputs = [output.debugName() for output in node.outputs()] + self.name = self.outputs[0] + self.kind = node.kind().split("::")[-1].lower() + self.blocks = [InternalTorchIRBlock(raw_block=b) for b in node.blocks()] + self.attr = { + name: getattr(node, node.kindOf(name))(name) + for name in node.attributeNames() + } + if "value" not in self.attr: + self.attr["value"] = None + # If the output is boolean, explicitly cast it so type inference + # will work correctly. + if len(self.outputs) == 1 and next(node.outputs()).type().str() == "bool": + self.attr["value"] = bool(self.attr["value"]) + else: + self.inputs = inputs + self.outputs = outputs + self.name = self.outputs[0] + self.kind = kind + self.blocks = blocks if blocks is not None else [] + self.attr = attr if attr is not None else {"value": None} + + def __str__(self, indent=2): + node_str = " " * indent + "{} = {}".format( + ", ".join(_ssa_name_list(self.outputs)), self.kind + ) + node_str += "[{}]".format( + ", ".join( + ["{}={}".format(n, v) for n, v in self.attr.items() if v is not None] + ) + ) + node_str += "({})".format(", ".join(_ssa_name_list(self.inputs))) + for b in self.blocks: + node_str += "\n" + b.__str__(indent=indent + 2) + return node_str + + def __repr__(self): + return str(self) + + +class InternalTorchIRGraph: + """CoreML internal representation of a torch IR graph. A torch._C.Graph + object is not an ideal structure to use in converting to CoreML. Conversion + to an InternalTorchIRGraph is inserted between the original graph and the + final CoreML model to address several issues: + 1. A torch._C.graph is hard to work with. For example, its .inputs() + and .outputs() functions return iterators, so the only way to + determine the number of inputs/outputs is by counting to the end. + There are other examples of why the torch structure is hard to work + with, and this structure alleviates those isses. + 2. torch._C.graph is an internal API and so we can't count on its + stability. By inserting a layer in between, we can handle any changes + to torch._C.graph here and isolate the ops code that processes the + graph. + 3. torch._C.graph does not expose a Python constructor. This makes + it impossible to write unit tests that isolate specific ops since + they have to come from actually converting a PyTorch graph. With an + internal structure, we can directly build the test cases we need for + unit testing. + + Arguments: + raw_graph: The torch._C.Graph to convert. + params_dict: A dictionary mapping graph parameter names to tensors. + input_spec: A list of InputType objects, describing the name, + shape, and dtype of graph inputs. + cut_at_symbols: The list of desired outputs from the graph. Must + be present in the graph. For debugging use only. + See kwarg in load.py for more information. 
+ """ + + def __init__( + self, raw_graph, params_dict, input_spec, cut_at_symbols=None, + ): + self.nodes = [] + self.params = {} + self.inputs = OrderedDict() + self.outputs = [] + + # Add nodes + for raw_node in raw_graph.nodes(): + self.nodes.append(InternalTorchIRNode(raw_node)) + + # Add params + for name, param in params_dict.items(): + value = param.detach().numpy() + self.params[name] = value + + # Add inputs + for index, _input in enumerate(islice(raw_graph.inputs(), len(input_spec))): + name = _input.debugName() + spec = input_spec[index] + self.inputs[name] = spec + + # Add outputs, cutting if @cut_at_symbols is set + output_names = cut_at_symbols + if output_names is None: + output_names = [x.debugName() for x in raw_graph.outputs()] + for output in output_names: + self.outputs.append(output) + + def __str__(self): + graph_str = "graph(\n" + graph_str += self._format_inputs(self.inputs, unpack=True) + graph_str += self._format_inputs(self.params) + graph_str += "):\n" + graph_str += "\n".join([str(x) for x in self.nodes]) + "\n" + graph_str += "return ({})".format(", ".join(_ssa_name_list(self.outputs))) + return graph_str + + def _format_inputs(self, inputs, unpack=False): + def tensor_str(x): + return "Tensor{}".format( + tuple(list(x.shape.shape if unpack else x.shape) + [str(x.dtype)]) + ) + + inp_str = "" + for k, v in inputs.items(): + if isinstance(v, (tuple, list)): + shape_str = "({})".format(", ".join([tensor_str(x) for x in v])) + else: + shape_str = tensor_str(v) + inp_str += " {} : {},\n".format(_make_ssa_name(k), shape_str) + return inp_str + + def __repr__(self): + return str(self) diff --git a/coremltools/converters/mil/frontend/torch/load.py b/coremltools/converters/mil/frontend/torch/load.py new file mode 100644 index 000000000..8bd41c372 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/load.py @@ -0,0 +1,102 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ + +import logging as _logging +import os.path as _os_path + +import torch as _torch + +from six import string_types as _string_types +from .converter import TorchConverter, torch_to_mil_types +from coremltools.converters.mil.input_types import InputType, TensorType +from coremltools.converters.mil.mil import Program, types + + +def load(model_spec, debug=False, **kwargs): + """ + Convert PyTorch .pt file to mil CoreML format. + + Parameters + ---------- + model_spec: String path to .pt file, or a TorchScript object representing + the model to convert. + debug: bool, optional. Defaults to False. + This flag should generally be False except for debugging purposes + for diagnosing conversion errors. Setting this flag to True will + print the list of supported and unsupported ops found in the model + if conversion fails due to an unsupported op. + inputs: Can be a singular element or list of elements of the following form + 1. Any subclass of InputType + 2. torch.Tensor (only shape and dtype will be used) + 3. list of (1. or 2.) + Inputs are parsed in the flattened order that the model accepts them. + If names are not specified: input keys for calling predict on the converted model + will be internal symbols of the input to the graph. + User can specify a subset of names. + outputs (optional): List of output name strings. 
If specified: keys of output dictionary + will be these names in order of flattened returned outputs. If not specified: + output dictionary keys will be the internal output symbols in the graph. + User can specify a subset of names. + cut_at_symbols (optional): List of internal symbol name strings. Graph conversion will + terminate once these symbols have been generated. For debugging use + only. + """ + + torchscript = _torchscript_from_model(model_spec) + + def _convert_to_inputtype(inputs): + input_type = [] + for _input in inputs: + if isinstance(_input, (list, tuple)): + input_type.append(_convert_to_inputtype(_input)) + elif isinstance(_input, InputType): + input_type.append(_input) + elif isinstance(_input, _torch.Tensor): + input_type.append( + TensorType( + shape=_input.shape, dtype=torch_to_mil_types[_input.dtype] + ) + ) + else: + raise ValueError( + "Unknown type {} for conversion to InputType.".format(type(_input)) + ) + return input_type + + inputs = _convert_to_inputtype(kwargs["inputs"]) + outputs = kwargs.get("outputs", None) + cut_at_symbols = kwargs.get("cut_at_symbols", None) + converter = TorchConverter(torchscript, inputs, outputs, cut_at_symbols) + + try: + prog = converter.convert() + except RuntimeError as e: + if debug and "convert function" in str(e): + implemented, missing = converter.check_ops() + print("the following model ops are IMPLEMENTED:") + print("\n".join([" " + str(x) for x in sorted(implemented)])) + print("the following model ops are MISSING:") + print("\n".join([" " + str(x) for x in sorted(missing)])) + raise e + except Exception as e: + raise e + + return prog + + +def _torchscript_from_model(model_spec): + if isinstance(model_spec, _string_types) and model_spec.endswith(".pt"): + filename = _os_path.abspath(model_spec) + return _torch.jit.load(filename) + elif isinstance(model_spec, _torch.jit.ScriptModule): + return model_spec + else: + raise TypeError( + "@model must either be a PyTorch .pt file or a TorchScript object, received: {}".format( + type(model_spec) + ) + ) diff --git a/coremltools/converters/mil/frontend/torch/ops.py b/coremltools/converters/mil/frontend/torch/ops.py new file mode 100644 index 000000000..483002eb7 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/ops.py @@ -0,0 +1,1858 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import logging as _logging + +import math as _math +import numpy as _np +from tqdm import tqdm as _tqdm + +from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.var import Var, ListVar +from coremltools.converters.mil.mil import Placeholder, Symbol +from .internal_graph import * +from .torch_op_registry import _TORCH_OPS_REGISTRY, register_torch_op + +# The pytorch args for many of the below ops were sourced from +# https://github.com/pytorch/pytorch/blob/d971007c291c0ead1003d12cd553d18ddb582207/torch/csrc/jit/mobile/register_mobile_ops.cpp#L216 + + +# This is a magic number in PyTorch. It's used as a default value in many +# functions. 
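+# It equals 2 ** 63 - 1, i.e. torch.iinfo(torch.int64).max, which PyTorch
+# passes when an optional integer argument (e.g. the end index of a slice)
+# is left unset.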
+PYTORCH_MAGIC_DEFAULT = 9223372036854775807 + + +def _all_outputs_present(context, graph): + """ Returns true if all the symbols in the graph's output list are + present in context.""" + for outp in graph.outputs: + try: + context[outp] + except ValueError: + return False + return True + + +def convert_nodes(context, graph): + """Iterate over the nodes of a graph or block and convert to MIL. + + Arguments: + context: A TranscriptionContext object to pull node inputs and + assign node outputs. + graph: An InternalTorchIRGraph or InternalTorchIRBlock object. + """ + for node in _tqdm(graph.nodes, desc="Converting Frontend ==> MIL Ops", unit=" ops"): + _add_op = _TORCH_OPS_REGISTRY.get(node.kind, None) + _logging.info("Converting op {} : {}".format(node.name, node.kind)) + if _add_op is None: + raise RuntimeError( + "PyTorch convert function for op {} not implemented".format(node.kind) + ) + else: + _add_op(context, node) + + # We've generated all the outputs the graph needs, terminate conversion. + if _all_outputs_present(context, graph): + break + + +def convert_block(context, block, inputs): + """Convert a block (sub-graph) to MIL. Conversion happens within a new + context frame. + + Arguments: + context: A TranscriptionContext object to pull node inputs and + assign node outputs. + block: An InternalTorchIRBlock object. + inputs: List of Vars from the outer context that map to the block's + expected inputs. The number of inputs provided must match the + number expected by the block. + """ + + assert len(block.inputs) == len(inputs) + + # Start a new context frame. + context.push((block.inputs, inputs)) + + # Add the block ops. + convert_nodes(context, block) + + # Collect the block outputs. + outputs = [context[outp] for outp in block.outputs] + + # Return to the previous context frame. + context.pop() + return outputs + + +# Some ops will receive a dtype input as an integer +# which maps to a torch dtype. The below mapping was found by +# converting test models with different dtypes passed to ones. +NUM_TO_TORCH_DTYPE = { + 0: torch.uint8, + 1: torch.int8, + 2: torch.int16, + 3: torch.int32, + 4: torch.int64, + 5: torch.float16, + 6: torch.float32, + 7: torch.float64, + 11: torch.bool, +} + + +def decide_immediate_or_file(val): + if ( + val is not None + and isinstance(val, (_np.ndarray, _np.generic)) + and val.size >= 10 + ): + return "file_value" + return "immediate_value" + + +def _get_inputs(context, node, expected=None): + """Look up a node's inputs in @context and return them as a list. If + @expected is not None, also verifies the number of inputs matches the + value of @expcted. + """ + inputs = [context[name] for name in node.inputs] + if expected is not None and len(inputs) != expected: + raise ValueError( + "node {} ({}) got {} input(s), expected {}".format( + node.name, node.kind, len(inputs), expected + ) + ) + return inputs + + +def _list_select(ls, index): + """ Sometimes we need to select a specific item from a list. If that item + is known at compile time, extract it as a const. Otherwise, if it's + symbolic, use gather. + """ + # TODO: gather doesn't work when the shape is known size. + if ls.sym_val is not None and not isinstance(ls.sym_val[index], Symbol): + res = mb.const(val=ls.sym_val[index]) + else: + res = mb.gather(x=ls, indices=index) + return res + + +def _construct_constant(val, name): + # Converter cannot handle torch tensors. 
+ if isinstance(val, torch.Tensor): + val = val.numpy() + + # MIL casts ints to int32, which can't represent the 64 bit magic number. + # So we instead represent it with None, and any ops that might get the + # value will check for None instead. + if isinstance(val, int) and val == PYTORCH_MAGIC_DEFAULT: + val = None + + mode = decide_immediate_or_file(val) + if val is None: + return None + else: + return mb.const(mode=mode, val=val, name=name) + + +@register_torch_op +def constant(context, node): + assert len(node.inputs) == 0 + assert len(node.outputs) == 1 + + name = node.name + val = node.attr["value"] + + const = _construct_constant(val, name) + context.add(const, torch_name=name) + + +def _array_construct(context, node, array_type): + assert len(node.outputs) == 1 + inputs = _get_inputs(context, node) + scalar_inputs = [ + inp + for inp in inputs + if isinstance(inp, Var) and inp.val is not None and len(inp.shape) == 0 + ] + + if len(scalar_inputs) == len(inputs): + # All the list items are compile-time scalar constants, so let's create + # a new const that concatenates them. + mode = "immediate_value" + val = array_type([inp.val for inp in inputs]) + const = mb.const(mode=mode, val=val, name=node.name) + context.add(const) + else: + # If at least one input to the construct op is non-const, collect + # the inputs and add them directly to the context. Ops that use this + # node's output will take the list directly as input. + context.add(array_type(inputs), node.name) + + +@register_torch_op +def tupleconstruct(context, node): + _array_construct(context, node, array_type=tuple) + + +@register_torch_op +def listconstruct(context, node): + _array_construct(context, node, array_type=list) + + +@register_torch_op +def eq(context, node): + inputs = _get_inputs(context, node, expected=2) + equal_to = mb.equal(x=inputs[0], y=inputs[1], name=node.name) + context.add(equal_to) + + +@register_torch_op +def ne(context, node): + inputs = _get_inputs(context, node, expected=2) + equal_to = mb.not_equal(x=inputs[0], y=inputs[1], name=node.name) + context.add(equal_to) + + +@register_torch_op +def le(context, node): + inputs = _get_inputs(context, node, expected=2) + less_equal = mb.less_equal(x=inputs[0], y=inputs[1], name=node.name) + context.add(less_equal) + + +@register_torch_op +def lt(context, node): + inputs = _get_inputs(context, node, expected=2) + less = mb.less(x=inputs[0], y=inputs[1], name=node.name) + context.add(less) + + +@register_torch_op +def ge(context, node): + inputs = _get_inputs(context, node, expected=2) + greater_equal = mb.greater_equal(x=inputs[0], y=inputs[1], name=node.name) + context.add(greater_equal) + + +@register_torch_op +def gt(context, node): + inputs = _get_inputs(context, node, expected=2) + greater = mb.greater(x=inputs[0], y=inputs[1], name=node.name) + context.add(greater) + + +@register_torch_op(torch_alias=["t", "transpose_"]) +def transpose(context, node): + assert len(node.outputs) == 1 + inputs = _get_inputs(context, node) + x = inputs[0] + + if len(node.inputs) == 1: + # PyTorch has several tranpose ops that can be emitted. This one is only + # emitted when .t() is called on a tensor, which means it can only be + # called on a matrix. 
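+        # e.g. torch.ones(3, 4).t() yields shape (4, 3); PyTorch itself
+        # rejects .t() on tensors with more than 2 dimensions.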
+        if len(x.shape) > 2:
+            raise ValueError("transpose without dims for rank > 2 is unsupported")
+        res = mb.transpose(x=x, perm=[1, 0], name=node.name)
+    else:
+        assert len(inputs) == 3
+        ax0 = inputs[1].val
+        ax1 = inputs[2].val
+
+        perm = list(range(len(x.shape)))
+        perm[ax0] = ax1
+        perm[ax1] = ax0
+
+        res = mb.transpose(x=x, perm=perm, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def permute(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    perm = mb.transpose(x=inputs[0], perm=inputs[1], name=node.name)
+    context.add(perm)
+
+
+@register_torch_op
+def matmul(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    res = mb.matmul(x=inputs[0], y=inputs[1], name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def add(context, node):
+    add_inputs = _get_inputs(context, node)
+    assert len(node.outputs) == 1
+
+    # TODO (sberardi): 3rd param to aten::add is a scale factor, need to handle that.
+    # out=input+alpha x other
+    # rdar://60175736
+    if len(add_inputs) > 2 and add_inputs[2].val != 1:
+        raise ValueError("ADD does not support scale factor param")
+
+    add_node = mb.add(x=add_inputs[0], y=add_inputs[1], name=node.name)
+    context.add(add_node)
+
+
+@register_torch_op
+def addmm(context, node):
+    # addmm(Tensor input, Tensor mat1, Tensor mat2, Scalar beta=1, Scalar alpha=1)
+    # output = beta * input + alpha * mat1 * mat2
+
+    assert len(node.inputs) == 5
+    assert len(node.outputs) == 1
+
+    inputs = [context[name] for name in node.inputs]
+    bias = inputs[0]
+    mat1 = inputs[1]
+    mat2 = inputs[2]
+    beta = inputs[3]
+    alpha = inputs[4]
+
+    if beta.val != 1.0:
+        # Apply scaling factor beta to the bias.
+        bias = mb.mul(x=beta, y=bias, name=bias.name + "_scaled")
+        context.add(bias)
+
+    if alpha.val != 1.0:
+        # Apply scaling factor alpha to the input.
+        mat1 = mb.mul(x=alpha, y=mat1, name=mat1.name + "_scaled")
+        context.add(mat1)
+
+    # MIL linear will transpose mat2, but addmm expects that mat1 and mat2
+    # can multiply as is. So we add a transpose.
+    mat2 = mb.transpose(x=mat2, perm=[1, 0], name=mat2.name + "_transposed")
+    context.add(mat2)
+
+    addmm_node = mb.linear(x=mat1, weight=mat2, bias=bias, name=node.name)
+    context.add(addmm_node)
+
+
+@register_torch_op(torch_alias=["conv2d"])
+def _convolution(context, node):
+    inputs = _get_inputs(context, node)
+
+    x = inputs[0]
+    weight = inputs[1]
+    bias = inputs[2]
+    strides = inputs[3]
+
+    # Expand padding. Torch accepts either an int (for all dimensions) or an n-tuple of ints (one per dimension), but
+    # we require a (2 * n)-tuple, where n is the number of spatial dimensions, start and end for each spatial dimension
+    pad = inputs[4]
+    if weight.val.ndim in (3, 4):
+        # 1D and 2D: Need to explicitly state L-R, T-B pad
+        pad = _np.repeat(pad.val, 2)
+    elif weight.val.ndim == 5:
+        # 3D: Need to explicitly state F-Bk, L-R, T-B pad
+        if type(pad.val) == int:
+            pad = _np.repeat(pad.val, 6)
+        elif len(pad.val) == 3:
+            pad = _np.repeat(pad.val, 2)
+    else:
+        raise ValueError(
+            "Invalid weight dimension. Must be 3, 4, or 5 for 1D, 2D, or 3D convolution, respectively."
+        )
+
+    dilations = inputs[5]
+    if len(inputs) == 12:
+        transposed = inputs[6].val
+        out_pad = inputs[7]  # unused
+        group = inputs[8]
+
+        if any([v != 0 for v in out_pad.val]):
+            raise ValueError(
+                "convolution does not support output_padding (given {})".format(out_pad)
+            )
+    elif len(inputs) == 7:
+        transposed = False
+        group = inputs[6]
+    else:
+        raise ValueError(
+            "unexpected number of inputs for node {} ({}): {}".format(
+                node.name, node.kind, len(inputs)
+            )
+        )
+
+    kwargs = {
+        "x": x,
+        "strides": strides,
+        "pad_type": "custom",
+        "pad": pad,
+        "dilations": dilations,
+        "groups": group,
+        "name": node.name,
+    }
+
+    # Bias is optional in PyTorch's convolution.
+    if bias is not None:
+        kwargs["bias"] = bias
+
+    if transposed is True:
+        # Transposed convolution
+
+        # PyTorch weight ordering [Cin, Cout, H, W]
+        # MIL expects [Cout, Cin, H, W]
+        weight_transpose = mb.transpose(
+            x=weight, perm=[1, 0, 2, 3], name=weight.name + "_transpose"
+        )
+        kwargs["weight"] = weight_transpose
+        conv = mb.conv_transpose(**kwargs)
+    else:
+        # Normal convolution
+        kwargs["weight"] = weight
+        conv = mb.conv(**kwargs)
+
+    context.add(conv)
+
+
+@register_torch_op
+def softmax(context, node):
+    inputs = _get_inputs(context, node)
+
+    x = inputs[0]
+    axis = inputs[1]
+    res = mb.softmax(logit=x, axis=axis, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def flatten(context, node):
+    inputs = _get_inputs(context, node)
+
+    x = inputs[0]
+    dims = list(x.shape)
+    start = inputs[1].val
+    end_val = inputs[2].val
+
+    total = 1
+    if end_val < 0:
+        end = len(dims) + end_val
+    else:
+        end = end_val
+
+    if start > len(dims) or end > len(dims) or start < 0 or end < 0:
+        raise ValueError(
+            "Invalid start and end. (start, end) == ({}, {})".format(start, end_val)
+        )
+    if start > end:
+        raise ValueError(
+            "Start must be before end. (start, end) == ({}, {})".format(start, end_val)
+        )
+    for dim in dims[start : end + 1]:
+        total *= dim
+    dims = dims[:start] + [total] + dims[end + 1 :]
+
+    reshape = mb.reshape(x=x, shape=dims, name=node.name)
+    context.add(reshape)
+
+
+@register_torch_op(torch_alias=["relu_"])
+def relu(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+
+    res = mb.relu(x=inputs[0], name=node.name)
+    context.add(res)
+
+
+def _adjust_pad_for_ceil_mode(input_shape, kernel_sizes, stride_sizes, pad_sizes):
+    """ Given an input tensor shape and pooling parameters, add the extra
+    padding needed to replicate ceil_mode. If no extra padding is needed,
+    the original pad sizes are returned; otherwise, the adjusted pad sizes
+    are returned.
+    """
+    new_pad = pad_sizes
+    for idx in range(len(input_shape)):
+        dim = input_shape[idx]
+        kernel = kernel_sizes[idx]
+        stride = stride_sizes[idx]
+        pad = pad_sizes[idx * 2 : idx * 2 + 2]
+        out_numerator = dim + pad[0] + pad[1] - kernel
+        remainder = out_numerator % stride
+        # Additional padding is added only on one side.
+        # https://stackoverflow.com/questions/59906456/in-pytorchs-maxpool2d-is-padding-added-depending-on-ceil-mode
+        if remainder > 0:
+            # MIL pooling does not support ceil_mode natively, but we can
+            # work around that by padding the input appropriately.
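+            # e.g. dim=5, kernel=2, stride=2, pad=(0, 0): out_numerator is 3
+            # and remainder is 1, so stride - remainder == 1 extra pad is
+            # added to the trailing edge below.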
+            # rdar://60634390
+            _logging.warning("pooling padding adjusted to support ceil_mode=True")
+            new_pad[2 * idx + 1] += stride - remainder
+
+    return new_pad
+
+
+@register_torch_op
+def max_pool2d(context, node):
+    inputs = _get_inputs(context, node)
+
+    x = inputs[0]
+    kernel_sizes = inputs[1]
+    strides = inputs[2]
+    pad_type = "custom"
+
+    # Need to explicitly state L-R, T-B pad
+    pad = inputs[3]
+    pad = _np.repeat(pad.val, 2)
+    dilation = inputs[4].val
+    ceil_mode = inputs[5].val
+    if _np.any(dilation > 1):
+        # See: rdar://60633736 (Implement dilation for mil op max_pool)
+        raise ValueError("@max_pool2d does not support dilation > 1")
+    if ceil_mode is True:
+        pad = _adjust_pad_for_ceil_mode(
+            x.shape[-2:], kernel_sizes.val, strides.val, pad
+        )
+
+    pool = mb.max_pool(
+        x=x,
+        kernel_sizes=kernel_sizes,
+        strides=strides,
+        pad_type=pad_type,
+        pad=pad,
+        name=node.name,
+    )
+    context.add(pool)
+
+
+@register_torch_op
+def div(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+
+    res = mb.real_div(x=inputs[0], y=inputs[1], name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def floor_divide(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    res = mb.floor_div(x=inputs[0], y=inputs[1], name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def mul(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+
+    res = mb.mul(x=inputs[0], y=inputs[1], name=node.name)
+    context.add(res)
+
+
+@register_torch_op(torch_alias=["pow"])
+def pow_(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+
+    res = mb.pow(x=inputs[0], y=inputs[1], name=node.name)
+    context.add(res)
+
+
+@register_torch_op(torch_alias=["rsub"])
+def sub(context, node):
+    inputs = _get_inputs(context, node, expected=3)
+    assert len(node.outputs) == 1
+
+    if node.kind == "rsub":
+        # rsub reverses the order of arguments
+        y = inputs[0]
+        x = inputs[1]
+    else:
+        x = inputs[0]
+        y = inputs[1]
+    alpha = inputs[2].val
+
+    # TODO (sberardi): 3rd param to aten::sub is a scale factor, need to handle that.
+    # out=input-alpha x other
+    # rdar://60175736
+    if alpha != 1:
+        raise ValueError("SUB does not support scale factor param")
+
+    res = mb.sub(x=x, y=y, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def mean(context, node):
+    inputs = _get_inputs(context, node)
+
+    kwargs = {"x": inputs[0], "name": node.name}
+
+    # @axes is optional, so omit if None.
+    axes = inputs[1]
+    if axes is not None:
+        # @axes needs to be a list, but if only one axis was specified in the
+        # model, it will be constructed as an int. Construct a new constant as a
+        # list.
+        if not isinstance(axes.val, _np.ndarray):
+            axes = mb.const(val=[axes.val], name=axes.name + "_list")
+            context.add(axes)
+        kwargs["axes"] = axes
+
+    # @keep_dims is optional.
+    if len(inputs) >= 3:
+        keep_dims = inputs[2]
+        kwargs["keep_dims"] = keep_dims
+
+    # Last input to mean is an optional output tensor. We always expect this to
+    # be None or absent.
+    assert len(inputs) <= 3 or inputs[3] is None
+    res = mb.reduce_mean(**kwargs)
+    context.add(res)
+
+
+@register_torch_op
+def squeeze(context, node):
+    inputs = _get_inputs(context, node)
+    if len(inputs) == 1:
+        res = mb.squeeze(x=inputs[0], name=node.name)
+    elif len(inputs) == 2:
+        squeeze_dim = inputs[1].val
+        res = mb.squeeze(x=inputs[0], axes=(squeeze_dim,), name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def unsqueeze(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    unsqueeze = mb.expand_dims(x=inputs[0], axes=[inputs[1].val], name=node.name)
+    context.add(unsqueeze)
+
+
+@register_torch_op
+def size(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+
+    # Get the shape of the tensor.
+    shape_node = mb.shape(x=inputs[0], name=node.name + "_shape")
+    # Get the size of the tensor along the input dimension.
+    dim = inputs[1].val
+    size_node = _list_select(shape_node, dim)
+    context.add(size_node, node.name)
+
+
+@register_torch_op(torch_alias=["reshape"])
+def view(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    x = inputs[0]
+    shape = inputs[1]
+
+    if isinstance(shape, ListVar):
+        length = mb.list_length(ls=shape)
+        indices = mb.range_1d(start=0, end=length, step=1)
+        shape = mb.list_gather(ls=shape, indices=indices)
+    view = mb.reshape(x=x, shape=shape, name=node.name)
+    context.add(view)
+
+
+@register_torch_op
+def adaptive_avg_pool2d(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+
+    _input = inputs[0]
+    output_size = inputs[1].val
+    assert isinstance(output_size, _np.ndarray)
+    output_size = tuple(output_size)
+
+    if output_size == (1, 1):
+        # Represent (1,1) output size via @reduce_mean
+        # Assume channel first ordering, reduce the last two (HW) dims.
+        axes = mb.const(val=[-2, -1], name=node.name + "_axes")
+        keep_dims = mb.const(val=True, name=node.name + "_keep_dims")
+
+        avg_pool = mb.reduce_mean(
+            x=_input, axes=axes, keep_dims=keep_dims, name=node.name
+        )
+    elif _input.shape is not None:
+        # TODO: The calculations to convert adaptive_pool to standard pool,
+        # given a known input size, come from
+        # https://stackoverflow.com/questions/53841509/how-does-adaptive-pooling-in-pytorch-work
+        # However, as indicated in that SO, this isn't quite how PyTorch
+        # computes adaptive pooling, leading to inaccuracies in model outputs.
+        # rdar://60900834
+        strides = [ind // outd for ind, outd in zip(_input.shape[-2:], output_size)]
+        pad_type = "valid"
+        # Need to explicitly state L-R, T-B pad
+        pad = [0, 0, 0, 0]
+        dilation = [1, 1]
+        kernel_sizes = [
+            ind - s * (outd - 1)
+            for ind, outd, s in zip(_input.shape[-2:], output_size, strides)
+        ]
+        avg_pool = mb.avg_pool(
+            x=_input,
+            kernel_sizes=kernel_sizes,
+            strides=strides,
+            pad_type=pad_type,
+            pad=pad,
+            name=node.name,
+        )
+    else:
+        raise ValueError(
+            "adaptive_avg_pool2d only supported when input tensor size is known or output size == (1,1). "
+            "Received: input size == {}, output size == {}".format(
+                _input.shape_str(), output_size,
+            )
+        )
+
+    context.add(avg_pool)
+
+
+@register_torch_op
+def batch_norm(context, node):
+    inputs = _get_inputs(context, node, expected=9)
+    # inputs skipped:
+    # bool training (5)
+    # float momentum (6)
+    # bool cudnn_enabled (8)
+    _input = inputs[0]
+    weight = inputs[1]
+    bias = inputs[2]
+    running_mean = inputs[3]
+    running_var = inputs[4]
+    eps = inputs[7]
+    batch_norm = mb.batch_norm(
+        x=_input,
+        mean=running_mean,
+        variance=running_var,
+        gamma=weight,
+        beta=bias,
+        epsilon=eps,
+        name=node.name,
+    )
+    context.add(batch_norm)
+
+
+@register_torch_op
+def embedding(context, node):
+    inputs = _get_inputs(context, node)
+    _input = inputs[0]
+    indices = inputs[1]
+
+    padding_idx = -1
+    scale_grad_by_freq = False
+    sparse = False
+    if len(inputs) >= 3:
+        padding_idx = inputs[2].val
+    if len(inputs) >= 4:
+        scale_grad_by_freq = inputs[3].val
+    if len(inputs) >= 5:
+        sparse = inputs[4].val
+
+    if padding_idx != -1 or scale_grad_by_freq or sparse:
+        _logging.warning(
+            "CoreML embedding (gather) layer does not support any "
+            "inputs besides the weights and indices. Those given "
+            "will be ignored."
+        )
+
+    # Changing the axis from 0 is not an option in torch, so we don't expose it
+    gather = mb.gather(x=_input, indices=indices, name=node.name)
+    context.add(gather)
+
+
+@register_torch_op
+def hardtanh_(context, node):
+    inputs = _get_inputs(context, node, expected=3)
+    _input = inputs[0]
+    min_val = inputs[1].val
+    max_val = inputs[2].val
+
+    res = mb.clip(x=_input, alpha=min_val, beta=max_val, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def cat(context, node):
+    inputs = _get_inputs(context, node)
+
+    values = inputs[0]
+    if len(values) == 1:
+        # Only one item to "concatenate", so treat it as a no-op. Otherwise,
+        # the NN concatND layer will complain it only has one input.
+        context.add(values[0], node.name)
+        return
+
+    if len(inputs) < 2:
+        axis = 0
+    else:
+        axis = inputs[1]
+
+    concat = mb.concat(values=values, axis=axis, name=node.name)
+    context.add(concat)
+
+
+@register_torch_op
+def stack(context, node):
+    inputs = _get_inputs(context, node)
+
+    values = inputs[0]
+
+    if len(inputs) < 2:
+        axis = 0
+    else:
+        axis = inputs[1]
+
+    res = mb.stack(values=values, axis=axis, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def item(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+
+    if inputs[0].shape == ():
+        # MIL ops that reduce already output a scalar, so no need to do
+        # anything.
+        res = inputs[0]
+    elif _np.all([d == 1 for d in inputs[0].shape]):
+        # Item only makes sense when called on a length 1 tensor. We use
+        # reduce_max as a workaround for not having a way to extract a scalar
+        # from a symbolic tensor.
+        res = mb.reduce_max(x=inputs[0], name=node.name)
+    else:
+        raise ValueError("expected input to be a scalar or a length 1 tensor")
+    context.add(res, node.name)
+
+
+def _cast(context, node, dtype, dtype_name):
+    inputs = _get_inputs(context, node, expected=1)
+    x = inputs[0]
+    # Input must either be a scalar or a (1 x 1 x ... x 1) tensor
+    if not (len(x.shape) == 0 or _np.all([d == 1 for d in x.shape])):
+        raise ValueError("input to cast must be either a scalar or a length 1 tensor")
+
+    if x.val is not None:
+        # If x is a compile-time constant, directly cast it to @dtype if it's
+        # not one already.
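+        # e.g. _int() on a const holding 3.7 produces mb.const(val=int(3.7)),
+        # i.e. the truncated value 3.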
+        if not isinstance(x.val, dtype):
+            res = mb.const(val=dtype(x.val), name=node.name)
+        else:
+            res = x
+    else:
+        if len(x.shape) > 0:
+            # TODO: There's no MIL op to extract a value from a symbolic tensor,
+            # so as a workaround we use reduce_max to convert it to a scalar.
+            x = mb.reduce_max(x=x, name=node.name + "_item")
+        res = mb.cast(x=x, dtype=dtype_name, name=node.name)
+    context.add(res, node.name)
+
+
+@register_torch_op(torch_alias=["bool"])
+def _bool(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+
+    x = inputs[0]
+    # TODO: this is a hack and should be replaced once MIL supports cast to
+    # bool.
+    if x.val is not None and not isinstance(x.val, bool):
+        x = mb.const(val=bool(x.val), name=node.name)
+    context.add(x, node.name)
+
+
+@register_torch_op(torch_alias=["int"])
+def _int(context, node):
+    _cast(context, node, int, "int32")
+
+
+@register_torch_op
+def layer_norm(context, node):
+    inputs = _get_inputs(context, node, expected=6)
+    _input = inputs[0]
+    normalized_shape = inputs[1]
+    weight = inputs[2]
+    bias = inputs[3]
+    eps = inputs[4]
+    # cudnn_enable = inputs[5] unused
+    layer_norm = mb.layer_norm(
+        x=_input,
+        axes=normalized_shape,
+        gamma=weight,
+        beta=bias,
+        epsilon=eps,
+        name=node.name,
+    )
+    context.add(layer_norm)
+
+
+@register_torch_op
+def numtotensor(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+    x = inputs[0]
+    if x.shape != ():
+        raise ValueError(
+            "numtotensor expected scalar input, got tensor with shape {}".format(
+                x.shape
+            )
+        )
+    if isinstance(x.sym_val, Symbol):
+        context.add(x, node.name)
+    else:
+        res = mb.const(val=[x.sym_val], name=node.name)
+        context.add(res)
+
+
+def _ifzo_to_ifoz(weights, name):
+    """
+    i, f, z, o -> i, f, o, z
+    where weights_split[0] == i, etc.
+    Used to transform LSTM weights from PyTorch
+    to CoreML format.
+    """
+    split_size = weights.shape[0] // 4
+    weights_split = mb.split(x=weights, split_sizes=_np.array([split_size] * 4), axis=0)
+    weights_concat = mb.concat(
+        values=[weights_split[0], weights_split[1], weights_split[3], weights_split[2]],
+        axis=0,
+    )
+    # Make the transpose a no-op for 0/1d tensors
+    return mb.transpose(
+        x=weights_concat, perm=([1, 0] if len(weights.shape) > 1 else [0]), name=name
+    )
+
+
+def _pytorch_hidden_to_coreml_milops(x, name):
+    """
+    Used to transform LSTM state values (hn, cn)
+    from PyTorch to CoreML format.
+    """
+    split_size = x.shape[0] // 2
+    x_split = mb.split(x=x, split_sizes=_np.array([split_size] * 2), axis=0)
+    x_concat = mb.concat(values=[x_split[0], x_split[1]], axis=2,)
+    # (4.) See docstring to @lstm
+    return mb.squeeze(x=x_concat, axes=_np.array([0]), name=name)
+
+
+@register_torch_op
+def lstm(context, node):
+    inputs = _get_inputs(context, node, expected=9)
+
+    _input = inputs[0]
+    h0, c0 = inputs[1]
+    weights = inputs[2]
+    bias = inputs[3].val
+    num_layers = inputs[4].val
+    dropout = inputs[5]
+    bidirectional = inputs[7].val
+    batch_first = inputs[8].val
+
+    if num_layers != 1:
+        raise ValueError(
+            "CoreML does not support stacked LSTM layers (LSTM "
+            "with num_layers > 1). Received {}. Redefine as "
+            "multiple layers if this is the desired "
+            "implementation.".format(num_layers)
+        )
+
+    if batch_first:
+        raise ValueError("CoreML does not support LSTM layer with batch_first==True.")
+
+    expected_num_weights = 2 * num_layers * (int(bidirectional) + 1) * (int(bias) + 1)
+    if len(weights) != expected_num_weights:
+        raise ValueError(
+            "Incorrect weights shape for lstm layer: Expected: {}. "
+            "Received {}".format(
+                expected_num_weights, len(weights)
+            )
+        )
+
+    # NOTE:
+    # Most of this code is to transform the tensors into
+    # a shape acceptable by the CoreML implementation of LSTM.
+    # Since this transformation is complicated and unintuitive, we include
+    # a description of what is happening:
+
+    # For the weights and biases, per direction, PyTorch uses two tensors:
+    # (ii, if, ig, io) stacked on top of each other for each layer (tensor 1)
+    # and (hi, hf, hg, ho) stacked on top of each other for each layer (tensor 2).
+    # These weights are used in the calculation of the layers found in the torch.nn documentation:
+    # https://pytorch.org/docs/stable/nn.html
+
+    # The CoreML LSTM op expects two tensors, weight and bias. So
+    # the tensors for weight and bias are separated from PyTorch's @weights list (1.).
+    # For each individual weight and bias tensor, the CoreML LSTM op expects the form
+    # ii, if, io, ig and hi, hf, ho, hg, requiring the ifzo_to_ifoz function (2.).
+    # Each separate weight and bias tensor is concatenated to
+    # form the two weight and bias tensors. (3.)
+    # In the bidirectional case, the forward and backward weights and biases
+    # are stacked on top of each other instead of stored as separate tensors in
+    # the @weights list. (4.)
+
+    # In the initial cell and hidden states, the 0th dimension of PyTorch's
+    # tensors stores each layer and direction.
+    # However, since CoreML's LSTM allows only one layer, the direction is
+    # squeezed out of the state tensor. (4.)
+    # In the bidirectional case, the forward and backward state tensors are
+    # stacked on top of each other instead of stored in the layer and direction
+    # dimension, using @_pytorch_hidden_to_coreml_milops (5.).
+
+    # For output: The CoreML LSTM op returns the final states with the first dimension: @num_layers
+    # squeezed out. To fit with the rest of the shapes expected down the line in
+    # the TorchIR graph, we unsqueeze that dimension in the final state output. (6.)
+    if bidirectional:
+        if bias:
+            # (1.)
+            biases = weights[2:4] + weights[6:8]
+            weights = weights[0:2] + weights[4:6]
+
+            # (2.)
+            assert len(biases) == 4
+            for index in range(len(biases)):
+                biases[index] = _ifzo_to_ifoz(
+                    biases[index],
+                    name="{}_lstm_bias_reshape_{}".format(node.name, index),
+                )
+
+            # (4.)
+            f_stack = mb.stack(values=biases[0:2], axis=0,)
+            r_stack = mb.stack(values=biases[2:4], axis=0,)
+            # (3.)
+            final_biases = mb.concat(
+                values=(f_stack, r_stack),
+                axis=1,
+                name=node.name + "_lstm_biases_concat",
+            )
+
+        # (4.)
+        forward_concat = mb.concat(
+            values=[weights[0], weights[1]],
+            axis=1,
+            name=node.name + "_lstm_weights_forward_concat",
+        )
+        backward_concat = mb.concat(
+            values=[weights[2], weights[3]],
+            axis=1,
+            name=node.name + "_lstm_weights_backward_concat",
+        )
+        # (2.)
+        forward_transformed = _ifzo_to_ifoz(
+            forward_concat, name=node.name + "_lstm_forward_weights_ifoz_to_ifzo",
+        )
+        backward_transformed = _ifzo_to_ifoz(
+            backward_concat, name=node.name + "_lstm_backward_weights_ifoz_to_ifzo"
+        )
+        # (3.)
+        final_weights = mb.concat(
+            values=[forward_transformed, backward_transformed],
+            axis=1,
+            name=node.name + "_lstm_weights_final_concat",
+        )
+
+        # (5.)
+        h = _pytorch_hidden_to_coreml_milops(h0, name="_lstm_h0_reshaped")
+        c = _pytorch_hidden_to_coreml_milops(c0, name="_lstm_c0_reshaped")
+
+    else:
+        if bias:
+            # (1.)
+            biases = weights[len(weights) // 2 :]
+            weights = weights[: len(weights) // 2]
+            ih_b = biases[0]
+            hh_b = biases[1]
+
+            # (2.)
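+            # e.g. for hidden size H each bias has shape (4*H,), stacked in
+            # torch's (i, f, z, o) gate order; _ifzo_to_ifoz reorders it to
+            # the (i, f, o, z) order CoreML expects.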
+ ih_b_transformed = _ifzo_to_ifoz( + ih_b, name=node.name + "_lstm_ih_bias_transformed", + ) + hh_b_transformed = _ifzo_to_ifoz( + hh_b, name=node.name + "_lstm_hh_bias_transformed", + ) + + # (3.) + final_biases = mb.stack( + values=(ih_b_transformed, hh_b_transformed), + axis=0, + name=node.name + "_lstm_bias_stacked", + ) + + # (3.) + weights_concat = mb.concat( + values=weights, axis=1, name=node.name + "_lstm_weights_concat" + ) + # (2.) + final_weights = _ifzo_to_ifoz( + weights_concat, name=node.name + "_lstm_weights_ifoz_to_ifzo", + ) + + # (4.) + h = mb.squeeze(x=h0, axes=_np.array([0]), name=node.name + "_lstm_h0_squeeze") + c = mb.squeeze(x=c0, axes=_np.array([0]), name=node.name + "_lstm_c0_squeeze") + + lstm = mb.lstm( + x=_input, + initial_h=h, + initial_c=c, + weight=final_weights, + bias=(final_biases if bias else None), + direction=("bidirectional" if bidirectional is True else "forward"), + output_sequence=True, + name=node.name, + ) + + # (6.) + for index, (name, output) in enumerate(zip(node.outputs, lstm)): + if index > 0: + # Add in @num_layers in first dimension to hn, cn output + unsqueeze = mb.expand_dims(x=output, axes=[0], name=name) + context.add(unsqueeze) + else: + context.add(output, name) + + +@register_torch_op +def upsample_bilinear2d(context, node): + inputs = _get_inputs(context, node) + _input = inputs[0] + output_size = inputs[1] + align_corners = bool(inputs[2].val) + + if len(inputs) == 5: + # For torch==1.5.0, upsample_bilinear2d has 5 inputs. + scales_h = inputs[3] + scales_w = inputs[4] + + if output_size is not None: + # @output_size will be a list if scales was provided or a + # single var if output size was provided + if isinstance(output_size, list): + output_size = [output_size[0].val, output_size[1].val] + if isinstance(output_size, Var): + output_size = [output_size.val[0], output_size.val[1]] + + # output size is computed using the formula + # floor (scale * input_size) in Core ML (and PyTorch) + # Thus, when computing the scales from the output size, + # add a small positive constant to the output size, + # to make sure that the floor formula results in the correct output + # size and not 1 unit smaller, due to float precision issues + # e.g. if output size = 34 and input size = 2, then scale will be + # 17, which can get represented as 16.9999, resulting in an output size of 33 + # instead of 34, without this correction. 
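+        # e.g. (34 + 1e-4) / 2 == 17.00005, and floor(17.00005 * 2) == 34,
+        # recovering the requested output size.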
+        scales_h = (output_size[0] + 1e-4) / float(_input.shape[-2])
+        scales_w = (output_size[1] + 1e-4) / float(_input.shape[-1])
+
+    upsample_bilinear = mb.upsample_bilinear(
+        x=_input,
+        scale_factor_height=scales_h,
+        scale_factor_width=scales_w,
+        align_corners=align_corners,
+        name=node.name,
+    )
+    context.add(upsample_bilinear)
+
+
+@register_torch_op(torch_alias=["listunpack"])
+def tupleunpack(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+    values = inputs[0]
+    # Node input could have been turned into constant array in @tupleconstruct
+    if not isinstance(values, tuple) and not isinstance(values, list):
+        values = values.val
+    if len(values) != len(node.outputs):
+        raise ValueError(
+            "unpack node expected {} outputs, got {}".format(
+                len(node.outputs), len(values)
+            )
+        )
+    assert len(values) == len(node.outputs)
+    # @value is either a numpy primitive or a Var object
+    for value, output in zip(values, node.outputs):
+        if not isinstance(value, Var):
+            value = _construct_constant(value, name=output)
+        assert isinstance(value, Var)
+        context.add(value, output)
+
+
+@register_torch_op
+def loop(context, node):
+    """ In TorchIR, a loop looks like:
+            %y_1, ..., %y_r = prim::Loop(%max_trip_count, %initial_condition, %x_1, ..., %x_r)
+            block0(%i, %a_1, ..., %a_r):
+                %b_1, ..., %b_m = some::node(%a_value_from_outer_block, %a_1)
+                %iter_condition = some::other_node(%a_2)
+                -> (%iter_condition, %b_1, ..., %b_r)
+
+        This translates to pseudo code as:
+            y_1, ..., y_r = x_1, ..., x_r
+            condition = initial_condition
+            i = 0
+            while condition and i < max_trip_count:
+                a_1, ..., a_r = y_1, ..., y_r
+
+                ############################################################
+                # Actual body of the loop
+                b_1, ..., b_m = some::node(a_value_from_outside_of_the_loop, a_1)
+                iter_condition = some::node(a_2)
+                ############################################################
+
+                y_1, ..., y_r = b_1, ..., b_r
+                condition = iter_condition
+                i += 1
+
+        Which further translates to MIL while_loop as:
+            loop_vars = (0, initial_condition, x_1, ..., x_r)
+            _cond = {
+                return (loop_vars[1] and loop_vars[0] < max_trip_count)
+            }
+            _body = {
+                a_1, ..., a_r = loop_vars[2], ..., loop_vars[-1]
+                b_1, ..., b_m = some::node(a_value_from_outside_of_the_loop, a_1)
+                iter_condition = some::node(a_2)
+                return (loop_vars[0] + 1, iter_condition, b_1, ..., b_r)
+            }
+
+        For loops pass True for %initial_condition and %iter_condition.
+        While loops set %max_trip_count to INT_MAX and %i is unused.
+    """
+    name = node.name
+    # inputs[0]: max iter count
+    # inputs[1]: initial condition
+    # inputs[2]: block input 0
+    # ...
+    # inputs[N+2]: block input N
+    inputs = _get_inputs(context, node)
+    max_iter_count = inputs[0]
+
+    # Magic default signals this is a while-only loop, so no iteration count
+    # is needed.
+    has_iter_count = max_iter_count is not None
+
+    # Create an iteration count. This will only be used if this is a for loop.
+    iter_count = mb.const(val=0, name=node.name + "_iter")
+    # @loop_vars is tuple(iter_count, cond, inputs...)
+    loop_vars = tuple([iter_count] + inputs[1:])
+
+    def _loop_cond(*loop_vars):
+        cond = loop_vars[1]
+
+        # Check the iteration count if we're keeping track.
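+        # For a for-loop this evaluates (cond AND iter_count < max_trip_count);
+        # for a while-loop, max_iter_count is None and cond is passed through.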
+ if has_iter_count: + iter_count = loop_vars[0] + iter_cond = mb.less( + x=iter_count, y=max_iter_count, name=node.name + "_cond" + ) + return mb.logical_and(x=cond, y=iter_cond) + else: + return mb.identity(x=cond) + + def _shapes_are_equivalent(shape1, shape2): + """ Compares two sets of tensor shapes and returns True if they are + equivalent. That is, they are the same rank, and each dimension + is the same or symbolic. + """ + if len(shape1) != len(shape2): + return False + + # Each dimension must have the same integer length, or else be + # symbolic. + all_equivalent = [ + s1 == s2 or (isinstance(s1, Symbol) and isinstance(s2, Symbol)) + for s1, s2 in zip(shape1, shape2) + ] + return all_equivalent + + def _loop_body(*loop_vars): + block = node.blocks[0] + iter_var = loop_vars[0] + inputs = (iter_var,) + loop_vars[2:] + res = convert_block(context, block, inputs) + + for input_var, output_var in zip(loop_vars[2:], res[1:]): + if not _shapes_are_equivalent(input_var.shape, output_var.shape): + _logging.warning( + "detected change in shape of loop variable. this could lead to incorrect inference results!" + ) + _logging.warning( + "{}:{} -> {}:{}".format( + input_var.name, + input_var.shape, + output_var.name, + output_var.shape, + ) + ) + + # Update the iteration count if we're keeping track. + if has_iter_count: + iter_var = mb.add(x=iter_var, y=1, name=iter_var.name + "_inc") + else: + iter_var = mb.identity(x=iter_var) + + # Must return tuple with same length and types as @loop_vars. + return tuple([iter_var,] + res) + + loop = mb.while_loop( + _cond=_loop_cond, _body=_loop_body, loop_vars=loop_vars, name=name + ) + + # Make sure the loop returned the expected number of outputs. Note that the + # first two loop outputs are the iteration count and condition. + assert len(loop) - 2 == len(node.outputs) + for output_name, output_var in zip(node.outputs, loop[2:]): + context.add(output_var, torch_name=output_name) + + +@register_torch_op(torch_alias=["if"]) +def _if(context, node): + """ In TorchIR, a conditional looks like: + %y_1, ..., %y_r = prim::If(%condition) + block0(): # TRUE BRANCH, never takes arguments, has to return r outputs + %t_1, ..., %t_k = some::node(%a_value_from_outer_block) + -> (%t_1, ..., %t_r) + block1(): # FALSE BRANCH, never takes arguments, has to return r outputs + %f_1, ..., %f_m = some::node(%a_value_from_outer_block) + -> (%f_1, ..., %f_r) + + This translates to pseudo code as: + if (condition): + t_1, ..., t_k = some::node(a_value_from_outer_block) + y_1, ..., y_r = t_1, ..., t_r + else: + f_1, ..., f_m = some::node(a_value_from_outer_block) + y_1, ..., y_r = f_1, ..., f_r + + Which further translates to MIL cond as: + _true = { + t_1, ..., t_k = some::node(a_value_from_outer_block) + return (t_1, ..., t_r) + } + _false = { + f_1, ..., f_m = some::node(a_value_from_outer_block) + return (f_1, ..., f_m) + } + """ + name = node.name + # inputs[0]: condition + inputs = _get_inputs(context, node, expected=1) + condition = inputs[0] + + assert len(node.blocks) == 2 + true_block = node.blocks[0] + false_block = node.blocks[1] + + def _true_path(): + res = convert_block(context, true_block, []) + return tuple(res) + + def _false_path(): + res = convert_block(context, false_block, []) + return tuple(res) + + cond = mb.cond( + pred=condition, _true_fn=_true_path, _false_fn=_false_path, name=name + ) + # If the condition only returns one item, wrap it in a tuple. 
+    if not isinstance(cond, tuple):
+        cond = (cond,)
+
+    # Make sure the condition returned the expected number of outputs.
+    assert len(cond) == len(node.outputs)
+    for output_name, output_var in zip(node.outputs, cond):
+        context.add(output_var, torch_name=output_name)
+
+
+@register_torch_op
+def select(context, node):
+    inputs = _get_inputs(context, node, expected=3)
+    _input = inputs[0]
+    dim = inputs[1].val
+    index = inputs[2].val
+
+    assert dim.shape == ()
+    assert index.shape == ()
+    assert _input.val is None
+
+    # NOTE:
+    # Each index in @begin_array/@end_array corresponds to a dimension of @_input
+    # Each val of those arrays corresponds to the start/end index to slice in that dimension
+    begin_array = [0] * len(_input.shape)
+    begin_array[dim] = index
+    end_array = [s if isinstance(s, int) else 0 for s in _input.shape]
+    end_mask = [True] * len(_input.shape)
+    if index != -1:
+        end_array[dim] = index + 1
+        end_mask[dim] = False
+
+    slice_by_index = mb.slice_by_index(
+        x=_input,
+        begin=begin_array,
+        end=end_array,
+        end_mask=end_mask,
+        name=node.name + "_slice_by_index",
+    )
+    # Now we squeeze the dimension we have selected from to remove it
+    squeeze = mb.squeeze(
+        x=slice_by_index, axes=_np.array([dim]), name=node.name + "_squeeze"
+    )
+    context.add(squeeze, node.name)
+
+
+@register_torch_op
+def ones(context, node):
+    inputs = _get_inputs(context, node, expected=6)
+    size = inputs[0]
+    # dtype = NUM_TO_TORCH_DTYPE[inputs[1].val] unused
+    # layout = inputs[2] unused
+    # device = inputs[3] unused
+    # requires_grad = inputs[4] unused
+    # out = inputs[5] unused
+    fill = mb.fill(shape=size, value=1.0, name=node.name)
+    context.add(fill)
+
+
+@register_torch_op
+def ones_like(context, node):
+    inputs = _get_inputs(context, node, expected=6)
+    size = mb.shape(x=inputs[0])
+    # dtype = NUM_TO_TORCH_DTYPE[inputs[1].val] unused
+    # layout = inputs[2] unused
+    # device = inputs[3] unused
+    # requires_grad = inputs[4] unused
+    # out = inputs[5] unused
+    fill = mb.fill(shape=size, value=1.0, name=node.name)
+    context.add(fill)
+
+
+def _avg_pool(context, node, inputs):
+    x = inputs[0]
+    kernel_sizes = inputs[1]
+    strides = inputs[2]
+    pad_type = "custom"
+    # Need to explicitly state L-R, T-B pad
+    pad = inputs[3]
+    pad = _np.repeat(pad.val, 2)
+    ceil_mode = inputs[4]
+    if ceil_mode.val is True:
+        rank = len(pad) // 2
+        pad = _adjust_pad_for_ceil_mode(
+            x.shape[-rank:], kernel_sizes.val, strides.val, pad
+        )
+    include_pad = inputs[5].val
+
+    pool = mb.avg_pool(
+        x=x,
+        kernel_sizes=kernel_sizes,
+        strides=strides,
+        pad_type=pad_type,
+        pad=pad,
+        name=node.name,
+        exclude_padding_from_average=not include_pad,
+    )
+    context.add(pool)
+
+
+@register_torch_op
+def avg_pool1d(context, node):
+    inputs = _get_inputs(context, node, expected=6)
+    _avg_pool(context, node, inputs)
+
+
+@register_torch_op
+def avg_pool2d(context, node):
+    inputs = _get_inputs(context, node, expected=7)
+    divisor_override = inputs[6]
+    if divisor_override is not None:
+        raise ValueError("divisor_override is not supported for avg_pool2d")
+    _avg_pool(context, node, inputs)
+
+
+@register_torch_op
+def log_softmax(context, node):
+    inputs = _get_inputs(context, node)
+
+    x = inputs[0]
+    axis = inputs[1]
+    out = inputs[2]  # Ignored.
+    assert out is None
+    res = mb.softmax(logit=x, axis=axis, name=node.name + "_softmax")
+    res = mb.log(x=res, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def sigmoid(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+
+    res = mb.sigmoid(x=inputs[0], name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def gelu(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+
+    res = mb.gelu(x=inputs[0], name=node.name)
+    context.add(res)
+
+
+@register_torch_op(torch_alias=["slice"])
+def _slice(context, node):
+    inputs = _get_inputs(context, node, expected=5)
+    x = inputs[0]
+    dim = inputs[1].val
+    start = inputs[2].val if inputs[2].val is not None else 0
+    end = inputs[3].val if inputs[3] is not None else None
+    step = inputs[4].val
+
+    if start == 0 and end is None and step == 1:
+        # Handling x[:], just pass through the tensor.
+        context.add(x, node.name)
+        return
+
+    begin_array = [0] * len(x.shape)
+    begin_array[dim] = start
+    end_array = [s if isinstance(s, int) else 0 for s in x.shape]
+    end_mask = [True] * len(x.shape)
+    if end is not None:
+        end_array[dim] = end
+        end_mask[dim] = False
+
+    kwargs = {
+        "x": x,
+        "begin": begin_array,
+        "end": end_array,
+        "end_mask": end_mask,
+        "name": node.name,
+    }
+
+    if step != 1:
+        stride_array = _np.array([1] * len(x.shape))
+        stride_array[dim] = step
+        kwargs["stride"] = stride_array
+
+    res = mb.slice_by_index(**kwargs)
+    context.add(res)
+
+
+@register_torch_op(torch_alias=["split_with_sizes"])
+def split(context, node):
+    inputs = _get_inputs(context, node, expected=3)
+    x = inputs[0]
+    split_sizes = inputs[1]
+    dim = inputs[2].val
+
+    if not isinstance(split_sizes.val, _np.ndarray):
+        shape = mb.shape(x=x)
+        dim_size = _list_select(shape, dim)
+        # MIL split op needs the size of each split to be given explicitly.
+        num_whole_splits = mb.floor_div(x=dim_size, y=split_sizes)
+        remainder = mb.mod(x=dim_size, y=split_sizes)
+
+        # MIL doesn't have a way of turning a scalar into a tensor (list write
+        # only supports tensors). As a workaround, we create a constant [1]
+        # tensor and multiply it by the scalar value, thus creating a tensor
+        # with the scalar value in it.
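+        # e.g. dim_size=10 with scalar split_sizes=3 gives num_whole_splits=3
+        # and remainder=1, producing the explicit sizes tensor [3, 3, 3, 1].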
+ tmp = mb.const(val=[1]) + whole_sizes = mb.mul(x=tmp, y=split_sizes) + reps = mb.mul(x=tmp, y=num_whole_splits) + whole_sizes = mb.tile(x=whole_sizes, reps=reps) + if remainder.val == 0: + split_sizes = whole_sizes + else: + partial_size = mb.mul(x=tmp, y=remainder) + split_sizes = mb.concat(values=[whole_sizes, partial_size], axis=0) + res = mb.split(x=x, split_sizes=split_sizes, axis=dim, name=node.name) + context.add(res, torch_name=node.name) + + +@register_torch_op +def to(context, node): + # @non_blocking and @copy are unused + inputs = _get_inputs(context, node) + if len(inputs) == 5: + _input = inputs[0] + device = inputs[1] + dtype = inputs[2].val + # non_blocking = inputs[3] + # copy = inputs[4] + elif len(inputs) == 4: + _input = inputs[0] + dtype = inputs[1].val + # non_blocking = inputs[2] + # copy = inputs[3] + elif len(inputs) == 3: + # Since @non_blocking and @copy are unused, add back to context + _input = inputs[0] + # non_blocking = inputs[1] + # copy = inputs[2] + context.add(_input, torch_name=node.name) + return + else: + raise ValueError( + "Received invalid arguments for PyTorch conversion of op {}".format(node) + ) + + torch_dtype = NUM_TO_TORCH_DTYPE[dtype] + if isinstance(_input, Var): + _input = _input.val + + # numpy -> torch -> torch cast -> numpy + # This path is needed to use the mapping of passed in dtypes to torch dtypes. + casted_input = torch.tensor(_input).type(torch_dtype).numpy() + const = mb.const(mode="immediate_value", val=casted_input, name=node.name) + context.add(const) + + +@register_torch_op +def floor(context, node): + inputs = _get_inputs(context, node, expected=1) + _input = inputs[0] + floor = mb.floor(x=_input, name=node.name) + context.add(floor) + + +@register_torch_op +def erf(context, node): + inputs = _get_inputs(context, node, expected=1) + _input = inputs[0] + erf = mb.erf(x=_input, name=node.name) + context.add(erf) + + +@register_torch_op +def implicittensortonum(context, node): + inputs = _get_inputs(context, node, expected=1) + _input = inputs[0] + assert _input.shape == (1,) + # shape: (1,) -> () + squeeze = mb.squeeze(x=_input, name=node.name) + context.add(squeeze) + + +@register_torch_op +def constantchunk(context, node): + inputs = _get_inputs(context, node, expected=1) + x = inputs[0] + # ConstantChunk gets its parameters as attributes of the node. 
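+    # e.g. x.chunk(3) along a dimension of size 10 uses chunk size
+    # ceil(10 / 3) == 4, yielding split_sizes [4, 4] plus a remainder chunk of 2.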
+    chunks = node.attr["chunks"]
+    dim = node.attr["dim"]
+
+    total = x.shape[dim]
+    size = int(_math.ceil(float(total) / float(chunks)))
+    split_sizes = [size] * int(_math.floor(total / size))
+    remainder = total - sum(split_sizes)
+    if remainder > 0:
+        split_sizes.append(remainder)
+
+    res = mb.split(x=x, split_sizes=split_sizes, axis=dim, name=node.name)
+    for val, name in zip(res, node.outputs):
+        context.add(val, name)
+
+
+def _expand(context, name, tensor, shape):
+    reps = [ds if ds > 0 and ts == 1 else 1 for ts, ds in zip(tensor.shape, shape)]
+    res = mb.tile(x=tensor, reps=reps, name=name)
+    context.add(res)
+
+
+@register_torch_op
+def expand(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    x = inputs[0]
+    shape = inputs[1].val
+
+    _expand(context, node.name, x, shape)
+
+
+@register_torch_op
+def expand_as(context, node):
+    inputs = _get_inputs(context, node, expected=2)
+    x = inputs[0]
+    other = inputs[1]
+
+    _expand(context, node.name, x, other.shape)
+
+
+@register_torch_op
+def arange(context, node):
+    inputs = _get_inputs(context, node)
+    # dtype = inputs[-4]
+    # layout = inputs[-3]
+    # device = inputs[-2]
+    # pin_memory = inputs[-1]
+    if len(inputs) == 5:
+        # inputs are [end, dtype, layout, device, pin_memory]
+        start = 0
+        end = inputs[0]
+        step = 1
+    elif len(inputs) == 6:
+        # inputs are [start, end, dtype, layout, device, pin_memory]
+        start = inputs[0]
+        end = inputs[1]
+        step = 1
+    elif len(inputs) == 7:
+        # inputs are [start, end, step, dtype, layout, device, pin_memory]
+        start = inputs[0]
+        end = inputs[1]
+        step = inputs[2]
+    else:
+        raise ValueError(
+            "arange must have exactly 5, 6, or 7 inputs, got {}".format(len(inputs))
+        )
+
+    res = mb.range_1d(start=start, end=end, step=step, name=node.name)
+    context.add(res)
+
+
+@register_torch_op(torch_alias=["masked_fill_"])
+def masked_fill(context, node):
+    inputs = _get_inputs(context, node, expected=3)
+    x = inputs[0]
+    mask = inputs[1]
+    value = inputs[2]
+    # @mb.select does not properly broadcast scalar input, so as a workaround
+    # we create a full sized tensor.
+    # rdar://61463562
+
+    if types.is_int(value.dtype):
+        # @mb.fill cannot handle value with dtype integer
+        # so we cast the value.
+        value = mb.cast(x=value, dtype="fp32")
+    value = mb.fill(shape=x.shape, value=value, name=node.name + "_value")
+    res = mb.select(cond=mask, a=value, b=x, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def meshgrid(context, node):
+    """
+    For N input tensors, a meshgrid is constructed by viewing each tensor as an N-dimension tensor
+    with values in the dimension corresponding to its order in the args. (a.)
+    Then, it is expanded along dimensions corresponding to the dimensions of each
+    1d tensor in the order that they were passed in. (b.)
+
+    Each output tensor is put into a tuple that is returned. These tuples form
+    N N-dimensional grids, where the ith grid is defined as expanding the ith input over
+    dimensions defined by the other inputs.
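+
+    For example, mirroring torch.meshgrid for two 1d inputs:
+        x = [1, 2] (shape (2,)) and y = [3, 4, 5] (shape (3,)) produce
+        grid_x = [[1, 1, 1], [2, 2, 2]] and grid_y = [[3, 4, 5], [3, 4, 5]],
+        both of shape (2, 3).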
+    """
+    inputs = _get_inputs(context, node)
+    if len(inputs) < 2:
+        raise ValueError("meshgrid requires at least 2 tensor inputs.")
+
+    # scalar inputs will be considered 1d tensors
+    tensor_inputs = []
+    for tensor_var in inputs:
+        if not isinstance(tensor_var.val, _np.ndarray):
+            tensor_inputs.append(_np.array(tensor_var.val))
+        else:
+            tensor_inputs.append(_np.array(tensor_var))
+
+    if any([len(tensor_var.shape) > 1 for tensor_var in inputs]):
+        raise ValueError("meshgrid received non-1d tensor.")
+
+    dim_tuple = tuple(tensor_var.shape[0] for tensor_var in inputs)
+
+    grids = []
+    size = len(inputs)
+    for i in range(size):
+        view_shape = [1] * size
+        view_shape[i] = -1
+        view_shape = tuple(view_shape)
+        # (a.) in docstring
+        view = mb.reshape(
+            x=inputs[i], shape=view_shape, name=node.name + "_view_" + str(i)
+        )
+
+        # (b.) in docstring
+        reps = [
+            ds if ds > 0 and ts == 1 else 1 for ts, ds in zip(view.shape, dim_tuple)
+        ]
+        expand = mb.tile(x=view, reps=reps, name=node.name + "_expand_" + str(i))
+        grids.append(expand)
+
+    context.add(tuple(grids), node.name)
+
+
+@register_torch_op
+def tanh(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+    _input = inputs[0]
+    tanh = mb.tanh(x=_input, name=node.name)
+    context.add(tanh)
+
+
+# Defines all the nodes that are no-ops
+@register_torch_op(
+    torch_alias=[
+        "dropout",
+        "dropout_",
+        "feature_dropout",
+        "contiguous",
+        "device",
+        "detach",
+        "clone",
+    ]
+)
+def noop(context, node):
+    _logging.info("Setting pytorch op: {} to no-op.".format(node))
+    inputs = _get_inputs(context, node)
+    _input = inputs[0]
+    context.add(_input, torch_name=node.name)
+
+
+@register_torch_op
+def argmax(context, node):
+    inputs = _get_inputs(context, node)
+    x = inputs[0]
+    axis = inputs[1]
+    keep_dims = inputs[2]
+    res = mb.reduce_argmax(x=x, axis=axis, keep_dims=keep_dims, name=node.name)
+    context.add(res)
+
+
+@register_torch_op
+def zeros(context, node):
+    inputs = _get_inputs(context, node, expected=5)
+    size = inputs[0].val
+    dtype = inputs[1].val
+    # layout = inputs[2] unused
+    # device = inputs[3] unused
+    # pin_memory = inputs[4] unused
+
+    torch_dtype = NUM_TO_TORCH_DTYPE[dtype]
+    zeros_array = torch.zeros(tuple(size)).type(torch_dtype).numpy()
+    const = mb.const(mode="immediate_value", val=zeros_array, name=node.name)
+    context.add(const)
+
+
+@register_torch_op
+def exp(context, node):
+    inputs = _get_inputs(context, node, expected=1)
+    exp = mb.exp(x=inputs[0], name=node.name)
+    context.add(exp)
+
+
+@register_torch_op
+def max(context, node):
+    inputs = _get_inputs(context, node, expected=3)
+    _input = inputs[0]
+    dim = inputs[1].val
+    keepdim = inputs[2].val
+
+    values = mb.reduce_max(x=_input, axes=[dim], keep_dims=keepdim)
+    indices = mb.reduce_argmax(x=_input, axis=dim, keep_dims=keepdim)
+    assert len(node.outputs) == 2
+    values_name = node.outputs[0]
+    indices_name = node.outputs[1]
+    context.add(values, torch_name=values_name)
+    context.add(indices, torch_name=indices_name)
+
+
+@register_torch_op
+def sort(context, node):
+    inputs = _get_inputs(context, node)
+    _input = inputs[0]
+    axis = inputs[1].val
+    descending = inputs[2].val
+    # NOTE: This is actually descending
+    # rdar://62901267 (argsort ascending is actually descending)
+    indices = mb.argsort(x=_input, axis=axis, ascending=descending)
+    values = mb.gather_along_axis(x=_input, indices=indices, axis=axis)
+
+    values_name = node.outputs[0]
+    indices_name = node.outputs[1]
+    context.add(values, torch_name=values_name)
+    context.add(indices, torch_name=indices_name)
diff --git a/coremltools/converters/mil/frontend/torch/test/__init__.py b/coremltools/converters/mil/frontend/torch/test/__init__.py
new file mode 100644
index 000000000..61aafff42
--- /dev/null
+++ b/coremltools/converters/mil/frontend/torch/test/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
diff --git a/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py b/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py
new file mode 100644
index 000000000..9153d243c
--- /dev/null
+++ b/coremltools/converters/mil/frontend/torch/test/test_custom_ops.py
@@ -0,0 +1,149 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import itertools
+
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+
+import coremltools
+from .testing_utils import *
+
+# Custom layer imports
+
+from coremltools.converters.mil.mil.ops.defs._op_reqs import *
+from coremltools.converters.mil.frontend.torch.torch_op_registry import (
+    register_torch_op,
+)
+from coremltools.converters.mil.frontend.torch.torch_op_registry import (
+    _TORCH_OPS_REGISTRY as _TORCH_OPS_REG,
+)
+from coremltools.converters.mil.frontend.torch.ops import _get_inputs
+from coremltools.converters.mil.mil import Builder as mb
+
+# Save the converter's default cosine_similarity conversion function
+default_cosine_similarity = _TORCH_OPS_REG.get("cosine_similarity", None)
+
+
+@register_torch_op(override=True)
+def cosine_similarity(context, node):
+    inputs = _get_inputs(context, node, expected=4)
+    dim = inputs[-2].val
+    eps = inputs[-1].val
+    xy = mb.mul(x=inputs[0], y=inputs[1])
+    sum_xy = mb.reduce_sum(x=xy, axes=[dim])
+
+    xx = mb.mul(x=inputs[0], y=inputs[0])
+    sum_xx = mb.reduce_sum(x=xx, axes=[dim])
+    yy = mb.mul(x=inputs[1], y=inputs[1])
+    sum_yy = mb.reduce_sum(x=yy, axes=[dim])
+
+    mul_sum_xy = mb.mul(x=sum_xx, y=sum_yy)
+    div_12 = mb.maximum(x=mul_sum_xy, y=eps * eps)
+    div_sqrt = mb.sqrt(x=div_12)
+
+    cs = mb.real_div(x=sum_xy, y=div_sqrt, name=node.name)
+    context.add(cs)
+
+
+# Save the custom cosine_similarity conversion function registered above
+custom_cosine_similarity = _TORCH_OPS_REG["cosine_similarity"]
+
+
+def _set_torch_reg_op(op_type, op_func):
+    _TORCH_OPS_REG[op_type] = op_func
+
+
+class TestCompositeOp:
+    @pytest.mark.parametrize("input_shape", [(100, 180), (56, 123)])
+    def test_composite_op(self, input_shape):
+        _set_torch_reg_op("cosine_similarity", custom_cosine_similarity)
+        model = nn.CosineSimilarity(dim=1, eps=1e-6)
+        run_numerical_test([input_shape, input_shape], model)
+        _set_torch_reg_op("cosine_similarity", default_cosine_similarity)
+
+
+class TestCustomOp:
+    # Define SSA Custom Op for Sparse MatMul
+    # This will map to `custom_op` in SSA with binding information
+    # to bind input spec to the custom implementation
+    @register_op(doc_str="Sparse MatMul Layer", is_custom_op=True)
+    class custom_torch_sparse_matmul(Operation):
+        # Defining input spec for current op
+        input_spec = InputSpec(
+            x=TensorInputType(),
+            y=TensorInputType(),
+            transpose_x=BoolInputType(const=True, default=False),
+            transpose_y=BoolInputType(const=True, default=False),
+            x_is_sparse=BoolInputType(const=True, default=False),
+            y_is_sparse=BoolInputType(const=True, default=False),
+        )
+
+        # Bindings for the custom op, specifying the inputs and parameters
+        # required for the custom op to be synced with the Swift API
+        bindings = {
+            "class_name": "SparseMatMul",
+            "input_order": ["x", "y"],
+            "parameters": ["transpose_x", "transpose_y", "x_is_sparse", "y_is_sparse"],
+            "description": "Custom Sparse MatMul Layer",
+        }
+
+        def __init__(self, **kwargs):
+            super(TestCustomOp.custom_torch_sparse_matmul, self).__init__(**kwargs)
+
+        def type_inference(self):
+            x_type = self.x.dtype
+            x_shape = self.x.shape
+            y_shape = self.y.shape
+            # For illustration purposes, assume the shapes are valid.
+            # Ideally, the transpose_? and ?_is_sparse parameters should be
+            # taken into consideration when computing the output shape.
+            ret_shape = [x_shape[0], y_shape[1]]
+            return types.tensor(x_type, ret_shape)
+
+    @register_torch_op()
+    def _sparse_mm(context, node):
+        inputs = _get_inputs(context, node, expected=2)
+        x = mb.custom_torch_sparse_matmul(
+            x=inputs[0], y=inputs[1], x_is_sparse=True, y_is_sparse=True, name=node.name
+        )
+        context.add(x)
+
+    def test_custom_sparse_mm_op(self, input_shape=(4, 4)):
+        class TestLayer(nn.Module):
+            def __init__(self):
+                super(TestLayer, self).__init__()
+
+            def forward(self, x, y):
+                x = torch.sparse.mm(x, y)
+                return x
+
+        model = TestLayer()
+        input_data_x = torch.ones(input_shape)
+        input_data_y = torch.ones(input_shape)
+        input_data = [input_data_x, input_data_y]
+        model.eval()
+        torch_model = torch.jit.trace(model, (input_data_x, input_data_y))
+        mlmodel = convert_to_mlmodel(torch_model, input_data)
+
+        layers = mlmodel.get_spec().neuralNetwork.layers
+        assert layers[-1].custom is not None, "Expecting a custom layer"
+        assert (
+            "SparseMatMul" == layers[-1].custom.className
+        ), "Custom Layer class name mismatch"
+        assert (
+            False == layers[-1].custom.parameters["transpose_x"].boolValue
+        ), "Incorrect parameter value for transpose_x"
+        assert (
+            False == layers[-1].custom.parameters["transpose_y"].boolValue
+        ), "Incorrect parameter value for transpose_y"
+        assert (
+            True == layers[-1].custom.parameters["x_is_sparse"].boolValue
+        ), "Incorrect parameter value for x_is_sparse"
+        assert (
+            True == layers[-1].custom.parameters["y_is_sparse"].boolValue
+        ), "Incorrect parameter value for y_is_sparse"
diff --git a/coremltools/converters/mil/frontend/torch/test/test_numerical.py b/coremltools/converters/mil/frontend/torch/test/test_numerical.py
new file mode 100644
index 000000000..c3651328c
--- /dev/null
+++ b/coremltools/converters/mil/frontend/torch/test/test_numerical.py
@@ -0,0 +1,480 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import itertools
+import sys
+
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+
+from .testing_utils import *
+
+
+class ModuleWrapper(nn.Module):
+    def __init__(self, function, kwargs):
+        super(ModuleWrapper, self).__init__()
+        self.function = function
+        self.kwargs = kwargs
+
+    def forward(self, x):
+        return self.function(x, **self.kwargs)
+
+
+@pytest.mark.skipif(sys.version_info >= (3, 8), reason="Segfault with Python 3.8+")
+class TestTorchNumerical:
+    """Class containing numerical correctness tests for TorchIR -> CoreML op
+    conversion.
+ """ + + @pytest.fixture + def set_random_seeds(self): + torch.manual_seed(1) + np.random.seed(1) + + @pytest.mark.parametrize( + "in_features, out_features", itertools.product([10, 25, 100], [3, 6]), + ) + def test_addmm(self, in_features, out_features): + model = nn.Linear(in_features, out_features) + run_numerical_test((1, in_features), model) + + @pytest.mark.parametrize( + "num_features, eps", itertools.product([5, 2, 1], [0.1, 1e-05]), + ) + def test_batchnorm(self, num_features, eps): + model = nn.BatchNorm2d(num_features, eps) + run_numerical_test((1, num_features, 5, 5), model) + + @pytest.mark.parametrize( + "height, width, in_channels, out_channels, kernel_size, stride, padding, dilation", + itertools.product( + [5, 6], [5, 7], [1, 3], [1, 3], [1, 3], [1, 3], [1, 3], [1, 3] + ), + ) + def test_convolution2d( + self, + height, + width, + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups=1, + ): + model = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + ) + run_numerical_test((1, in_channels, height, width), model) + + @pytest.mark.parametrize( + "height, width, in_channels, out_channels, kernel_size, stride, padding, dilation", + itertools.product( + [5, 6], [5, 7], [1, 3], [1, 3], [1, 3], [2, 3], [0, 1], [1, 3] + ), + ) + def test_convolution_transpose2d( + self, + height, + width, + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation, + groups=1, + ): + model = nn.ConvTranspose2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + ) + run_numerical_test((1, in_channels, height, width), model) + + def test_for_loop(self): + class TestLayer(nn.Module): + def __init__(self): + super(TestLayer, self).__init__() + + def forward(self, x): + x = 2.0 * x + return x + + class TestNet(nn.Module): + input_size = (64,) + + def __init__(self): + super(TestNet, self).__init__() + layer = TestLayer() + self.layer = torch.jit.trace(layer, torch.rand(self.input_size)) + + def forward(self, x): + for _ in range(7): + x = self.layer(x) + return x + + model = TestNet().eval() + torch_model = torch.jit.script(model) + + run_numerical_test(model.input_size, torch_model) + + def test_while_loop(self): + class TestLayer(nn.Module): + def __init__(self): + super(TestLayer, self).__init__() + + def forward(self, x): + x = 0.5 * x + return x + + class TestNet(nn.Module): + input_size = (1,) + + def __init__(self): + super(TestNet, self).__init__() + layer = TestLayer() + self.layer = torch.jit.trace(layer, torch.rand(self.input_size)) + + def forward(self, x): + while x > 0.01: + x = self.layer(x) + return x + + model = TestNet().eval() + torch_model = torch.jit.script(model) + + run_numerical_test(model.input_size, torch_model) + + def test_if(self): + class TestLayer(nn.Module): + def __init__(self): + super(TestLayer, self).__init__() + + def forward(self, x): + x = torch.mean(x) + return x + + class TestNet(nn.Module): + input_size = (64,) + + def __init__(self): + super(TestNet, self).__init__() + layer = TestLayer() + self.layer = torch.jit.trace(layer, torch.rand(self.input_size)) + + def forward(self, x): + m = self.layer(x) + if m < 0: + scale = -2.0 + else: + scale = 2.0 + x = scale * x + return x + + model = TestNet().eval() + torch_model = torch.jit.script(model) + + run_numerical_test(model.input_size, torch_model) + + 
@pytest.mark.parametrize( + "output_size, align_corners", + [ + x + for x in itertools.product( + [(10, 10), (1, 1), (20, 20), (2, 3), (190, 170)], [True, False] + ) + ], + ) + def test_upsample_bilinear2d_with_output_size(self, output_size, align_corners): + input_shape = (1, 3, 10, 10) + model = ModuleWrapper( + nn.functional.interpolate, + {"size": output_size, "mode": "bilinear", "align_corners": align_corners,}, + ) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "scales_h, scales_w, align_corners", + [x for x in itertools.product([2, 3, 4.5], [4, 5, 5.5], [True, False])], + ) + def test_upsample_bilinear2d_with_scales(self, scales_h, scales_w, align_corners): + input_shape = (1, 3, 10, 10) + model = ModuleWrapper( + nn.functional.interpolate, + { + "scale_factor": (scales_h, scales_w), + "mode": "bilinear", + "align_corners": align_corners, + }, + ) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "input_shape, eps", + itertools.product([(1, 3, 15, 15), (1, 1, 1, 1)], [1e-5, 1e-9]), + ) + def test_layer_norm(self, input_shape, eps): + model = nn.LayerNorm(input_shape, eps=eps) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "input_shape, eps", + itertools.product([(1, 3, 15, 15), (1, 1, 1, 1)], [1e-5, 1e-9]), + ) + def test_batch_norm(self, input_shape, eps): + model = nn.BatchNorm2d(input_shape[-3], eps=eps) + run_numerical_test(input_shape, model) + + @pytest.mark.xfail(reason="rdar://problem/61064173") + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad, include_pad", + itertools.product( + [(1, 3, 15), (1, 1, 7), (1, 3, 10)], + [1, 2, 3], + [1, 2], + [0, 1], + [True, False], + ), + ) + def test_avg_pool1d(self, input_shape, kernel_size, stride, pad, include_pad): + if pad > kernel_size / 2: + # Because this test is xfail, we have to fail rather than + # just return here, otherwise these test cases unexpectedly pass. + # This can be changed to `return` once the above radar + # is fixed and the test is no longer xfail. 
+ raise ValueError("pad must be less than half the kernel size") + model = nn.AvgPool1d(kernel_size, stride, pad, False, include_pad) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad, include_pad", + itertools.product( + [(1, 3, 15, 15), (1, 1, 7, 7), (1, 3, 10, 10)], + [1, 2, 3], + [1, 2], + [0, 1], + [True, False], + ), + ) + def test_avg_pool2d(self, input_shape, kernel_size, stride, pad, include_pad): + if pad > kernel_size / 2: + return + model = nn.AvgPool2d(kernel_size, stride, pad, False, include_pad) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad, include_pad", + itertools.product( + [(1, 3, 15, 15), (1, 1, 7, 7), (1, 3, 10, 10)], + [3], + [1, 2], + [0, 1], + [True, False], + ), + ) + def test_avg_pool2d_ceil_mode( + self, input_shape, kernel_size, stride, pad, include_pad + ): + if pad > kernel_size / 2: + return + model = nn.AvgPool2d(kernel_size, stride, pad, True, include_pad) + run_numerical_test(input_shape, model) + + @pytest.mark.xfail( + reason="PyTorch convert function for op max_pool1d not implemented, " + "we will also likely run into rdar://problem/61064173" + ) + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad", + itertools.product( + [(1, 3, 15), (1, 1, 7), (1, 3, 10)], [1, 2, 3], [1, 2], [0, 1], + ), + ) + def test_max_pool1d(self, input_shape, kernel_size, stride, pad): + if pad > kernel_size / 2: + # Because this test is xfail, we have to fail rather than + # just return here, otherwise these test cases unexpectedly pass. + # This can be changed to `return` once the above radar + # is fixed and the test is no longer xfail. + raise ValueError("pad must be less than half the kernel size") + model = nn.MaxPool1d(kernel_size, stride, pad, ceil_mode=False) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad", + itertools.product( + [(1, 3, 15, 15), (1, 1, 7, 7), (1, 3, 10, 10)], [1, 2, 3], [1, 2], [0, 1], + ), + ) + def test_max_pool2d(self, input_shape, kernel_size, stride, pad): + if pad > kernel_size / 2: + return + model = nn.MaxPool2d(kernel_size, stride, pad, ceil_mode=False) + run_numerical_test(input_shape, model) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad", + itertools.product( + [(1, 3, 15, 15), (1, 1, 7, 7), (1, 3, 10, 10)], [3], [1, 2], [0, 1], + ), + ) + def test_max_pool2d_ceil_mode(self, input_shape, kernel_size, stride, pad): + if pad > kernel_size / 2: + return + model = nn.MaxPool2d(kernel_size, stride, pad, ceil_mode=True) + run_numerical_test(input_shape, model) + + # This tests an edge case where the list of tensors to concatenate only + # has one item. NN throws an error for this case, hence why we have to + # run through the full conversion process to test it. 
+    def test_cat(self):
+        class TestNet(nn.Module):
+            def __init__(self):
+                super(TestNet, self).__init__()
+
+            def forward(self, x):
+                x = torch.cat((x,), axis=1)
+                return x
+
+        model = TestNet()
+        run_numerical_test((1, 3, 16, 16), model)
+
+    def _pytorch_hidden_to_coreml(self, x):
+        # Split off the direction axis
+        f, b = torch.split(x, [1] * x.shape[0], dim=0)
+        # Concat on the hidden size axis
+        x = torch.cat((f, b), dim=2)
+        # NOTE:
+        # We are omitting a squeeze because the conversion
+        # function for the mil op lstm unsqueezes the num_layers
+        # dimension
+        return x
+
+    def _test_lstm(
+        self,
+        input_size,
+        hidden_size,
+        num_layers,
+        bias,
+        batch_first,
+        dropout,
+        bidirectional,
+    ):
+        model = nn.LSTM(
+            input_size=input_size,
+            hidden_size=hidden_size,
+            num_layers=num_layers,
+            bias=bias,
+            batch_first=batch_first,
+            dropout=dropout,
+            bidirectional=bidirectional,
+        )
+        SEQUENCE_LENGTH = 3
+        BATCH_SIZE = 2
+
+        num_directions = int(bidirectional) + 1
+
+        # (seq_len, batch, input_size)
+        if batch_first:
+            _input = torch.rand(BATCH_SIZE, SEQUENCE_LENGTH, input_size)
+        else:
+            _input = torch.randn(SEQUENCE_LENGTH, BATCH_SIZE, input_size)
+
+        h0 = torch.randn(num_layers * num_directions, BATCH_SIZE, hidden_size)
+        c0 = torch.randn(num_layers * num_directions, BATCH_SIZE, hidden_size)
+
+        inputs = (_input, (h0, c0))
+        expected_results = model(*inputs)
+        # Need to do some output reshaping if bidirectional
+        if bidirectional:
+            ex_hn = self._pytorch_hidden_to_coreml(expected_results[1][0])
+            ex_cn = self._pytorch_hidden_to_coreml(expected_results[1][1])
+            expected_results = (expected_results[0], (ex_hn, ex_cn))
+        run_numerical_test(inputs, model, expected_results, input_as_shape=False)
+
+    @pytest.mark.parametrize(
+        "input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional",
+        itertools.product([7], [5], [1], [True, False], [False], [0.3], [True, False]),
+    )
+    def test_lstm(
+        self,
+        input_size,
+        hidden_size,
+        num_layers,
+        bias,
+        batch_first,
+        dropout,
+        bidirectional,
+    ):
+        self._test_lstm(
+            input_size,
+            hidden_size,
+            num_layers,
+            bias,
+            batch_first,
+            dropout,
+            bidirectional,
+        )
+
+    @pytest.mark.parametrize(
+        "input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional",
+        [
+            (7, 3, 1, True, True, 0.3, True),
+            (7, 3, 2, True, True, 0.3, True),
+            (7, 3, 2, False, False, 0.3, False),
+        ],
+    )
+    def test_lstm_exception(
+        self,
+        input_size,
+        hidden_size,
+        num_layers,
+        bias,
+        batch_first,
+        dropout,
+        bidirectional,
+    ):
+        with pytest.raises(ValueError):
+            self._test_lstm(
+                input_size,
+                hidden_size,
+                num_layers,
+                bias,
+                batch_first,
+                dropout,
+                bidirectional,
+            )
+
+    @pytest.mark.parametrize(
+        "input_shape, dim, keepdim",
+        itertools.product([(2, 2), (1, 1)], [0, 1], [True, False]),
+    )
+    def test_max(self, input_shape, dim, keepdim):
+        class TestMax(nn.Module):
+            def __init__(self):
+                super(TestMax, self).__init__()
+
+            def forward(self, x):
+                return torch.max(x, dim=dim, keepdim=keepdim)
+
+        input_data = torch.rand(input_shape)
+        model = TestMax()
+        # TODO: Expected results are flipped due to naming issue:
+        # rdar://62681982 (Determine the output names of MLModels)
+        expected_results = model(input_data)[::-1]
+        run_numerical_test(
+            input_data, model, expected_results=expected_results, input_as_shape=False
+        )
diff --git a/coremltools/converters/mil/frontend/torch/test/test_ops.py b/coremltools/converters/mil/frontend/torch/test/test_ops.py
new file mode 100644
index 000000000..ed86f7757
--- /dev/null
+++ b/coremltools/converters/mil/frontend/torch/test/test_ops.py
@@ -0,0 +1,1827 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import itertools
+
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil import Function, get_new_symbol
+from coremltools.converters.mil.mil.var import Var
+
+from .. import ops
+from ..converter import TorchConverter, TranscriptionContext
+from ..internal_graph import InternalTorchIRNode
+
+
+class TestTorchOps:
+    """Class containing tests for converting TorchIR -> CoreML ops.
+
+    These tests interface with only the InternalTorchIRGraph and do not
+    build a torch module. Thus, they are much faster than the numerical tests.
+    However, for some ops it is necessary to use the torch module to verify
+    numerical output, so those are placed with the numerical tests instead.
+
+    NOTE: Confused about where @context is coming from? It's from the pytest
+    fixture defined below.
+    """
+
+    @pytest.fixture
+    def context(self):
+        return TranscriptionContext()
+
+    @pytest.fixture
+    def set_random_seeds(self):
+        torch.manual_seed(1)
+        np.random.seed(1)
+
+    @pytest.mark.parametrize("dtype", [torch.bool, torch.float, torch.int])
+    def test_constant(self, context, dtype):
+        test_data = torch.ones(1, dtype=dtype)
+        node = InternalTorchIRNode(
+            attr={"value": test_data}, kind="constant", inputs=[], outputs=["1"]
+        )
+        ssa = self._construct_test_graph(context, ops.constant, node, "1")
+        assert np.allclose(test_data, ssa.val)
+        assert test_data.shape == ssa.shape
+
+    def test_constant_magic(self, context):
+        test_val = ops.PYTORCH_MAGIC_DEFAULT
+        node = InternalTorchIRNode(
+            attr={"value": test_val}, kind="constant", inputs=[], outputs=["1"]
+        )
+        ssa = self._construct_test_graph(context, ops.constant, node, "1")
+        # We expect the magic default to get converted to None
+        assert ssa is None
+
+    @staticmethod
+    def _gen_constants(size, vals):
+        """Helper function. Generates a list of internal constant nodes.
+
+        Arguments:
+            size: number of constants to generate
+            vals: Either a list of values for each constant or one value
+                used for all constants."""
+        is_list = isinstance(vals, list)
+        if is_list:
+            if len(vals) != size:
+                raise ValueError("len(@vals): {} != size: {}".format(len(vals), size))
+        constants = []
+        for index in range(size):
+            if is_list:
+                val = vals[index]
+            else:
+                val = vals
+            constants.append(
+                InternalTorchIRNode(
+                    attr={"value": val},
+                    kind="constant",
+                    inputs=[],
+                    outputs=[str(index)],
+                )
+            )
+        input_list = [str(i) for i in range(size)]
+        output_name = str(len(input_list))
+        return constants, input_list, output_name
+
+    @staticmethod
+    def _construct_test_graph(
+        context, test_op, test_node, output_name=None, graph_inputs=None, constants=None
+    ):
+        """Construct a Function for the given @graph_inputs, @constants,
+        and @test_node. Returns the output of the graph, which is the ssa
+        Var of the given @output_name.
+ """ + if graph_inputs is None: + graph_inputs = {} + if constants is None: + constants = [] + + with Function(inputs=graph_inputs) as ssa_func: + for name in ssa_func.inputs.keys(): + context.add(ssa_func.inputs[name]) + for node in constants: + ops.constant(context, node) + test_op(context, test_node) + + ssa = None + if output_name: + ssa = context[output_name] + return ssa + + def _test_elementwise_binary( + self, context, op_name, op, test_input, num_constants, expected_result + ): + """Helper function, runs op on test input and compares against expected result""" + constants, input_list, output_name = self._gen_constants( + num_constants, test_input + ) + eb_node = InternalTorchIRNode( + kind=op_name, inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, op, eb_node, output_name, constants=constants + ) + np.testing.assert_allclose(expected_result, ssa.val, atol=1e-7) + + def _test_cast(self, context, test_val, op_kind, op_func, python_type): + constants, input_list, output_name = self._gen_constants(1, [test_val]) + node = InternalTorchIRNode( + kind=op_kind, inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, op_func, node, output_name, constants=constants + ) + assert ssa.val == python_type(test_val) + + def _test_activation( + self, context, input_shape, constants_list, op_kind, op_func, torch_func, atol + ): + test_input = torch.rand(input_shape) + constants, input_list, output_name = self._gen_constants( + len(constants_list) + 1, [test_input] + constants_list + ) + node = InternalTorchIRNode( + kind=op_kind, inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, op_func, node, output_name, constants=constants + ) + expected_result = torch_func(test_input).numpy() + np.testing.assert_allclose(expected_result, ssa.val, atol=atol) + + def test_add(self, context): + test_input_1 = np.random.rand(2, 3) + test_input_2 = np.random.rand(2, 3) + scale_factor = 1 + self._test_elementwise_binary( + context, + "Add", + ops.add, + [test_input_1, test_input_2, scale_factor], + 3, + test_input_1 + test_input_2, + ) + + def test_add_no_scale_factor(self, context): + test_input_1 = np.random.rand(2, 3) + test_input_2 = np.random.rand(2, 3) + self._test_elementwise_binary( + context, + "Add", + ops.add, + [test_input_1, test_input_2], + 2, + test_input_1 + test_input_2, + ) + + @pytest.mark.parametrize( + "test_input_1, test_input_2", + [(np.random.rand(3, 2), np.random.rand(3, 2)), (np.random.rand(3, 2), 5),], + ) + def test_sub(self, context, test_input_1, test_input_2): + scale_factor = 1 + self._test_elementwise_binary( + context, + "Sub", + ops.sub, + [test_input_1, test_input_2, scale_factor], + 3, + test_input_1 - test_input_2, + ) + + @pytest.mark.parametrize( + "test_input_1, test_input_2", + [(np.random.rand(3, 2), np.random.rand(3, 2)), (np.random.rand(3, 2), 5),], + ) + def test_rsub(self, context, test_input_1, test_input_2): + scale_factor = 1 + self._test_elementwise_binary( + context, + "rsub", + ops.sub, + [test_input_1, test_input_2, scale_factor], + 3, + # Note the reversal of arg ordering relative to 'sub' + test_input_2 - test_input_1, + ) + + def test_mul(self, context): + test_input_1 = np.random.rand(3, 2) + test_input_2 = np.random.rand(3, 2) + self._test_elementwise_binary( + context, + "Mul", + ops.mul, + [test_input_1, test_input_2], + 2, + test_input_1 * test_input_2, + ) + + def test_div(self, context): + test_input_1 = np.random.rand(3, 2) + 
test_input_2 = np.random.rand(3, 2) + self._test_elementwise_binary( + context, + "Div", + ops.div, + [test_input_1, test_input_2], + 2, + np.divide(test_input_1, test_input_2), + ) + + def test_floor_divide(self, context): + test_input_1 = np.random.randint(low=1, high=100, size=(3, 2)) + test_input_2 = np.random.randint(low=1, high=100, size=(3, 2)) + self._test_elementwise_binary( + context, + "floor_divide", + ops.floor_divide, + [test_input_1, test_input_2], + 2, + np.floor_divide(test_input_1, test_input_2), + ) + + def test_pow(self, context): + test_input_1 = np.random.rand(3, 2) + test_input_2 = np.random.rand(3, 2) + self._test_elementwise_binary( + context, + "Pow", + ops.pow_, + [test_input_1, test_input_2], + 2, + np.power(test_input_1, test_input_2), + ) + + def test_eq(self, context): + test_input_1 = torch.zeros([2, 3, 4, 5, 6]).float() + test_input_2 = torch.ones([2, 3, 4, 5, 6]).float() + test_input_2[0][0][0][0][0] = 0 + expected_output = (test_input_1 == test_input_2).float() + + self._test_elementwise_binary( + context, "Eq", ops.eq, [test_input_1, test_input_2], 2, expected_output + ) + + def test_ne(self, context): + test_input_1 = torch.zeros([2, 3, 4, 5, 6]).float() + test_input_2 = torch.ones([2, 3, 4, 5, 6]).float() + test_input_2[0][0][0][0][0] = 0 + expected_output = (test_input_1 != test_input_2).float() + + self._test_elementwise_binary( + context, "ne", ops.ne, [test_input_1, test_input_2], 2, expected_output + ) + + def test_le(self, context): + test_input_1 = torch.zeros([2, 3, 4, 5, 6]).float() + test_input_2 = torch.ones([2, 3, 4, 5, 6]).float() + test_input_2[0][0][0][0][0] = 0 + expected_output = (test_input_1 <= test_input_2).float() + + self._test_elementwise_binary( + context, "Le", ops.le, [test_input_1, test_input_2], 2, expected_output + ) + + def test_lt(self, context): + test_input_1 = torch.zeros([2, 3, 4, 5, 6]).float() + test_input_2 = torch.ones([2, 3, 4, 5, 6]).float() + test_input_2[0][0][0][0][0] = 0 + expected_output = (test_input_1 < test_input_2).float() + + self._test_elementwise_binary( + context, "Lt", ops.lt, [test_input_1, test_input_2], 2, expected_output + ) + + def test_ge(self, context): + test_input_1 = torch.zeros([2, 3, 4, 5, 6]).float() + test_input_2 = torch.ones([2, 3, 4, 5, 6]).float() + test_input_2[0][0][0][0][0] = 0 + expected_output = (test_input_1 >= test_input_2).float() + + self._test_elementwise_binary( + context, "Ge", ops.ge, [test_input_1, test_input_2], 2, expected_output + ) + + def test_gt(self, context): + test_input_1 = torch.zeros([2, 3, 4, 5, 6]).float() + test_input_2 = torch.ones([2, 3, 4, 5, 6]).float() + test_input_2[0][0][0][0][0] = 0 + expected_output = (test_input_1 > test_input_2).float() + + self._test_elementwise_binary( + context, "Gt", ops.gt, [test_input_1, test_input_2], 2, expected_output + ) + + @pytest.mark.parametrize( + "size, array_type", + itertools.product( + [1, 5, 7], + [ + ("ListConstruct", ops.listconstruct), + ("TupleConstruct", ops.tupleconstruct), + ], + ), + ) + def test_arrayconstruct_scalars(self, context, size, array_type): + constant_vals = list(range(size)) + array_kind = array_type[0] + array_op = array_type[1] + constants, input_list, output_name = self._gen_constants(size, constant_vals) + ac_node = InternalTorchIRNode( + kind=array_kind, inputs=input_list, outputs=[output_name], + ) + ssa = self._construct_test_graph( + context, array_op, ac_node, output_name, constants=constants + ) + expected_val = np.arange(size) + np.testing.assert_equal(ssa.shape, (size,)) + 
np.testing.assert_array_equal(ssa.val, expected_val) + + @pytest.mark.parametrize( + "shape1, shape2, array_type", + itertools.product( + [(1, 2), (3, 4, 5), (2,)], + [(2, 1), (1, 4, 5), (3,)], + [ + ("ListConstruct", ops.listconstruct), + ("TupleConstruct", ops.tupleconstruct), + ], + ), + ) + def test_arrayconstruct_nonscalar(self, context, shape1, shape2, array_type): + tensor1 = torch.rand(shape1) + tensor2 = torch.rand(shape2) + array_kind = array_type[0] + array_op = array_type[1] + constants, input_list, output_name = self._gen_constants(2, [tensor1, tensor2]) + ac_node = InternalTorchIRNode( + kind=array_kind, inputs=input_list, outputs=[output_name], + ) + ssa = self._construct_test_graph( + context, array_op, ac_node, output_name, constants=constants + ) + expected_val = (tensor1.numpy(), tensor2.numpy()) + np.testing.assert_equal(len(ssa), 2) + for x, y in zip(ssa, expected_val): + np.testing.assert_allclose(x.val, y) + + @pytest.mark.parametrize( + "input_shape, dim0, dim1", + [ + x + for x in itertools.product( + [(1, 2, 3), (1, 2, 3, 4), (1, 2, 3, 4, 5)], [0, 1, -1], [0, 2, -2], + ) + ] + + [((1, 2), None, None)], + ) + def test_transpose(self, context, input_shape, dim0, dim1): + test_input = torch.rand(input_shape) + + constant_list = [test_input] + if len(input_shape) > 2: + constant_list += [dim0, dim1] + kind = "transpose" + expected_result = torch.transpose(test_input, dim0, dim1) + else: + kind = "t" + expected_result = test_input.t() + + constants, input_list, output_name = self._gen_constants( + len(constant_list), constant_list + ) + transpose_node = InternalTorchIRNode( + kind=kind, inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.transpose, transpose_node, output_name, constants=constants, + ) + np.testing.assert_array_equal(expected_result.shape, ssa.shape) + np.testing.assert_allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "dim1, dim2, dim3", itertools.product([1, 2, 5], [2, 5, 10], [1, 2, 5]), + ) + def test_matmul(self, context, dim1, dim2, dim3): + mat1 = torch.rand((dim1, dim2)) + mat2 = torch.rand((dim2, dim3)) + constant_vals = [ + mat1, + mat2, + ] + constants, input_list, output_name = self._gen_constants(2, constant_vals) + + matmul_node = InternalTorchIRNode( + kind="matmul", inputs=input_list, outputs=[output_name], + ) + + ssa = self._construct_test_graph( + context, ops.matmul, matmul_node, output_name, constants=constants + ) + expected_result = torch.matmul(mat1, mat2).detach().numpy() + assert np.allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "input_shape, axis, expected_shape", + [ + ((1, 2), None, (2,)), + ((1, 2), 0, (2,)), + ((1, 2, 1), None, (2,)), + ((1, 2, 1, 1), None, (2,)), + ((1, 2, 1, 1), 2, (1, 2, 1)), + ((1, 2, 1, 1, 1), None, (2,)), + ], + ) + def test_squeeze(self, context, input_shape, axis, expected_shape): + test_data = torch.rand(input_shape) + if axis is None: + constants, input_list, output_name = self._gen_constants(1, test_data) + else: + constants, input_list, output_name = self._gen_constants( + 2, [test_data, axis] + ) + squeeze_node = InternalTorchIRNode( + kind="Squeeze", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.squeeze, squeeze_node, output_name, constants=constants + ) + if axis is None: + expected_result = torch.squeeze(test_data) + else: + expected_result = torch.squeeze(test_data, axis) + assert np.allclose(expected_result, ssa.val) + assert expected_result.size() == 
torch.Size(expected_shape) + + @pytest.mark.parametrize( + "input_shape, axis, expected_shape", + [ + ((2,), 0, (1, 2)), + ((2,), 1, (2, 1)), + ((2,), -1, (2, 1)), + ((2, 3), 1, (2, 1, 3)), + ], + ) + def test_unsqueeze(self, context, input_shape, axis, expected_shape): + test_data = torch.rand(input_shape) + constants, input_list, output_name = self._gen_constants(2, [test_data, axis]) + unsqueeze_node = InternalTorchIRNode( + kind="Unsqueeze", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.unsqueeze, unsqueeze_node, output_name, constants=constants + ) + expected_result = torch.unsqueeze(test_data, axis) + assert np.allclose(expected_result, ssa.val) + assert expected_result.size() == torch.Size(expected_shape) + + @pytest.mark.parametrize( + "input_shape, start, end", + [ + ((2, 1, 1, 2), 1, 3), + ((2, 2, 1, 1), 1, -2), + ((1, 1, 1), 0, 2), + ((1, 2), 0, 1), + ((1, 2), 1, 1), + ((1, 1), 1, -1), + ((1,), 0, 0), + ], + ) + def test_flatten(self, context, input_shape, start, end): + test_data = torch.rand(input_shape) + constants, input_list, output_name = self._gen_constants( + 3, [test_data, start, end] + ) + flatten_node = InternalTorchIRNode( + kind="Flatten", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.flatten, flatten_node, output_name, constants=constants + ) + expected_result = torch.flatten(test_data, start, end) + assert np.allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "start, end", [(0, -5), (100, 2), (2, 100), (-3, -4),], + ) + def test_flatten_exception(self, context, start, end): + test_data = torch.rand(1, 1, 1, 1) + constants, input_list, output_name = self._gen_constants( + 3, [test_data, start, end] + ) + flatten_node = InternalTorchIRNode( + kind="Flatten", inputs=input_list, outputs=[output_name] + ) + with pytest.raises(ValueError): + self._construct_test_graph( + context, ops.flatten, flatten_node, output_name, constants=constants, + ) + + @pytest.mark.parametrize( + "input_shape", [(2, 3), (2, 3, 4), (2, 3, 4, 5), (2, 3, 4, 5, 6),], + ) + def test_permute(self, context, input_shape): + test_data = torch.rand(*input_shape) + permutation = list(range(len(input_shape))) + np.random.shuffle(permutation) + constants, input_list, output_name = self._gen_constants( + 2, [test_data, permutation] + ) + permute_node = InternalTorchIRNode( + kind="Permute", inputs=input_list, outputs=[output_name], + ) + ssa = self._construct_test_graph( + context, ops.permute, permute_node, output_name, constants=constants + ) + expected_result = test_data.permute(*permutation) + assert expected_result.shape == ssa.shape + + @pytest.mark.parametrize( + "in_features, out_features, scaling", + itertools.product([10, 25, 100], [3, 6], [1.0, 0.5]), + ) + def test_addmm(self, context, in_features, out_features, scaling): + input_data = torch.rand((1, in_features)) + weight_data = torch.rand((in_features, out_features)) + bias_data = torch.rand((out_features)) + constant_vals = [ + scaling, + input_data, + weight_data, + bias_data, + ] + constants, _, output_name = self._gen_constants(4, constant_vals) + + addmm_node = InternalTorchIRNode( + kind="addmm", inputs=["3", "1", "2", "0", "0"], outputs=[output_name], + ) + + ssa = self._construct_test_graph( + context, ops.addmm, addmm_node, output_name, constants=constants + ) + torch_linear = nn.Linear(in_features=in_features, out_features=out_features,) + expected_shape = tuple(torch_linear(input_data).shape) + assert expected_shape 
== ssa.shape + + @pytest.mark.parametrize( + "height, width, kernel_size, stride, padding, dilation", + itertools.product([5, 6], [5, 7], [1, 3], [1, 3], [1, 3], [1, 3]), + ) + def test_convolution2d( + self, + context, + height, + width, + kernel_size, + stride, + padding, + dilation, + groups=1, + in_channels=1, + out_channels=2, + ): + test_input = torch.rand(1, in_channels, height, width) + constant_vals = [ + 1, # None argument + test_input, + np.random.rand( + out_channels, in_channels, kernel_size, kernel_size + ), # weights + np.random.rand(out_channels), # bias + np.array([stride, stride]), + np.array([padding, padding]), + np.array([dilation, dilation]), + False, # transposed + np.array([0, 0]), # output_pad + groups, + ] + constants, _, output_name = self._gen_constants( + len(constant_vals), constant_vals + ) + # For reference, the values for `kind` and `inputs` indices are determined from the definition for Torch's + # `at::_convolution` used for all convolutions. The link below is approximately correct at the time of writing. + # https://github.com/pytorch/pytorch/blob/bd604mb5b7ae4f6388aca461891d620b0d485fbb/aten/src/ATen/native/Convolution.cpp#L544 + conv_node = InternalTorchIRNode( + kind="_convolution", + inputs=["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "0", "0"], + outputs=[output_name], + ) + + ssa = self._construct_test_graph( + context, ops._convolution, conv_node, output_name, constants=constants + ) + torch_conv = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + ) + expected_shape = tuple(torch_conv(test_input).shape) + assert ssa.val == None + assert expected_shape == ssa.shape + + @pytest.mark.parametrize( + "depth, height, width, kernel_size, stride, padding, dilation, groups", + itertools.product( + [5, 5], + [5, 6], + [5, 7], + [1, 3], + [(1, 1, 1), (3, 2, 1)], + [(1, 1, 1), (1, 3, 2)], + [(1, 1, 1), (1, 2, 3)], + [ + 1, + -1, + ], # -1 groups indicates it should be set to the number of input channels for depthwise convolution + ), + ) + def test_convolution3d( + self, + context, + depth, + height, + width, + kernel_size, + stride, + padding, + dilation, + groups, + in_channels=2, + out_channels=4, + ): + if groups == -1: + groups = in_channels + test_input = torch.rand(1, in_channels, depth, height, width) + constant_vals = [ + 1, # None argument + test_input, + np.random.rand( + out_channels, + in_channels // groups, + kernel_size, + kernel_size, + kernel_size, + ), # weights + np.random.rand(out_channels), # bias + # PyTorch's Conv3d accepts either an int (for all dimensions) or a 3-tuple of ints (one per dimension) + np.array([stride[0], stride[1], stride[2]]), + np.array([padding[0], padding[1], padding[2]]), + np.array([dilation[0], dilation[1], dilation[2]]), + False, # transposed + np.array([0, 0, 0]), # out_pad + groups, + ] + constants, _, output_name = self._gen_constants( + len(constant_vals), constant_vals + ) + # For reference, the values for `kind` and `inputs` indices are determined from the definition for Torch's + # `at::_convolution` used for all convolutions. The link below is approximately correct at the time of writing. 
+ # https://github.com/pytorch/pytorch/blob/bd604mb5b7ae4f6388aca461891d620b0d485fbb/aten/src/ATen/native/Convolution.cpp#L544 + conv_node = InternalTorchIRNode( + kind="_convolution", + inputs=["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "0", "0"], + outputs=[output_name], + ) + + ssa = self._construct_test_graph( + context, ops._convolution, conv_node, output_name, constants=constants + ) + torch_conv = nn.Conv3d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + ) + expected_result = torch_conv(test_input) + expected_shape = tuple(expected_result.shape) + assert ssa.val is None + assert expected_shape == ssa.shape + + @pytest.mark.parametrize( + "height, width, kernel_size, stride, padding, dilation", + itertools.product([5, 6], [5, 7], [1, 3], [2, 3], [0, 1], [1, 3]), + ) + def test_convolution_transpose2d( + self, + context, + height, + width, + kernel_size, + stride, + padding, + dilation, + groups=1, + in_channels=1, + out_channels=2, + ): + test_input = torch.rand(1, in_channels, height, width) + + constant_vals = [ + np.random.rand( + in_channels, out_channels, kernel_size, kernel_size + ), # weights + np.random.rand(out_channels), # bias + np.array([stride, stride]), + np.array([padding, padding]), + np.array([dilation, dilation]), + True, # transposed, + np.array([0, 0]), # output_pad + groups, + False, + False, + False, + ] + graph_inputs = {"input": mb.placeholder(test_input.shape, dtype=types.float)} + + constants, input_list, output_name = self._gen_constants( + len(constant_vals), constant_vals + ) + conv_node = InternalTorchIRNode( + kind="_convolution", inputs=["input"] + input_list, outputs=[output_name], + ) + + ssa = self._construct_test_graph( + context, + ops._convolution, + conv_node, + output_name, + constants=constants, + graph_inputs=graph_inputs, + ) + torch_conv = nn.ConvTranspose2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + ) + expected_shape = tuple(torch_conv(test_input).shape) + assert ssa.val == None + assert expected_shape == ssa.shape + + @pytest.mark.parametrize( + "input_shape, dim, keepdim", + itertools.product([(3, 20, 20), (1, 50, 50)], [0, 1, 2, [0, 2]], [True, False]), + ) + def test_mean(self, context, input_shape, dim, keepdim): + test_input = torch.rand(*input_shape) + + constants, input_list, output_name = self._gen_constants( + 4, [test_input, dim, keepdim, None] + ) + mean_node = InternalTorchIRNode( + kind="mean", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.mean, mean_node, output_name, constants=constants + ) + expected_result = torch.mean(test_input, dim, keepdim) + assert np.allclose(expected_result, ssa.val) + + def test_mean_no_dims(self, context): + test_input = torch.rand((3, 20, 20)) + + constants, input_list, output_name = self._gen_constants(2, [test_input, None]) + mean_node = InternalTorchIRNode( + kind="mean", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.mean, mean_node, output_name, constants=constants + ) + expected_result = torch.mean(test_input) + assert np.allclose(expected_result, ssa.val) + + def test_embedding(self, context): + EMBEDDING_DIMENSION = 10 + NUM_EMBEDDINGS = 20 + input_shape = (NUM_EMBEDDINGS, EMBEDDING_DIMENSION) + # size is arbitrary for indices + indices = np.random.randint(NUM_EMBEDDINGS, 
size=100) + test_input = torch.rand(input_shape) + constants, input_list, output_name = self._gen_constants( + 2, [test_input, indices] + ) + gather_node = InternalTorchIRNode( + kind="embedding", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.embedding, gather_node, output_name, constants=constants + ) + torch_embedding = nn.Embedding.from_pretrained(test_input) + expected_result = torch_embedding(torch.LongTensor(indices)) + assert np.allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "dim", [0, 1, 2, 3, 4], + ) + def test_size(self, context, dim): + test_input = torch.rand(1, 2, 3, 4, 5) + + graph_inputs = {"input": mb.placeholder(test_input.shape, dtype=types.float)} + constants, input_list, output_name = self._gen_constants(1, [dim]) + size_node = InternalTorchIRNode( + kind="size", inputs=["input"] + input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, + ops.size, + size_node, + output_name, + constants=constants, + graph_inputs=graph_inputs, + ) + expected_result = test_input.shape[dim] + assert expected_result == ssa.val + + @pytest.mark.parametrize( + "dim", [0, 1], + ) + def test_size_symbolic(self, context, dim): + test_shape = (3, get_new_symbol()) + graph_inputs = {"input": mb.placeholder(shape=test_shape, dtype=types.float)} + constants, input_list, output_name = self._gen_constants(1, [dim]) + size_node = InternalTorchIRNode( + kind="size", inputs=["input"] + input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, + ops.size, + size_node, + output_name, + constants=constants, + graph_inputs=graph_inputs, + ) + expected_result = test_shape[dim] + assert expected_result == ssa.sym_val + + @pytest.mark.parametrize( + "input_size, shape", + itertools.product([(5, 12), (1, 4, 15), (3, 5, 4)], [(3, 20), (-1, 6), (60,)],), + ) + def test_view(self, context, input_size, shape): + test_input = torch.rand(input_size) + + constants, input_list, output_name = self._gen_constants(2, [test_input, shape]) + view_node = InternalTorchIRNode( + kind="view", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.view, view_node, output_name, constants=constants + ) + expected_result = test_input.view(shape) + assert np.allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "input_shape, output_shape", + itertools.product( + [(1, 3, 15, 15), (1, 1, 2, 2), (1, 3, 10, 10)], [(1, 1), (2, 2), (2, 1)], + ), + ) + def test_adaptive_avg_pool2d(self, context, input_shape, output_shape): + test_input = torch.rand(input_shape) + + constants, input_list, output_name = self._gen_constants( + 2, [test_input, output_shape] + ) + + adaptive_avg_pool2d_node = InternalTorchIRNode( + kind="adaptive_avg_pool2d", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, + ops.adaptive_avg_pool2d, + adaptive_avg_pool2d_node, + output_name, + constants=constants, + ) + expected_result = torch._adaptive_avg_pool2d(test_input, output_shape) + expected_shape = tuple(expected_result.shape) + assert expected_shape == ssa.shape + # We only expect numerical output when reducing to global average. + if output_shape == (1, 1): + assert np.allclose(expected_result, ssa.val) + + def test_adaptive_avg_pool2d_exception(self, context): + # For this test, the input tensor HW channels are dynamic. 
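+        # Adaptive pooling to an output grid larger than (1, 1) needs the
+        # static spatial size to derive a kernel and stride, so symbolic
+        # H/W is expected to raise a ValueError during conversion.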
+ input_shape = [1, 3, get_new_symbol(), get_new_symbol()] + graph_inputs = {"input": mb.placeholder(input_shape, dtype=types.float)} + constants, input_list, output_name = self._gen_constants(1, [(2, 1)]) + adaptive_avg_pool2d_node = InternalTorchIRNode( + kind="adaptive_avg_pool2d", + inputs=["input"] + input_list, + outputs=[output_name], + ) + with pytest.raises(ValueError): + ssa = self._construct_test_graph( + context, + ops.adaptive_avg_pool2d, + adaptive_avg_pool2d_node, + output_name, + constants=constants, + graph_inputs=graph_inputs, + ) + + @pytest.mark.parametrize("input_shape", [(1, 3, 15, 15), (1, 1, 1, 1)]) + def test_batch_norm(self, context, input_shape): + test_input = torch.rand(input_shape) + channels = input_shape[1] + constants, input_list, output_name = self._gen_constants( + 9, + [ + torch.rand(input_shape), # input + torch.rand(channels), # weight + torch.rand(channels), # bias + torch.rand(channels), # running mean + torch.rand(channels), # running var + 0, # training + 0.1, # momentum + 1e-6, # eps + 1, # cudnn_enabled + ], + ) + + batch_norm_node = InternalTorchIRNode( + kind="batch_norm", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.batch_norm, batch_norm_node, output_name, constants=constants + ) + assert ssa.val == None + assert ssa.shape == tuple(test_input.shape) + + @pytest.mark.parametrize( + "min_val, max_val", [(-1.0, 1.0), (0.0, 0.1), (1.0, 3.0), (-1.0, 6.0),] + ) + def test_hardtanh(self, context, min_val, max_val): + self._test_activation( + context, + (3, 4, 5), + [min_val, max_val], + "hardtanh_", + ops.hardtanh_, + nn.Hardtanh(min_val, max_val).eval(), + atol=1e-6, + ) + + @pytest.mark.parametrize("axis", [1, 2, 3]) + def test_cat(self, context, axis): + input_shape = (1, 3, 240, 320) + + test_input1 = torch.rand(input_shape) + test_input2 = torch.rand(input_shape) + const_input = torch.rand(input_shape) + + graph_inputs = { + "input1": mb.placeholder(input_shape, dtype=types.float), + "input2": mb.placeholder(input_shape, dtype=types.float), + } + dim_node = InternalTorchIRNode( + attr={"value": axis}, kind="constant", inputs=[], outputs=["0"], + ) + const_tensor_node = InternalTorchIRNode( + attr={"value": const_input.numpy()}, + kind="constant", + inputs=[], + outputs=["1"], + ) + listconstruct_node = InternalTorchIRNode( + kind="listconstruct", inputs=["1", "input1", "input2"], outputs=["2"] + ) + cat_node = InternalTorchIRNode( + kind="cat", inputs=["2", "0"], outputs=["output"] + ) + + with Function(inputs=graph_inputs) as ssa_func: + context.add(ssa_func.inputs["input1"]) + context.add(ssa_func.inputs["input2"]) + ops.constant(context, dim_node) + ops.constant(context, const_tensor_node) + ops.listconstruct(context, listconstruct_node) + ops.cat(context, cat_node) + + ssa = context["output"] + expected_result = torch.cat( + (const_input, test_input1, test_input2), dim=axis + ).numpy() + assert np.allclose(expected_result.shape, ssa.shape) + + @pytest.mark.parametrize("axis", [0, 1, 2, 3, 4]) + def test_stack(self, context, axis): + input_shape = (1, 3, 240, 320) + + test_input1 = torch.rand(input_shape) + test_input2 = torch.rand(input_shape) + const_input = torch.rand(input_shape) + + graph_inputs = { + "input1": mb.placeholder(input_shape, dtype=types.float), + "input2": mb.placeholder(input_shape, dtype=types.float), + } + dim_node = InternalTorchIRNode( + attr={"value": axis}, kind="constant", inputs=[], outputs=["0"], + ) + const_tensor_node = InternalTorchIRNode( + attr={"value": 
const_input.numpy()}, + kind="constant", + inputs=[], + outputs=["1"], + ) + listconstruct_node = InternalTorchIRNode( + kind="listconstruct", inputs=["1", "input1", "input2"], outputs=["2"] + ) + stack_node = InternalTorchIRNode( + kind="stack", inputs=["2", "0"], outputs=["output"] + ) + + with Function(inputs=graph_inputs) as ssa_func: + context.add(ssa_func.inputs["input1"]) + context.add(ssa_func.inputs["input2"]) + ops.constant(context, dim_node) + ops.constant(context, const_tensor_node) + ops.listconstruct(context, listconstruct_node) + ops.stack(context, stack_node) + + ssa = context["output"] + expected_result = np.stack((const_input, test_input1, test_input2), axis=axis) + assert np.allclose(expected_result.shape, ssa.shape) + + def test_item(self, context): + const_val = 0 + constants, input_list, output_name = self._gen_constants(1, [const_val]) + item_node = InternalTorchIRNode( + kind="item", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.item, item_node, output_name, constants=constants + ) + assert ssa.val == const_val + + def test_item_exception(self, context): + const_val = [0, 1] + constants, input_list, output_name = self._gen_constants(1, [const_val]) + item_node = InternalTorchIRNode( + kind="item", inputs=input_list, outputs=[output_name] + ) + with pytest.raises(ValueError): + ssa = self._construct_test_graph( + context, ops.item, item_node, output_name, constants=constants, + ) + + @pytest.mark.parametrize("test_val", [1, 1.5, False]) + def test_bool(self, context, test_val): + self._test_cast(context, test_val, "bool", ops._bool, bool) + + @pytest.mark.parametrize("test_val", [1, 1.5, -0.3]) + def test_int(self, context, test_val): + self._test_cast(context, test_val, "int", ops._int, int) + + @pytest.mark.parametrize("input_shape", [(1, 3, 15, 15), (1, 1, 1, 1)]) + def test_layer_norm(self, context, input_shape): + graph_inputs = {"input": mb.placeholder(input_shape, dtype=types.float)} + channels = input_shape[1] + constants, input_list, output_name = self._gen_constants( + 5, + [ + input_shape, # normalized shape + torch.rand(channels), # weight + torch.rand(channels), # running bias + 1e-6, + 1, # cudnn enabled + ], + ) + + layer_norm_node = InternalTorchIRNode( + kind="layer_norm", inputs=["input"] + input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, + ops.layer_norm, + layer_norm_node, + output_name, + graph_inputs=graph_inputs, + constants=constants, + ) + assert ssa.val == None + assert ssa.shape == input_shape + + @pytest.mark.parametrize("shape", [(1, 2), (2, 3, 4, 5), (3, 4, 5),]) + def test_ones(self, context, shape): + constants, constant_input_list, output_name = self._gen_constants( + 6, [shape, 1, 1, 1, 1, 1] + ) + ones_node = InternalTorchIRNode( + kind="ones", inputs=constant_input_list, outputs=[output_name], + ) + ssa = self._construct_test_graph( + context, ops.ones, ones_node, output_name, constants=constants, + ) + assert ssa.shape == shape + + @pytest.mark.parametrize("input_shape", [(1, 2), (2, 3, 4, 5), (3, 4, 5),]) + def test_ones_like(self, context, input_shape): + graph_inputs = {"input": mb.placeholder(input_shape, dtype=types.float)} + constants, constant_input_list, output_name = self._gen_constants(5, 1) + ones_node = InternalTorchIRNode( + kind="ones_like", + inputs=["input"] + constant_input_list, + outputs=[output_name], + ) + ssa = self._construct_test_graph( + context, + ops.ones_like, + ones_node, + output_name, + graph_inputs=graph_inputs, + 
            constants=constants,
+        )
+        assert ssa.shape == input_shape
+
+    @pytest.mark.parametrize(
+        "input_size, dim, index",
+        itertools.product(
+            [(13, 43, 10), (39, 14, 11, 9)], [0, 1, 2], [0, 1, 3, 8, -1],
+        ),
+    )
+    def test_select(self, context, input_size, dim, index):
+        graph_inputs = {"input1": mb.placeholder(input_size, dtype=types.float)}
+        constants, constant_input_list, output_name = self._gen_constants(
+            2, [dim, index]
+        )
+        select_node = InternalTorchIRNode(
+            kind="select",
+            inputs=["input1"] + constant_input_list,
+            outputs=[output_name],
+        )
+        ssa = self._construct_test_graph(
+            context,
+            ops.select,
+            select_node,
+            output_name,
+            graph_inputs=graph_inputs,
+            constants=constants,
+        )
+        select_index = index
+        if index < 0:
+            select_index += input_size[dim]
+        expected_shape = tuple(
+            torch.rand(input_size)
+            .index_select(dim, torch.tensor([select_index]))
+            .squeeze(dim)
+            .shape
+        )
+        assert np.allclose(ssa.shape, expected_shape)
+
+    @pytest.mark.parametrize(
+        "dynamic, test_tuple", itertools.product([True, False], [True, False])
+    )
+    def test_tuple_and_list_unpack(self, context, dynamic, test_tuple):
+        """
+        If @dynamic is True, a dynamic (placeholder) input is packed as well.
+        If @test_tuple is True, tests TupleUnpack; otherwise tests ListUnpack.
+        """
+        if test_tuple:
+            construct_op = ops.tupleconstruct
+            construct_name = "TupleConstruct"
+            unpack_name = "TupleUnpack"
+        else:
+            construct_op = ops.listconstruct
+            construct_name = "ListConstruct"
+            unpack_name = "ListUnpack"
+
+        input_shape = (1, 2, 3)
+        constant_vals = [str(i) for i in range(1, 6)]
+        constants_unpacked = [str(i) for i in range(6, 11)]
+        constants, input_list, _ = self._gen_constants(5, constant_vals)
+        output_list = constants_unpacked[:]
+        graph_inputs = {}
+        if dynamic:
+            graph_input_name = "input1"
+            graph_inputs = {
+                graph_input_name: mb.placeholder(input_shape, dtype=types.float)
+            }
+            input_list += [graph_input_name]
+            output_list += [graph_input_name + "_out"]
+
+        construct_node = InternalTorchIRNode(
+            kind=construct_name, inputs=input_list, outputs=["construct"],
+        )
+        unpack_node = InternalTorchIRNode(
+            kind=unpack_name, inputs=["construct"], outputs=output_list
+        )
+        with Function(inputs=graph_inputs) as ssa_func:
+            if dynamic:
+                context.add(ssa_func.inputs["input1"])
+            for node in constants:
+                ops.constant(context, node)
+            construct_op(context, construct_node)
+            ops.tupleunpack(context, unpack_node)
+
+        ssa_constants = []
+        for name in constants_unpacked:
+            ssa_constants.append(context[name].val)
+        assert ssa_constants == constant_vals
+
+        if dynamic:
+            ssa_dynamic = context[graph_input_name + "_out"]
+            assert ssa_dynamic.val is None
+            assert ssa_dynamic.shape == input_shape
+
+    def _test_pool(
+        self, context, test_input, param_list, op_kind, op_func, expected_result
+    ):
+        constants, input_list, output_name = self._gen_constants(
+            len(param_list) + 1, [test_input] + param_list,
+        )
+
+        pool_node = InternalTorchIRNode(
+            kind=op_kind, inputs=input_list, outputs=[output_name]
+        )
+        ssa = self._construct_test_graph(
+            context, op_func, pool_node, output_name, constants=constants,
+        )
+        expected_shape = tuple(expected_result.shape)
+        assert expected_shape == ssa.shape
+
+    @pytest.mark.parametrize(
+        "input_shape, kernel_size, stride, pad, include_pad, ceil_mode",
+        itertools.product(
+            [(1, 3, 15), (1, 1, 7), (1, 3, 10)],
+            [1, 3],
+            [1, 2],
+            [0, 1],
+            [True, False],
+            [False, True],
+        ),
+    )
+    def test_avg_pool1d(
+        self, context, input_shape, kernel_size, stride, pad, include_pad, ceil_mode,
+    ):
+        if pad >
kernel_size / 2: + return + test_input = torch.rand(input_shape) + expected_result = F.avg_pool1d( + test_input, + kernel_size=kernel_size, + stride=stride, + padding=pad, + ceil_mode=ceil_mode, + count_include_pad=include_pad, + ) + self._test_pool( + context, + test_input, + [[kernel_size], [stride], [pad], ceil_mode, not include_pad], + "avg_pool1d", + ops.avg_pool1d, + expected_result, + ) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad, include_pad, ceil_mode", + itertools.product( + [(1, 3, 15, 15), (1, 1, 7, 7), (1, 3, 10, 10)], + [1, 3], + [1, 2], + [0, 1], + [True, False], + [False, True], + ), + ) + def test_avg_pool2d( + self, context, input_shape, kernel_size, stride, pad, include_pad, ceil_mode, + ): + if pad > kernel_size / 2: + return + test_input = torch.rand(input_shape) + expected_result = F.avg_pool2d( + test_input, + kernel_size=kernel_size, + stride=stride, + padding=pad, + ceil_mode=ceil_mode, + count_include_pad=include_pad, + ) + self._test_pool( + context, + test_input, + [ + [kernel_size, kernel_size], + [stride, stride], + [pad, pad], + ceil_mode, + not include_pad, + None, + ], + "avg_pool2d", + ops.avg_pool2d, + expected_result, + ) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad, ceil_mode", + itertools.product( + [(1, 3, 15), (1, 1, 7), (1, 3, 10)], [1, 3], [1, 2], [0, 1], [False, True] + ), + ) + @pytest.mark.xfail(reason="torch converter for max_pool1d not implemented") + def test_max_pool1d( + self, context, input_shape, kernel_size, stride, pad, ceil_mode + ): + if pad > kernel_size / 2: + # Because this test is xfail, we have to fail rather than + # just return here, otherwise these test cases unexpectedly pass. + # This can be changed to `return` once the above radar + # is fixed and the test is no longer xfail. 
+ raise ValueError("pad must be less than half the kernel size") + test_input = torch.rand(input_shape) + expected_result = F.max_pool1d( + test_input, + kernel_size=kernel_size, + stride=stride, + padding=pad, + ceil_mode=ceil_mode, + ) + self._test_pool( + context, + test_input, + [[kernel_size], [stride], [pad], [1], ceil_mode], + "max_pool1d", + ops.max_pool1d, + expected_result, + ) + + @pytest.mark.parametrize( + "input_shape, kernel_size, stride, pad, ceil_mode", + itertools.product( + [(1, 3, 15, 15), (1, 1, 7, 7), (1, 3, 10, 10)], + [1, 3], + [1, 2], + [0, 1], + [False, True], + ), + ) + def test_max_pool2d( + self, context, input_shape, kernel_size, stride, pad, ceil_mode, + ): + if pad > kernel_size / 2: + return + test_input = torch.rand(input_shape) + expected_result = F.max_pool2d( + test_input, + kernel_size=kernel_size, + stride=stride, + padding=pad, + ceil_mode=ceil_mode, + ) + self._test_pool( + context, + test_input, + [ + [kernel_size, kernel_size], + [stride, stride], + [pad, pad], + [1, 1,], # dilation + ceil_mode, + ], + "max_pool2d", + ops.max_pool2d, + expected_result, + ) + + @pytest.mark.parametrize("dim", [0, 1, 2]) + def test_softmax(self, context, dim): + self._test_activation( + context, + (3, 4, 5), + [dim, None], + "softmax", + ops.softmax, + nn.Softmax(dim=dim).eval(), + atol=1e-6, + ) + + def test_relu(self, context): + self._test_activation( + context, (3, 4, 5), [], "relu", ops.relu, nn.ReLU().eval(), atol=1e-6 + ) + + @pytest.mark.parametrize("dim", [0, 1, 2]) + def test_log_softmax(self, context, dim): + self._test_activation( + context, + (3, 4, 5), + [dim, None], + "log_softmax", + ops.log_softmax, + nn.LogSoftmax(dim=dim).eval(), + atol=1e-6, + ) + + def test_sigmoid(self, context): + self._test_activation( + context, + (3, 4, 5), + [], + "sigmoid", + ops.sigmoid, + nn.Sigmoid().eval(), + atol=1e-6, + ) + + def test_gelu(self, context): + self._test_activation( + context, (3, 4, 5), [], "gelu", ops.gelu, nn.GELU().eval(), atol=1e-6 + ) + + @pytest.mark.parametrize( + "dim, start, end, step", + itertools.product([0, 1, 2], [0, 1, 2], [3, 4, 5, None], [1, 2]), + ) + def test_slice(self, context, dim, start, end, step): + test_input = torch.rand(5, 5, 5) + constants, input_list, output_name = self._gen_constants( + 5, [test_input, dim, start, end, step] + ) + node = InternalTorchIRNode( + kind="slice", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops._slice, node, output_name, constants=constants + ) + if end is None: + end = test_input.shape[dim] + expected_result = test_input.index_select( + dim, torch.LongTensor(range(start, end, step)) + ) + np.testing.assert_allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "split_sizes, dim, make_explicit", + itertools.product([2, 3], [0, 1, 2], [True, False]), + ) + def test_split(self, context, split_sizes, dim, make_explicit): + test_input = torch.rand(3, 4, 5) + if make_explicit: + # Explicitly provide the size of each split. This will be two + # splits, the given size and the remainder. 
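+            # e.g. split_sizes=2 on a dim of size 5 becomes [2, 3], which
+            # torch.split treats as explicit per-chunk sizes.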
+ split_sizes = [split_sizes, test_input.shape[dim] - split_sizes] + constants, input_list, output_name = self._gen_constants( + 3, [test_input, split_sizes, dim] + ) + node = InternalTorchIRNode( + kind="split", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.split, node, output_name, constants=constants + ) + expected_result = torch.split(test_input, split_sizes, dim) + if not isinstance(ssa, list): + ssa = [ssa] + + for ex_res, ssa_res in zip(expected_result, ssa): + np.testing.assert_allclose(ex_res.numpy(), ssa_res.val, atol=1e-6) + + @pytest.mark.parametrize( + "num_args, dtype", itertools.product([4, 5, 6], [0, 1, 2, 3, 4, 5, 6, 7, 11]) + ) + def test_to(self, context, num_args, dtype): + test_input = torch.rand(1, 2, 3) + # These args should be unused + copy = True + non_blocking = True + device = 1337 + + constants_list = [non_blocking, copy] + if num_args == 4: + constants_list = [dtype] + constants_list + elif num_args == 5: + constants_list = [device, dtype] + constants_list + else: + constants_list = [device, dtype, copy] + constants_list + constants_list = [test_input] + constants_list + constants, input_list, output_name = self._gen_constants( + len(constants_list), constants_list + ) + to_node = InternalTorchIRNode( + kind="to", inputs=input_list, outputs=[output_name] + ) + if num_args == 6: + with pytest.raises(ValueError): + ssa = self._construct_test_graph( + context, ops.to, to_node, output_name, constants=constants, + ) + else: + ssa = self._construct_test_graph( + context, ops.to, to_node, output_name, constants=constants, + ) + if num_args == 3: + expected_result = test_input.numpy() + else: + expected_result = test_input.to( + dtype=ops.NUM_TO_TORCH_DTYPE[dtype] + ).numpy() + assert np.allclose(expected_result, ssa.val) + + def test_floor(self, context): + test_input = torch.rand(1, 2, 3) * 10 + constants, input_list, output_name = self._gen_constants(1, test_input) + floor_node = InternalTorchIRNode( + kind="floor", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.floor, floor_node, output_name, constants=constants, + ) + expected_result = test_input.floor() + assert np.allclose(expected_result, ssa.val) + + def test_erf(self, context): + test_input = torch.rand(1, 2, 3, 4) + constants, input_list, output_name = self._gen_constants(1, test_input) + node = InternalTorchIRNode(kind="erf", inputs=input_list, outputs=[output_name]) + ssa = self._construct_test_graph( + context, ops.erf, node, output_name, constants=constants + ) + expected_result = test_input.erf() + assert np.allclose(expected_result, ssa.val) + + def test_implicittensortonum(self, context): + input_shape = (1,) + graph_input_name = "input1" + graph_inputs = { + graph_input_name: mb.placeholder(input_shape, dtype=types.float) + } + output_name = "1" + node = InternalTorchIRNode( + kind="implicittensortonum", inputs=["input1"], outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, + ops.implicittensortonum, + node, + output_name, + graph_inputs=graph_inputs, + ) + assert ssa.shape == () + + @pytest.mark.parametrize( + "chunks, dim", itertools.product([2, 3, 5], [0, 1, 2, 3]), + ) + def test_constantchunk(self, context, chunks, dim): + test_input = torch.rand(5, 8, 9, 11) + expected_result = test_input.chunk(chunks, dim=dim) + constants, input_list, first_output = self._gen_constants(1, [test_input]) + outputs = [str(int(first_output) + i) for i in range(len(expected_result))] + node = 
InternalTorchIRNode( + attr={"chunks": chunks, "dim": dim}, + kind="constantchunk", + inputs=input_list, + outputs=outputs, + ) + self._construct_test_graph( + context, ops.constantchunk, node, first_output, constants=constants + ) + actual_result = [context[name] for name in outputs] + + np.testing.assert_equal(len(expected_result), len(actual_result)) + for ex_res, ssa_res in zip(expected_result, actual_result): + np.testing.assert_allclose(ex_res.numpy(), ssa_res.val, atol=1e-6) + + @pytest.mark.parametrize( + "input_shape, shape", + [ + ((3, 1), (3, 4)), + ((3, 1), (-1, 4)), + ((3, 1, 1), (3, 4, 1)), + ((3, 1, 1), (3, -1, 5)), + ((3, 1, 1), (3, 4, 5)), + ((1, 3, 1, 1), (2, 3, -1, 1)), + ((1, 3, 4, 1), (2, 3, -1, 5)), + ], + ) + def test_expand(self, context, input_shape, shape): + test_input = torch.rand(input_shape) + constants, input_list, output_name = self._gen_constants(2, [test_input, shape]) + node = InternalTorchIRNode( + kind="expand", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.expand, node, output_name, constants=constants + ) + expected_result = test_input.expand(shape) + np.testing.assert_allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "input_shape, other_shape", + [ + ((3, 1), (3, 4)), + ((3, 1, 1), (3, 4, 1)), + ((3, 1, 1), (3, 4, 5)), + ((1, 3, 1, 1), (2, 3, 4, 1)), + ((1, 3, 4, 1), (2, 3, 4, 5)), + ((1, 3, 4, 1), (1, 3, 4, 5)), + ], + ) + def test_expand_as(self, context, input_shape, other_shape): + test_input = torch.rand(input_shape) + other = torch.rand(other_shape) + constants, input_list, output_name = self._gen_constants(2, [test_input, other]) + node = InternalTorchIRNode( + kind="expand_as", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.expand_as, node, output_name, constants=constants + ) + expected_result = test_input.expand_as(other) + np.testing.assert_allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "start, end, step", + [x for x in itertools.product((None, 0, 2), (5, 10), (None,),)] + + [x for x in itertools.product((0, 2), (5, 10), (1, 2))], + ) + def test_arange(self, context, start, end, step): + # Arange can get [end], [start, end], or [start, end, step] + args = [x for x in [start, end, step] if x is not None] + args += [0, 0, 0, False] # Extra args needed but ignored by arange + constants, input_list, output_name = self._gen_constants(len(args), args) + node = InternalTorchIRNode( + kind="arange", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.arange, node, output_name, constants=constants + ) + kwargs = {"end": end} + if start is not None: + kwargs["start"] = start + if step is not None: + kwargs["step"] = step + expected_result = torch.arange(**kwargs) + np.testing.assert_allclose(expected_result, ssa.val) + + @pytest.mark.parametrize( + "input_shape, axis", + [((2, 3), 0), ((2, 3, 4), 1), ((2, 3, 4, 5), 0), ((2, 3, 4, 5), 2),], + ) + def test_masked_fill(self, context, input_shape, axis): + mask_shape = list(input_shape) + mask_shape[axis] = 1 + mask = torch.randint(0, 1, mask_shape, dtype=torch.bool) + input_data = torch.rand(input_shape) + value = -1.0 + constants, input_list, output_name = self._gen_constants( + 3, [input_data, mask, value] + ) + node = InternalTorchIRNode( + kind="masked_fill", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.masked_fill, node, output_name, constants=constants + ) + 
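+        # @mask has size 1 along @axis, so masked_fill broadcasts it
+        # against @input_data, matching torch's broadcasting semantics.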
expected_result = input_data.masked_fill(mask, value) + np.testing.assert_allclose(expected_result, ssa.val) + + @pytest.mark.parametrize("sizes", itertools.permutations([1, 2, 3])) + def test_meshgrid(self, context, sizes): + input_tensors = [torch.rand(size) for size in sizes] + expected_results = torch.meshgrid(input_tensors) + constants, input_list, output_name = self._gen_constants(3, input_tensors) + node = InternalTorchIRNode( + kind="meshgrid", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.meshgrid, node, output_name, constants=constants, + ) + for expected_result, ssa_result in zip(expected_results, ssa): + np.testing.assert_allclose(expected_result.numpy(), ssa_result.val) + + @pytest.mark.parametrize( + "noop_kind", + ["dropout", "dropout_", "feature_dropout", "contiguous", "device", "detach"], + ) + def test_noops(self, context, noop_kind): + test_input = torch.rand(3, 4, 5) + constants, input_list, output_name = self._gen_constants( + 3, [test_input, "test", "test"] + ) + node = InternalTorchIRNode( + kind=noop_kind, inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.noop, node, output_name, constants=constants + ) + assert np.allclose(test_input.numpy(), ssa.val) + + def test_tanh(self, context): + test_input = torch.rand(3, 4, 5) + constants, input_list, output_name = self._gen_constants(1, [test_input]) + node = InternalTorchIRNode( + kind="tanh", inputs=input_list, outputs=[output_name] + ) + ssa = self._construct_test_graph( + context, ops.tanh, node, output_name, constants=constants + ) + expected_result = torch.tanh(test_input) + assert np.allclose(expected_result.numpy(), ssa.val) + + # TODO: test for @keepdim==True when the backend bug is fixed. 
+    # rdar://62566799
+    @pytest.mark.parametrize(
+        "input_shape, dim, keepdim",
+        itertools.product([(3, 20, 20), (1, 50, 50)], [0, 1, 2], [False]),
+    )
+    def test_argmax(self, context, input_shape, dim, keepdim):
+        test_input = torch.rand(*input_shape)
+
+        constants, input_list, output_name = self._gen_constants(
+            4, [test_input, dim, keepdim, None]
+        )
+        node = InternalTorchIRNode(
+            kind="argmax", inputs=input_list, outputs=[output_name]
+        )
+        ssa = self._construct_test_graph(
+            context, ops.argmax, node, output_name, constants=constants
+        )
+        expected_result = torch.argmax(test_input, dim, keepdim)
+        np.testing.assert_allclose(expected_result, ssa.val)
+
+    @pytest.mark.parametrize(
+        "size, dtype", itertools.product([(1, 2, 3, 4), (1,)], [11, 0, 1, 6]),
+    )
+    def test_zeros(self, context, size, dtype):
+        layout = 0  # unused
+        device = 0  # unused
+        pin_memory = 0  # unused
+        constants, input_list, output_name = self._gen_constants(
+            5, [size, dtype, layout, device, pin_memory]
+        )
+        node = InternalTorchIRNode(
+            kind="zeros", inputs=input_list, outputs=[output_name]
+        )
+        ssa = self._construct_test_graph(
+            context, ops.zeros, node, output_name, constants=constants
+        )
+        expected_result = torch.zeros(size, dtype=ops.NUM_TO_TORCH_DTYPE[dtype])
+        np.testing.assert_allclose(expected_result, ssa.val)
+
+    # TODO: Reduce rtol
+    # rdar://62868763 (Numerical discrepancy between torch.exp and coreml MIL exp operation)
+    @pytest.mark.parametrize("input_size", [(1, 2, 3, 4), (1,)])
+    def test_exp(self, context, input_size):
+        test_input = torch.rand(input_size)
+        constants, input_list, output_name = self._gen_constants(1, test_input)
+        node = InternalTorchIRNode(kind="exp", inputs=input_list, outputs=[output_name])
+        ssa = self._construct_test_graph(
+            context, ops.exp, node, output_name, constants=constants
+        )
+        expected_result = torch.exp(test_input)
+        np.testing.assert_allclose(expected_result, ssa.val, rtol=1e-06)
+
+    @pytest.mark.parametrize(
+        "input_size, dim, keepdim",
+        itertools.product([(1, 2, 3, 4)], [0, 1, 2], [True, False]),
+    )
+    def test_max(self, context, input_size, dim, keepdim):
+        test_input = torch.rand(input_size)
+        constants, input_list, _ = self._gen_constants(3, [test_input, dim, keepdim])
+        node = InternalTorchIRNode(
+            kind="max", inputs=input_list, outputs=["out1", "out2"],
+        )
+        ssa = self._construct_test_graph(context, ops.max, node, constants=constants)
+        max_result = context["out1"].val
+        index_result = context["out2"].val
+        # torch.max returns (values, indices); the converted op is assumed to
+        # follow the same output order. Compare both outputs instead of
+        # silently discarding them.
+        expected_max, expected_index = torch.max(test_input, dim=dim, keepdim=keepdim)
+        np.testing.assert_allclose(expected_max, max_result)
+        np.testing.assert_allclose(expected_index, index_result)
+
+    @pytest.mark.parametrize(
+        "input_size, dim, descending",
+        itertools.product([(2, 3, 4), (1, 2, 3, 4)], [0, 1, 2], [True, False]),
+    )
+    def test_sort(self, context, input_size, dim, descending):
+        test_input = torch.rand(input_size)
+        constants, input_list, output_name = self._gen_constants(
+            3, [test_input, dim, descending]
+        )
+        node = InternalTorchIRNode(
+            kind="sort", inputs=input_list, outputs=["out1", "out2"],
+        )
+        ssa = self._construct_test_graph(context, ops.sort, node, constants=constants)
+        expected_sort, expected_index = torch.sort(
+            test_input, dim=dim, descending=descending
+        )
+        sort_result = context["out1"].val
+        index_result = context["out2"].val
+        np.testing.assert_allclose(expected_sort, sort_result)
+        np.testing.assert_allclose(expected_index, index_result)
diff --git a/coremltools/converters/mil/frontend/torch/test/testing_utils.py b/coremltools/converters/mil/frontend/torch/test/testing_utils.py
new file mode 100644
index 
000000000..51003e667 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/test/testing_utils.py @@ -0,0 +1,120 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import numpy as np +import torch + +from six import string_types as _string_types +from coremltools import TensorType +from coremltools.converters import convert +from coremltools.models import MLModel +from coremltools._deps import _IS_MACOS + + +def _flatten(object): + flattened_list = [] + for item in object: + if isinstance(item, (list, tuple)): + flattened_list.extend(_flatten(item)) + else: + flattened_list.append(item) + return flattened_list + + +def convert_to_coreml_inputs(input_description, inputs): + """Convenience function to combine a CoreML model's input description and + set of raw inputs into the format expected by the model's predict function. + """ + flattened_inputs = _flatten(inputs) + coreml_inputs = { + str(x): inp.numpy() for x, inp in zip(input_description, flattened_inputs) + } + return coreml_inputs + + +def convert_to_mlmodel(model_spec, tensor_inputs): + def _convert_to_inputtype(inputs): + if isinstance(inputs, list): + return [_convert_to_inputtype(x) for x in inputs] + elif isinstance(inputs, tuple): + return tuple([_convert_to_inputtype(x) for x in inputs]) + elif isinstance(inputs, torch.Tensor): + return TensorType(shape=inputs.shape) + else: + raise ValueError( + "Unable to parse type {} into InputType.".format(type(inputs)) + ) + + mlmodel = convert(model_spec, inputs=list(_convert_to_inputtype(tensor_inputs))) + return mlmodel + + +def generate_input_data(input_size): + if isinstance(input_size, list): + return [torch.rand(_size) for _size in input_size] + else: + return torch.rand(input_size) + + +def trace_model(model, input_data): + model.eval() + if isinstance(input_data, list): + input_data = tuple(input_data) + torch_model = torch.jit.trace(model, input_data) + return torch_model + + +def run_numerical_test( + input_data, model, expected_results=None, places=5, input_as_shape=True +): + """ + Traces a model and runs a numerical test. + Args: + input_as_shape : If true generates random input data with shape. + expected_results : Expected result from running pytorch model. + """ + model.eval() + if input_as_shape: + input_data = generate_input_data(input_data) + model_spec = trace_model(model, input_data) + convert_and_compare( + input_data, model_spec, expected_results=expected_results, atol=10.0 ** -places + ) + + +def flatten_and_detach_torch_results(torch_results): + if isinstance(torch_results, (list, tuple)): + return [x.detach().numpy() for x in _flatten(torch_results)] + # Do not need to flatten + return [torch_results.detach().numpy()] + + +def convert_and_compare(input_data, model_spec, expected_results=None, atol=1e-5): + """ + If expected results is not set, it will by default + be set to the flattened output of the torch model. 
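+
+    A minimal usage sketch (the torch.nn.ReLU module is illustrative only):
+
+        model = torch.nn.ReLU().eval()
+        data = torch.rand(1, 3)
+        convert_and_compare(data, trace_model(model, data))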
+ """ + if isinstance(model_spec, _string_types): + torch_model = torch.jit.load(model_spec) + else: + torch_model = model_spec + + if not isinstance(input_data, (list, tuple)): + input_data = [input_data] + + if not expected_results: + expected_results = torch_model(*input_data) + expected_results = flatten_and_detach_torch_results(expected_results) + mlmodel = convert_to_mlmodel(model_spec, input_data) + coreml_inputs = convert_to_coreml_inputs(mlmodel.input_description, input_data) + if _IS_MACOS: + coreml_results = mlmodel.predict(coreml_inputs) + sorted_coreml_results = [ + coreml_results[key] for key in sorted(coreml_results.keys()) + ] + + for torch_result, coreml_result in zip(expected_results, sorted_coreml_results): + np.testing.assert_equal(coreml_result.shape, torch_result.shape) + np.testing.assert_allclose(coreml_result, torch_result, atol=atol) diff --git a/coremltools/converters/mil/frontend/torch/torch_op_registry.py b/coremltools/converters/mil/frontend/torch/torch_op_registry.py new file mode 100644 index 000000000..461616e18 --- /dev/null +++ b/coremltools/converters/mil/frontend/torch/torch_op_registry.py @@ -0,0 +1,44 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +_TORCH_OPS_REGISTRY = {} + + +def register_torch_op(_func=None, torch_alias=None, override=False): + """ + Registration routine for PyTorch operators + _func: (PyTorch conversion function) [Default=None] + PyTorch conversion function to register + + torch_alias: (List of string) [Default=None] + All other PyTorch operators that should also be mapped to + current conversion routine. + e.g. Sort aliased with SortV1, SortV2 + All provided alias operators must not be registered previously. + + override: (Boolean) [Default=False] + If True, overrides earlier registration i.e. specified + operator and alias will start pointing to current conversion + function. + Otherwise, duplicate registration will error out. + """ + + def func_wrapper(func): + f_name = func.__name__ + if not override and f_name in _TORCH_OPS_REGISTRY: + raise ValueError("Torch Op {} already registered.".format(f_name)) + _TORCH_OPS_REGISTRY[f_name] = func + if torch_alias is not None: + for name in torch_alias: + if not override and name in _TORCH_OPS_REGISTRY: + msg = "Torch Op alias {} already registered." + raise ValueError(msg.format(name)) + _TORCH_OPS_REGISTRY[name] = func + return func + + if _func is None: + # decorator called without argument + return func_wrapper + return func_wrapper(_func) diff --git a/coremltools/converters/mil/input_types.py b/coremltools/converters/mil/input_types.py new file mode 100644 index 000000000..d37c9e92d --- /dev/null +++ b/coremltools/converters/mil/input_types.py @@ -0,0 +1,321 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import logging
+import numpy as np
+import six
+from coremltools.converters.mil.mil.types.symbolic import is_symbolic
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil.types.type_mapping import (
+    numpy_type_to_builtin_type,
+    is_builtin,
+)
+
+
+class ClassifierConfig(object):
+    def __init__(
+        self,
+        class_labels,
+        predicted_feature_name="classLabel",
+        predicted_probabilities_output=None,
+    ):
+        """
+        Configuration for classifier models.
+
+        Attributes:
+
+        class_labels: str / list of int / list of str
+            If a list is given, the list maps the index of the output of a
+            neural network to labels in a classifier.
+            If a str is given, the str points to a file which maps the index
+            to labels in a classifier.
+
+        predicted_feature_name: str
+            Name of the output feature for the class labels exposed in the
+            Core ML neural network classifier. Default: 'classLabel'.
+
+        predicted_probabilities_output: str
+            If provided, then this is the name of the neural network blob which
+            generates the probabilities for each class label (typically the output
+            of a softmax layer). If not provided, then the last output layer is
+            assumed.
+        """
+        self.class_labels = class_labels
+        self.predicted_feature_name = predicted_feature_name
+        self.predicted_probabilities_output = predicted_probabilities_output
+
+
+class InputType(object):
+    def __init__(self, name=None, shape=None, dtype=types.fp32):
+        """
+        The Input Type for inputs fed into the model.
+
+        Attributes:
+
+        name: (str)
+            The name of the input.
+        shape: list, tuple, Shape object, EnumeratedShapes object or None
+            The shape(s) that are valid for this input.
+            If set to None, the shape will be inferred from the model itself.
+        dtype: builtin type from coremltools.converters.mil.mil.types
+            The expected element type. Default: types.fp32.
+        """
+
+        self.name = name
+        if shape is not None:
+            self.shape = _get_shaping_class(shape)
+        else:
+            self.shape = None
+        self.dtype = dtype
+
+
+class ImageType(InputType):
+    def __init__(
+        self,
+        name=None,
+        shape=None,
+        scale=1.0,
+        bias=None,
+        color_layout="RGB",
+        channel_first=None,
+    ):
+        """
+        Configuration class used for image inputs in CoreML.
+
+        Attributes:
+
+        scale: (float)
+            The scaling factor for all values in the image channels.
+        bias: float or list of float
+            If `color_layout` is 'G', bias is a single float.
+            If `color_layout` is 'RGB' or 'BGR', bias is a list of three floats.
+        color_layout: string
+            Color layout of the image.
+            Valid values:
+                'G': Grayscale
+                'RGB': [Red, Green, Blue]
+                'BGR': [Blue, Green, Red]
+        channel_first: (bool) or None
+            Set to True if input format is channel first.
+            The default for TF models is channel last (channel_first=False);
+            for PyTorch models it is channel first 
(channel_first=True) + """ + super(ImageType, self).__init__(name, shape) + self.scale = scale + if color_layout not in ["G", "RGB", "BGR"]: + raise ValueError( + "color_layout should be one of ['G', 'RGB', 'BGR'], got '{}' instead".format( + color_layout + ) + ) + self.color_layout = color_layout + + if bias is None: + self.bias = 0.0 if color_layout == "G" else [0.0, 0.0, 0.0] + else: + self.bias = bias + self.channel_first = channel_first + + +class TensorType(InputType): + def __init__(self, name=None, shape=None, dtype=None): + super(TensorType, self).__init__(name, shape) + if dtype is None: + self.dtype = types.fp32 + elif is_builtin(dtype): + self.dtype = dtype + else: + # Assume dtype is numpy type + try: + self.dtype = numpy_type_to_builtin_type(dtype) + except TypeError: + raise TypeError("dtype={} is unsupported".format(dtype)) + + +class RangeDim(object): + def __init__(self, lower_bound=1, upper_bound=-1, default=None): + """ + A class that can be used to give a range of accepted shapes. + + Attribute: + + lower_bound: (int) + The minimum valid value for the shape. + upper_bound: (int) + The maximum valid value for the shape. + Set to -1 if there's no upper limit. + default: (int) or None + The default value that is used for initiating the model, and set in + the metadata of the model file. + If set to None, `lower_bound` would be used as default. + """ + self.lower_bound = lower_bound + self.upper_bound = upper_bound + if default is None: + self.default = lower_bound + else: + if default < lower_bound: + raise ValueError( + "Default value {} is less than minimum value ({}) for range".format( + default, lower_bound + ) + ) + if upper_bound > 0 and default > upper_bound: + raise ValueError( + "Default value {} is greater than maximum value ({}) for range".format( + default, upper_bound + ) + ) + self.default = default + + +class Shape(object): + def __init__(self, shape, default=None): + """ + The basic shape class to be set in InputType. + + Attribute: + + shape: list of (int), symbolic values, RangeDim object + The valid shape of the input + default: tuple of int or None + The default shape that is used for initiating the model, and set in + the metadata of the model file. + If None, then `shape` would be used. 
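+
+        Example (a sketch; the bounds are illustrative):
+
+            # Batch dimension varies between 1 and 64, defaulting to 1:
+            Shape(shape=[RangeDim(1, 64, default=1), 3, 256, 256])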
+        """
+        from coremltools.converters.mil.mil import get_new_symbol
+
+        if not isinstance(shape, (list, tuple)):
+            raise ValueError(
+                "Shape should be list or tuple, got type {} instead".format(type(shape))
+            )
+        self.symbolic_shape = []
+        shape = list(shape)
+        for idx, s in enumerate(shape):
+            if s is None or s == -1 or isinstance(s, RangeDim):
+                sym = get_new_symbol()
+                self.symbolic_shape.append(sym)
+                if s is None or s == -1:
+                    shape[idx] = sym
+            elif isinstance(s, (np.generic, six.integer_types)) or is_symbolic(s):
+                self.symbolic_shape.append(s)
+            else:
+                raise ValueError(
+                    "Unknown type {} to build symbolic shape.".format(type(s))
+                )
+
+        self.shape = tuple(shape)
+        if default is not None:
+            if not isinstance(default, (list, tuple)):
+                raise ValueError(
+                    "Default shape should be list or tuple, got type {} instead".format(
+                        type(default)
+                    )
+                )
+            for idx, s in enumerate(default):
+                if not isinstance(
+                    s, (np.generic, six.integer_types)
+                ) and not is_symbolic(s):
+                    raise ValueError(
+                        "Invalid default shape: entry at index {} is {}".format(idx, s)
+                    )
+        else:
+            default = []
+            for idx, s in enumerate(self.shape):
+                if isinstance(s, RangeDim):
+                    default.append(s.default)
+                elif s is None or s == -1:
+                    default.append(self.symbolic_shape[idx])
+                else:
+                    default.append(s)
+        self.default = tuple(default)
+
+
+class EnumeratedShapes(object):
+    def __init__(self, shapes, default=None):
+        """
+        A shape class that is used for setting multiple valid shapes in InputType.
+
+        shapes: list of Shape objects, or Shape-compatible lists.
+            The valid shapes of the inputs.
+            If an element is not a Shape object but can be converted to one,
+            the converted Shape object is stored in `shapes` instead.
+        default: tuple of int or None
+            The default shape that is used for initiating the model, and set in
+            the metadata of the model file.
+            If None, then the first element in `shapes` is used.
+        """
+        from coremltools.converters.mil.mil import get_new_symbol
+
+        if not isinstance(shapes, (list, tuple)):
+            raise ValueError(
+                "EnumeratedShapes should be list or tuple of shape, got type {} instead".format(
+                    type(shapes)
+                )
+            )
+        if len(shapes) < 2:
+            raise ValueError(
+                "EnumeratedShapes should take a list or tuple with len >= 2, got {} instead".format(
+                    len(shapes)
+                )
+            )
+
+        self.shapes = []
+        for idx, s in enumerate(shapes):
+            if isinstance(s, Shape):
+                self.shapes.append(s)
+            else:
+                self.shapes.append(Shape(s))
+
+        self.symbolic_shape = self.shapes[0].symbolic_shape
+        for shape in self.shapes:
+            for idx, s in enumerate(shape.symbolic_shape):
+                if is_symbolic(self.symbolic_shape[idx]):
+                    continue
+                elif is_symbolic(s):
+                    self.symbolic_shape[idx] = s
+                elif s != self.symbolic_shape[idx]:
+                    self.symbolic_shape[idx] = get_new_symbol()
+
+        if default is not None:
+            if not isinstance(default, (list, tuple)):
+                raise ValueError(
+                    "Default shape should be list or tuple, got type {} instead".format(
+                        type(default)
+                    )
+                )
+            for idx, s in enumerate(default):
+                if not isinstance(
+                    s, (np.generic, six.integer_types)
+                ) and not is_symbolic(s):
+                    raise ValueError(
+                        "Invalid default shape: entry at index {} is {}".format(idx, s)
+                    )
+        else:
+            default = self.shapes[0].default
+        self.default = default
+
+
+def _get_shaping_class(shape):
+    """
+    Returns a Shape class or EnumeratedShapes class for `shape`
+    where `shape` could be lists/tuple/Shape/EnumeratedShapes/etc.
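+
+    For example, `[1, 3, 256, 256]` becomes a `Shape`, while a list of such
+    shapes, e.g. `[(1, 3, 256, 256), (1, 3, 512, 512)]`, becomes
+    `EnumeratedShapes`.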
+    """
+    if isinstance(shape, (Shape, EnumeratedShapes)):
+        return shape
+
+    try:
+        enum_shape = EnumeratedShapes(shape)
+        return enum_shape
+    except ValueError:
+        pass
+    try:
+        shape = Shape(shape)
+        return shape
+    except ValueError:
+        pass
+    raise ValueError("Can't convert to CoreML shaping class from {}.".format(shape))
diff --git a/coremltools/converters/mil/mil/__init__.py b/coremltools/converters/mil/mil/__init__.py
new file mode 100644
index 000000000..02d07a27a
--- /dev/null
+++ b/coremltools/converters/mil/mil/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+SPACES = " "
+
+from .block import curr_block, Block, Function
+from .input_type import *
+from .operation import *
+from .program import *
+from .var import *
+
+from .builder import Builder
+from .ops.defs._op_reqs import register_op
diff --git a/coremltools/converters/mil/mil/block.py b/coremltools/converters/mil/mil/block.py
new file mode 100644
index 000000000..eb211aa72
--- /dev/null
+++ b/coremltools/converters/mil/mil/block.py
@@ -0,0 +1,1328 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from collections import Counter, OrderedDict
+import copy
+import logging
+import numpy as _np
+import six
+from . import SPACES, types
+from .var import Var, InternalVar, ListVar
+from .visitors.dot_visitor import DotVisitor
+from .types.symbolic import (
+    k_used_symbols,
+    k_num_internal_syms,
+    any_symbolic,
+    is_symbolic,
+)
+from .input_type import TupleInputType
+
+# BLOCK_STACK[-1] is the current block
+BLOCK_STACK = []
+
+
+class InvalidBlockStateError(Exception):
+    pass
+
+
+def curr_block():
+    if len(BLOCK_STACK) == 0:
+        raise ValueError("Must call Builder inside a Function" + " or Block")
+    return BLOCK_STACK[-1]
+
+
+def is_internal_input(arg_name):
+    return arg_name[0] == "_"
+
+
+def _is_compatible_symbolic_array(a, b):
+    """
+    A helper function that checks whether two numpy arrays containing symbolic
+    values are compatible. For instance,
+        a = np.array([is0, 1])
+        b = np.array([is1, 1])
+    are considered compatible, while
+        a = np.array([is0, 1])
+        b = np.array([is1, -1])
+    are not.
+    """
+    assert any_symbolic(a) and any_symbolic(b)
+    if not a.shape == b.shape:
+        return False
+    a = a.flatten()
+    b = b.flatten()
+    for t, v in zip(a, b):
+        if not is_symbolic(t) and not is_symbolic(v):
+            if t != v:
+                return False
+        elif not is_symbolic(t) or not is_symbolic(v):
+            return False
+        if t != v:
+            logging.warning("Try to replace var with different symbolic values.")
+    return True
+
+
+def _check_is_compatible_type(type1, type2):
+    if not types.is_subtype(type1, type2):
+        is_comp, _ = types.is_tensor_and_is_compatible(type1, type2)
+        return is_comp
+    return True
+
+
+VALUE = 1
+SYMBOL = 2
+NONE = 4
+ALL = 7
+
+
+def precondition(allow=ALL):
+    """
+    A helper decorator for value_inference methods.
+    Decorate value_inference with parameter VALUE/SYMBOL/NONE or ALL.
+    For VALUE/SYMBOL/NONE, use logical or ( | ) to allow multiple kinds.
+    Note that:
+    1. ALL == VALUE | SYMBOL | NONE
+    2. The chosen flags (some or all of VALUE/SYMBOL/NONE) must be satisfied
+       by EVERY input for the precondition to be satisfied.
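+       For example, allow=VALUE | SYMBOL admits inputs that carry either a
+       concrete compile-time value or a symbol, but raises NotImplementedError
+       if any required input is None.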
+ + The meaning for each flag is: + VALUE: value that can be materialized during compile time + SYMBOL: value that cannot be materialized by exist as a symbol value + NONE: a None value + + Usage: + @precondition(allow=VALUE|SYMBOL) + def value_inference(self): + '''some value_inference implementation''' + """ + ALLOW_VALUE = allow & VALUE + ALLOW_SYMBOL = allow & SYMBOL + ALLOW_NONE = allow & NONE + + def process(v, has_value, has_symbol, has_none): + """ + v: Var + + Return updated has_value, has_symbol, has_none + """ + if any_symbolic(v.sym_val): + return has_value, True, has_none + elif v.val is None: + return has_value, has_symbol, True + return True, has_symbol, has_none + + def decorator(func): + def wrapper(self): + HAS_VALUE = False + HAS_SYMBOL = False + HAS_NONE = False + for in_name, in_type in self._input_types.items(): + if in_type.optional: + # Optional inputs are not required to invoke value_inference() + continue + + if isinstance(in_type, TupleInputType): + for v in self._input_vars[in_name]: + HAS_VALUE, HAS_SYMBOL, HAS_NONE = process( + v, HAS_VALUE, HAS_SYMBOL, HAS_NONE + ) + else: + HAS_VALUE, HAS_SYMBOL, HAS_NONE = process( + self._input_vars[in_name], HAS_VALUE, HAS_SYMBOL, HAS_NONE + ) + + if HAS_VALUE and not ALLOW_VALUE: + msg = "Implementation of value_inference() for op {} doesn't support input with VALUE" + raise NotImplementedError(msg.format(self.op_type)) + elif HAS_SYMBOL and not ALLOW_SYMBOL: + msg = "Implementation of value_inference() for op {} doesn't support input with SYMBOL" + raise NotImplementedError(msg.format(self.op_type)) + elif HAS_NONE and not ALLOW_NONE: + msg = "Implementation of value_inference() for op {} doesn't support input with NONE" + raise NotImplementedError(msg.format(self.op_type)) + else: + return func(self) + + return wrapper + + return decorator + + +class Operation(object): + """ + Represents Operation in MIL.Program. + + # Properties + name (str): + The name of the operation + + input_types (InputSpec, class attr): + Read-only named input types from all subclasses. Input types are used + to validate `inputs`. + + inputs [_input_vars] (dict of str --> Var): + An Operation (subclass of Operation) only has access to input Var, + which is already validated against `input_spec`. + + outputs [_output_vars] (list of Var): + List of output var based on type inference. Read-only + """ + + def __init__(self, **kwargs): + self._input_types = self.input_spec.input_types + self.name = kwargs.get("name", None) + + self._output_vars = None + self._input_vars = {} + self.blocks = [] + self.enclosing_block = curr_block() + self._validate_and_set_inputs(**kwargs) + + def set_inputs(self, **kwargs): + self._validate_and_set_inputs(**kwargs) + if not kwargs.get("no_check_var_types", False): + self.type_value_inference() + + def get_flattened_inputs(self): + """ + Returns: + list[Var]. Flatten all tuple inputs + """ + flat_inputs = [] + for v in self.inputs.values(): + if isinstance(v, (list, tuple)): + flat_inputs.extend(v) + else: + flat_inputs.append(v) + return flat_inputs + + def type_value_inference(self, overwrite_output=False): + """ + Perform type inference and auto_val computation based on new input Vars + in kwargs. If self._output_vars is None then we generate _output_vars; + otherwise no new Var is created, but type inference result is verified + against existing _output_vars, if overwrite_output is False. 
+ + If overwrite_output is True, then the type inference result overwrites the + existing _output_vars + """ + output_types = self.type_inference() + if not isinstance(output_types, tuple): + output_types = (output_types,) + output_vals = self._auto_val(output_types) + try: + output_names = self.output_names() + if not isinstance(output_names, tuple): + output_names = (output_names,) + except NotImplementedError as e: + if len(output_types) > 1: + output_names = tuple(str(i) for i, _ in enumerate(output_types)) + else: + output_names = ("",) # output name same as op name. + + # Combine (output_names, output_types, output_vals) to create output + # Vars. + if self._output_vars is None: + self._output_vars = [] + for i, (n, sym_type, sym_val) in enumerate( + zip(output_names, output_types, output_vals) + ): + name = self.name + ":" + n if n != "" else self.name + if types.is_list(sym_type): + new_var = ListVar( + name, + elem_type=sym_type.T[0], + init_length=sym_type.T[1], + dynamic_length=sym_type.T[2], + op=self, + op_output_idx=i, + ) + else: + new_var = Var(name, sym_type, sym_val, op=self, op_output_idx=i) + self._output_vars.append(new_var) + else: + # Check new inference result against existing self._output_vars. + for i, (n, sym_type, sym_val) in enumerate( + zip(output_names, output_types, output_vals) + ): + out_var = self._output_vars[i] + # Check type inference + if overwrite_output: + out_var._sym_type = sym_type + elif not _check_is_compatible_type(sym_type, out_var.sym_type): + msg = "Output Var {} in op {} type changes with new input Vars" + raise ValueError(msg.format(out_var.name, self.name)) + + # Check value inference + if sym_val is not None and out_var.sym_val is None: + if overwrite_output: + out_var._sym_val = sym_val + + if sym_val is not None and out_var.sym_val is not None: + if _np.any(sym_val.val != out_var.sym_val): + if overwrite_output: + out_var._sym_val = sym_val + else: + msg = "value_inference differs for var {} in op {}" + if not any_symbolic(sym_val.val): + raise ValueError(msg.format(out_var.name, self.name)) + elif not _is_compatible_symbolic_array( + sym_val.val, out_var.sym_val + ): + raise ValueError(msg.format(out_var.name, self.name)) + + def _auto_val(self, output_types): + """ + # Evaluation is two stage: + # + # Stage 1: Check whether the method value_inference() is implemented + # + # Stage 2: Check if there's an value_inference() implementation + # for given input types. + # + # Suppose input are all SYMBOL: + # Case 1: No value_inference() implemented => fail at stage 1 + # Case 2: If value_inference() implemented, but requires all VALUE not + # SYMBOL => fail at stage 2 + # Case 3: If value_inference() implemented, and has no restriction on + # input types => Success + # + # If either stage fails, outputs[i].val is None. + # Otherwise, output[i].sym_val is not None. + + output_types: tuple of builtin types + + Returns: + output_vals: tuple of builtin type with value, or tuple of None + """ + do_auto_val = True + + if do_auto_val: + # Is self.value_inference implemented for corresponding input? + try: + vals = self.value_inference() + except NotImplementedError as e: + do_auto_val = False + + if not do_auto_val: + # No auto_val possible. + return tuple(None for _ in output_types) + + if not isinstance(vals, (tuple, list)): + vals = (vals,) + for val in vals: + if val is None: + do_auto_val = False + if not do_auto_val: + # No auto_val possible. 
+ return tuple(None for _ in output_types) + + auto_val = [] + for t, v in zip(output_types, vals): + builtin_val = t() + builtin_val.val = v + auto_val.append(builtin_val) + return auto_val + + def value_inference(self): + """ + Optional Python implementation of the op based on (materialized) values + in `self.input_var`. Return a builtin value (single output) or a tuple of + builtin values (multi-outputs) of the same length as returned by ` + type_inference` + """ + msg = "value_inference() is not implemented by op {}" + raise NotImplementedError(msg.format(self.op_type)) + + def output_names(self): + """ + Optional. If implemented, we set the output var i name as + self.name + "/" + output_names[i] + + Returns a string (single output) or tuple of strings + """ + msg = "output_names() is not implemented by op {}" + raise NotImplementedError(msg.format(self.op_type)) + + def type_inference(self): + """ + Return (builtin_type, builtin_val) pair from type inference. + builtin_val may be None if symbolic_value is not attainable at compile + time. + """ + raise NotImplementedError("This function must be implemented by each op") + + def build_nested_blocks(self): + """ + Build nested blocks (for cond and while_loop and other composite + blocks) + """ + pass + + def _validate_and_set_inputs(self, **kwargs): + non_attributes = [ + "name", + "symbolic_datatype", + "datatype", + "symbolic_value", + "value", + "version", + "before_op", + "no_check_var_visibility", # no_check_var_visibility==True to deviate from SSA + "no_check_var_types", # no_check_var_types==True to force set inputs, even if type does not match with earlier ones + ] + op_inputs = list(self._input_types.keys()) + legal_args = op_inputs + non_attributes + no_check_var_visibility = kwargs.get("no_check_var_visibility", False) + no_check_var_types = kwargs.get("no_check_var_types", False) + + for key in kwargs.keys(): + if key not in legal_args: + raise RuntimeError( + "Unknown input '{}' for op '{}'".format(key, self.op_type) + ) + + def check_and_detach(v_new, v_old, op, no_check_var_types): + # Check new var's sym_type is compatible with the + # existing's sym_type. + if ( + not _check_is_compatible_type(v_new.sym_type, v_old.sym_type) + and not no_check_var_types + ): + msg = "New var type {} not a subtype of " + "existing var type {}" + raise ValueError(msg.format(v_new.sym_type, v_old.sym_type)) + v_old.remove_child_op(op, no_check_var_types) + + parsed_inputs = self.input_spec.parse_inputs(kwargs) + for (name, var) in parsed_inputs: + setattr(self, name, var) + if var is not None and not isinstance(var, InternalVar): + # Remove this operation itself from existing input Var's child_ops + existing_input_var = self._input_vars.get(name, None) + if existing_input_var is not None: + if isinstance(existing_input_var, (list, tuple)): + for v_old, v_new in zip(existing_input_var, var): + check_and_detach(v_new, v_old, self, no_check_var_types) + else: + check_and_detach( + var, existing_input_var, self, no_check_var_types + ) + + # Set var as input_var + if isinstance(var, Var): + var.add_child_op(self) + elif isinstance(var, (tuple, list)): + for v in var: + v.add_child_op(self) + # ignore function inputs + self._input_vars[name] = var + + @property + def inputs(self): + return self._input_vars + + @property + def outputs(self): + return self._output_vars + + @property + def op_type(self): + return type(self).__name__ + + def remove_from_block(self): + """ + Remove / detach itself from the enclosing block. 
See SsaBlock.remove_ops
+        for details. The op is also removed from the child_ops lists of its
+        input Vars.
+        """
+        self.enclosing_block.remove_ops([self])
+
+    @staticmethod
+    def var_to_str(v):
+        if isinstance(v, (tuple, list)):
+            return "(" + ", ".join(["%" + s.name for s in v]) + ")"
+        else:
+            return "%" + v.name
+
+    def indented_str(self, indent=""):
+        s = indent
+        if self.outputs is not None:
+            s += ", ".join([str(o) for o in self.outputs])
+        s += " = " + self.op_type + "("
+        if self.op_type == "const":
+            if self.mode.val == "immediate_value":
+                if isinstance(self.val.sym_val, (_np.generic, _np.ndarray)):
+                    val_str = str(self.val.sym_val.tolist())
+                else:
+                    val_str = (
+                        '"' + self.val.sym_val + '"'
+                        if isinstance(self.val.sym_val, six.string_types)
+                        else str(self.val.sym_val)
+                    )
+                s += "val=" + val_str
+            else:
+                s += "val=(file_value)"
+        else:
+            s += ", ".join(
+                [
+                    k + "=" + Operation.var_to_str(self.inputs[k])
+                    for k in self._input_types.keys()
+                    if k in self.inputs and not is_internal_input(k)
+                ]
+            )
+        s += ', name="{}")\n'.format(self.name)
+        for b in self.blocks:
+            s += b.indented_str(indent=indent + SPACES)
+        return s
+
+    def __repr__(self):
+        return str(self)
+
+    def __str__(self):
+        return self.indented_str(SPACES)
+
+
+class Block(object):
+    __slots__ = [
+        "name",
+        "_block_inputs",
+        "_outputs",
+        "operations",
+        "_internal_vars",
+        "outer_op",
+    ]
+
+    counter = 0
+
+    @classmethod
+    def _get_new_name(cls):
+        curr_val = cls.counter
+        cls.counter += 1
+        return "block" + str(curr_val)
+
+    def __init__(self, block_inputs=None, outer_op=None, name=None):
+        """
+        Inputs:
+
+        block_inputs: python tuple[Var].
+            block_inputs is None except when the block represents a loop.
+
+        outer_op: Operation
+            The enclosing op. None iff this Block is a Function.
+
+        function_inputs: tuple[Var]
+            function_inputs are always visible for this block and all blocks
+            nested within. If function_inputs is None, get it from
+            `outer_op.block`
+        """
+        self.name = name
+        if self.name is None:
+            self.name = Block._get_new_name()
+
+        # list[Operation]. Topologically sorted.
+        self.operations = []
+
+        # Must be set before self.validate()
+        self.outer_op = outer_op
+
+        self.set_inputs(block_inputs)
+
+        # list[Var]. This is converted to str when generating MIL proto.
+        self._outputs = []
+
+        # The (mode, val) inputs of const ops cannot themselves be const
+        # (infinite recursion), so they are tracked as InternalVars and
+        # considered always visible.
+        self._internal_vars = set()
+
+        if self.outer_op is None and not isinstance(self, Function):
+            msg = "Block {} is not Function and thus outer_op cannot be None"
+            raise ValueError(msg.format(self.name))
+
+        self.validate()
+
+    def validate(self):
+        """
+        Basic validation to protect against some invalid state.
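+        Checks include: every op has materialized outputs, var/child-op
+        bookkeeping agrees in both directions, and each block output is
+        registered in its Var's consuming_blocks.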
+ """ + for op in self.operations: + for b in op.blocks: + b.validate() + if op.outputs is None: + raise InvalidBlockStateError() + + # Check the input output relationships + # from outputs -> inputs + for ov in op.outputs: + child_op_count = Counter(ov.child_ops) + for next_op, c in child_op_count.items(): + c_actual = next_op.get_flattened_inputs().count(ov) + if c_actual != c: + msg = ( + "Var {} should be consumed by op {} {}" + + " times, but op {} uses it {} times.\n{}" + ) + raise InvalidBlockStateError( + msg.format( + ov.name, + next_op.name, + c, + next_op.name, + c_actual, + next_op, + ) + ) + + # from inputs -> outputs + input_var_count = Counter(op.get_flattened_inputs()) + for iv, c in input_var_count.items(): + c_actual = iv.child_ops.count(op) + if c_actual != c: + msg = ( + "Var {} should be consumed by op {} {}" + + " times, but op {} uses it {} times.\n{}" + ) + raise InvalidBlockStateError( + msg.format(iv.name, op.name, c_actual, op.name, c, op) + ) + + # 1 to 1 mapping between Block outputs and Var.consuming_blocks + for op in self.operations: + for ov in op.outputs: + for b in ov.consuming_blocks: + if ov not in b.outputs: + msg = "Var {} should be output of block {}: {}" + raise ValueError(msg.format(ov.name, b.name, b)) + + for v in self.outputs: + if self not in v.consuming_blocks: + msg = "Var {} should be output of block {}: {}" + raise ValueError(msg.format(ov.name, b.name, b)) + + def set_inputs(self, block_inputs): + """ + block_inputs must be a var in enclosing block. We'd duplicate and + create a new var with corresponding types but no association with any + existing Var or Operation. Example: + + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32), + # %c: (1, 2, fp32)) { + # block0() { + # %const1: (1, fp32) = const(...) + # %loop:0: (1, 2, fp32), %loop:1: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a, %b)) + # loop_cond(%a.x, %b.x) { + # %blah: (bool) = some_op(x=%a.x, y=%b.x) + # %cond_var: (bool) = some_op2(x=%a.x, y=%blah) + # } -> (%cond_var) + # loop_body(%a.x, %b.x) { + # %add_0: (1, 2, fp32) = add(x=%a.x, y=%b.x) + # } -> (%add_0, %b.x) + # %linear: (1, fp32) = linear(...) + # } -> (%loop:0, %loop:1) + # } + + `some_op` in `loop_cond` block can access %a, %b, %a.x, %b.x + `some_op`, however, cannot take %linear as input. + + Since a duplicate of Var (%a.x for %a) is always created, there will + never be any shadowing. + """ + # block_inputs: list[Var] + if block_inputs is not None: + self._block_inputs = tuple(copy.copy(v) for v in block_inputs) + # Keep track the vars we shadow + for v in self._block_inputs: + v._op = None + v.op_output_idx = None + v._child_ops = list() + v.name = v.name + ".x" + v._sym_val = None + v.consuming_blocks = list() + else: + self._block_inputs = tuple() + + def remove_inputs(self, curr_input_vars): + """ + curr_input_vars: list[Var], whose elements must be in + self._block_inputs. + """ + self.validate() + remove_idx = [self._block_inputs.index(v) for v in curr_input_vars] + self._block_inputs = [ + v for i, v in enumerate(self._block_inputs) if i not in remove_idx + ] + + def find_ops(self, prefix=None, op_type=None): + """ + Return list of ops with name matching `prefix` if specified and + op_type, if specified. At least one of {prefix, op_type} must be specified. + + prefix: str + + Return list[Operation]. Empty list if no op satisfies. 
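+
+        Example (a sketch):
+
+            # All const ops (including in nested blocks) whose names start
+            # with "x":
+            const_ops = block.find_ops(prefix="x", op_type="const")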
+ """ + if prefix is None and op_type is None: + raise ValueError("Must specify one of {prefix, op_type}") + found_ops = [] + for op in self.operations: + prefix_match = prefix is None or op.name[: len(prefix)] == prefix + op_type_match = op_type is None or op.op_type == op_type + if prefix_match and op_type_match: + found_ops.append(op) + for b in op.blocks: + found_ops.extend(b.find_ops(prefix=prefix, op_type=op_type)) + return found_ops + + def add_internal_var(self, internal_var): + if not isinstance(internal_var, InternalVar): + raise ValueError("Only InternalVar can be manually added to Block.") + self._internal_vars.add(internal_var) + + @property + def inputs(self): + return self._block_inputs + + @property + def outputs(self): + return self._outputs + + def set_outputs(self, outputs): + """ + outputs: list[Var] + """ + if not isinstance(outputs, list): + raise ValueError("Outputs must be list of Vars") + + self.validate() + visible_vars = self._visible_vars_from_enclosing_block() + _, visible_vars_in_block = self._visible_vars_in_block() + visible_vars.update(visible_vars_in_block) + for ov in outputs: + if ov not in visible_vars: + msg = ( + "Var {} is not visible in block {} and thus cannot " + + "be a block output.\n{}" + ) + raise ValueError(msg.format(ov.name, self.name, self)) + + for ov in self._outputs: + ov.consuming_blocks.remove(self) + + # Need to copy, or block's output would be completely tied to a var's + # output and we cannot replace a block output with another var's + # output. + self._outputs = copy.copy(outputs) + for ov in outputs: + ov.consuming_blocks.append(self) + + def __enter__(self): + global BLOCK_STACK + BLOCK_STACK.append(self) + return self + + def __exit__(self, type, value, traceback): + global BLOCK_STACK + BLOCK_STACK = BLOCK_STACK[:-1] + + def _visible_vars_in_block(self, target_op=None, inclusive=True): + """ + Returns: + + index (int) of target_op in self.operations if target_op not None, + undefined otherwise. + + Raises: + + ValueError if target_op not None and not found in self.operations. + + visible_vars: set[Var] + Vars returned by ops in the block (self) visible (and equal to + if inclusive==True) target_op. If target_op is not found or None, + include all vars output by self.operations. Examples: + + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32), + # %c: (1, 2, fp32)) { + # block0() { + # %const1: (1, fp32) = const(...) + # %loop:0: (1, 2, fp32), %loop:1: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a, %b)) + # loop_cond(%a.x, %b.x) { + # %blah: (bool) = some_op(x=%a.x, y=%b.x) + # %cond_var: (bool) = some_op2(x=%a.x, y=%blah) + # } -> (%cond_var) + # loop_body(%a.x, %b.x) { + # %add_0: (1, 2, fp32) = add(x=%a.x, y=%b.x) + # } -> (%add_0, %b.x) + # %linear: (1, fp32) = linear(...) + # } -> (%loop:0, %loop:1) + # } + # + + Let V0 and V1 be the set of internal_vars of block0 and loop_cond + block that supplies const vals (for const). + + Ex1: self = block0, target_op = linear. + idx = 2 + visible_vars = {%const1, %loop:0, %loop:1, %linear, V0} + + Ex2: self = loop_cond, target_op = None. + idx = undefined + visible_vars = {%a.x, %b.x, %blah, %cond_var, V1} + + Ex3: self = loop_cond, target_op = some_op. + idx = 0 + visible_vars = {%a.x, %b.x, %blah, V1} + + Ex4: self = loop_cond, target_op = linear. 
+ raises ValueError (linear not found in loop_cond block) + """ + visible_vars = set(self._internal_vars) + visible_vars.update(self.inputs) + idx = -1 + # find the location of target_op + for i, op in enumerate(self.operations): + if op == target_op: + if inclusive: + visible_vars.update(op.outputs) + return i, visible_vars + visible_vars.update(op.outputs) + if target_op is not None: + msg = "Op {} not found in {}: {}" + raise ValueError(msg.format(target_op.name, self.name, self)) + return idx, visible_vars + + def _visible_vars_from_enclosing_block(self): + """ + Returns: + + visible_vars: Vars from lexical scopes visible at the beginning of the + block, up to but not including outputs from before_op. Given program: + + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32), + # %c: (1, 2, fp32)) { + # block0() { + # %const1: (1, fp32) = const(...) + # %loop:0: (1, 2, fp32), %loop:1: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a, %b)) + # loop_cond(%a.x, %b.x) { + # %blah: (bool) = some_op(x=%a.x, y=%b.x) + # %cond_var: (bool) = some_op2(x=%a.x, y=%blah) + # } -> (%cond_var) + # loop_body(%a.x, %b.x) { + # %add_0: (1, 2, fp32) = add(x=%a.x, y=%b.x) + # } -> (%add_0, %b.x) + # %const2: (1, fp32) = const(...) + # } -> (%loop:0, %loop:1) + # } + + Let V0 be the set of internal_vars of block0 block that supplies const + vals (for const). + + Ex1: self = block0 + visible_vars = {%a, %b, %c} (function input) + + Ex2: self = loop_cond. + visible_vars = {%a, %b, %c, %const1, V0} (Note that %const2 is not + part of the set) + """ + visible_vars = set() + + # function inputs are considered external to the block. + if isinstance(self, Function): + # block in function only has function_inputs as from enclosing + # block (Ex1 above). + visible_vars.update(self.function_inputs) + return visible_vars + + if self.outer_op is not None: + enclosing_block = self.outer_op.enclosing_block + vars_at_start = enclosing_block._visible_vars_from_enclosing_block() + visible_vars.update(vars_at_start) + _, visible_vars_in_block = enclosing_block._visible_vars_in_block( + self.outer_op, inclusive=False + ) + visible_vars.update(visible_vars_in_block) + + return visible_vars + + def _insert_op_before(self, new_op, before_op=None): + """ + A private API used by builder. Please use `builder.YOUR_OP(...,before_op)`. + + new_op's outputs are not used (not input to any other op) after + this call. All inputs to new_op must be visible at or before + the before_op (i.e., new_op must be added in topologically sorted + order). Note that this is more restrictive than MIL, whose Block + supports lexical scoping and thus an op can reference Var in enclosing + scopes. new_op.name must be unique in the block. + + before_op=None to append new_op at the end of self.operations. + + Given: %2 = op0(%1, %1) + %4 = op2(%1) + %6 = op3(%4, %4) + + Execute: insert_op_before(op1, before_op=op2), + where %3 = op1(%1, %2) + + Result: %2 = op0(%1, %1) + %3 = op1(%1, %2) + %4 = op2(%1) + %6 = op3(%4, %4) + + Comment: We assume op1 has been constructed outside the block with + %1, %2 as inputs. Typically it's builder's job to create an op and + insert into the current block. + + Comment: insert_op_before(op1, before_op=op0) would error as %2 (an input to op1) + is not visible before op0. 
+ """ + self.validate() + visible_vars = self._visible_vars_from_enclosing_block() + if before_op is not None: + idx, visible_vars_in_block = self._visible_vars_in_block( + before_op, inclusive=True + ) + visible_vars.update(visible_vars_in_block) + else: + _, visible_vars_in_block = self._visible_vars_in_block() + visible_vars.update(visible_vars_in_block) + + # check inputs are visible + for k, v in new_op.inputs.items(): + if not isinstance(v, (Var, tuple)): + continue + if isinstance(v, Var): + vs = [v] + else: + vs = v + for s in vs: + if s not in visible_vars: + before_op_name = before_op.name if before_op is not None else "None" + msg = "Op '{}' input {}={} is not in scope of {} before {}" + raise ValueError( + msg.format(new_op.name, k, s.name, self.name, before_op_name) + ) + + # add new_op + if before_op is None: + self.operations.append(new_op) + else: + self.operations.insert(idx, new_op) + + def _replace_var( + self, + old_var, + new_var, + start=0, + end_id=-1, + no_check_var_visibility=False, + no_check_var_types=False, + ): + """Helper function for replace_uses_of_var_after_op""" + num_ops_affected = 0 + + if end_id == -1: + op_list = self.operations[start:] + else: + op_list = self.operations[start : end_id + 1] + + for op in op_list: + new_inputs = { + "no_check_var_visibility": no_check_var_visibility, + "no_check_var_types": no_check_var_types, + } + affected = False + for k, v in op.inputs.items(): + if isinstance(v, (list, tuple)) and old_var in v: + new_inputs[k] = tuple(new_var if vv == old_var else vv for vv in v) + affected = True + elif v == old_var: + new_inputs[k] = new_var + affected = True + else: + new_inputs[k] = v + if affected: + num_ops_affected += 1 + op.set_inputs(**new_inputs) + + # Replace recursively. + for b in op.blocks: + num_ops_affected += b._replace_var(old_var, new_var) + + if end_id != -1 and old_var.op not in op_list: + return num_ops_affected + + # If old_var is block's output, replace as well. + if old_var in self._outputs: + idx = self._outputs.index(old_var) + self._outputs[idx] = new_var + new_var.consuming_blocks.append(self) + + # This block no longer uses `old_var` as its outputs + old_var.consuming_blocks.remove(self) + + # if rename_new_var_if_fn_output: + # Ensure output name is consistent + if isinstance(self, Function): + new_var.name = old_var.name + return num_ops_affected + + def replace_uses_of_var_after_op( + self, + anchor_op, + old_var, + new_var, + no_check_var_visibility=False, + end_op=None, + no_check_var_types=False, + ): + """ + Replace all uses of `old_var` with `new_var` after `anchor_op`, + and before `end_op` (inclusive). + + That is all the ops that use `old_var` will now use `new_var`. + The op that produces the `old_var` will continue to produce it, its output + won't be replaced by `new_var`. + + If `anchor_op` is None, replace all input occurrences of `old_var` in the block. + If `end_op` is None, all occurrences of `old_var` are replaced in the block starting from the op just + after `anchor_op` + + no_check_var_visibility: True to disable the check ensuring new_var is visible + (visibility requirement depends on anchor_op). + + no_check_var_types: An error will be raised if the type of new_var is not same as the old_var, unless + `no_check_var_types` is set to True. Normally type inference is re-invoked for all the child ops of `old_var` + after updating it to `new_var`. However, this is skipped if `no_check_var_types` is set to True. 
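+
+        A typical rewiring pattern in a graph pass (a sketch; `old` and `new`
+        are illustrative Vars already produced inside `block`):
+
+            block.replace_uses_of_var_after_op(
+                anchor_op=old.op, old_var=old, new_var=new
+            )
+            block.remove_ops([old.op])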
+ + old_var, new_var must meet the following conditions: + + - old_var, new_var both existing within the block. This implies that + the op generating new_var must be inserted prior to this + replacement. + + - Affected ops (i.e., Operation after anchor_op that take old_var as + input) must generate the same type inference results as before. + + - new_var must be visible at or before anchor_op in the order of + self.operations. + + Given: %2 = op0(%1, %1) + %3 = op1(%1, %2) + %4 = op2(%1) + %6 = op3(%4, %4) + + Execute: replace_uses_of_var_after_op(op2, %4, %3) + + Result: %2 = op0(%1, %1) + %3 = op1(%1, %2) + %4 = op2(%1) + %6 = op3(%3, %3) # type inference check against %6 + + + Comment: Execute: replace_uses_of_var_after_op(op1, %4, %3) would lead to + identical results, as op2 does not take %4 as input. + + Comment: replace_uses_of_var_after_op(op0, %4, %3) would cause error as %3 is + after op0 + + Comment: To avoid clutter, we drop the names of arguments and return + Var in the illustration above. + + + Another example, usage of "end_op": + + Given: %2 = op0(%1, %1) + %3 = op1() + %4 = op2(%1, %2) + %5 = op3(%2) + + if execute replace_uses_of_var_after_op(anchor_op=op0, old_var=%2, new_var=%3) + + Result: %2 = op0(%1, %1) + %3 = op1() + %4 = op2(%1, %3) + %5 = op3(%3) + + if execute replace_uses_of_var_after_op(anchor_op=op0, old_var=%2, new_var=%3, end_op=op2) + + Result: %2 = op0(%1, %1) + %3 = op1() + %4 = op2(%1, %3) # %2 is replaced with %3 till here + %5 = op3(%2) # will continue using %2 + + """ + if not no_check_var_visibility: + self.validate() + # Get visible vars from enclosing block + visible_vars = self._visible_vars_from_enclosing_block() + + if anchor_op is not None: + # Get visible vars from the current block + idx, block_vars = self._visible_vars_in_block(anchor_op, inclusive=True) + visible_vars.update(block_vars) + + # start from the next op, excluding `anchor_op` + start = idx + 1 + else: + visible_vars.update(self._block_inputs) + visible_vars.update(self._internal_vars) + # Perform replacement from beginning + start = 0 + + if not no_check_var_visibility and new_var not in visible_vars: + msg = ( + "new_var '{}' is not visible in block '{}' at or before " + + "anchor_op '{}'" + ) + anchor_op_name = "None" if anchor_op is None else anchor_op.name + raise ValueError(msg.format(new_var.name, self.name, anchor_op_name)) + + if end_op is not None: + end_id, _ = self._visible_vars_in_block(end_op, inclusive=True) + else: + end_id = -1 + + if end_id > start: + msg = "end_op '{}' comes before the anchor_op '{}'" + raise ValueError(msg.format(end_op.name, anchor_op.name)) + + num_ops_affected = self._replace_var( + old_var, + new_var, + start=start, + end_id=end_id, + no_check_var_visibility=no_check_var_visibility, + no_check_var_types=no_check_var_types, + ) + + logging.debug("Num ops affected in replacing var: {}".format(num_ops_affected)) + + def remove_ops(self, existing_ops): + """ + Remove `existing_ops` (list[Operation]) that must be pre-existing in + the block. 
Error if any other op in the block uses output Vars of + `existing_ops` + """ + self.validate() + idxs = [-1] * len(existing_ops) + existing_ops_set = set(existing_ops) + for i, op in enumerate(self.operations): + if op in existing_ops_set: + idxs[existing_ops.index(op)] = i + if -1 in idxs: + not_found = [] + for i, op in zip(idxs, existing_ops): + if i == -1: + not_found.append(op.name) + raise ValueError( + "Ops {} not found in block {}".format(not_found, self.name) + ) + + # Remove ops in reverse topological order + pairs = list(zip(idxs, existing_ops)) + pairs.sort(key=lambda x: x[0], reverse=True) + + for idx, op in pairs: + for i, v in enumerate(op.outputs): + # Check that no ops depend on op's outputs + if len(v.child_ops) > 0: + child_op_names = [s.name for s in v.child_ops] + msg = ( + "Cannot delete op '{}' with active output at id {}: '{}' " + + "used by ops {}" + ) + raise ValueError(msg.format(op.name, i, v.name, child_op_names)) + # Check that the output Var isn't block's output + if v in self._outputs: + msg = ( + "cannot delete op {} with output {}: {} " + + "that's block {}'s output" + ) + raise ValueError(msg.format(op.name, i, v.name, self.name)) + + for b in op.blocks: + b.set_outputs([]) + b.remove_ops(b.operations) + + # remove the op (in reverse topological order) + self.operations.pop(idx) + op.enclosing_block = None + + for v in op.inputs.values(): + if isinstance(v, (tuple, list)): + for vv in v: + vv.remove_child_op(op) + else: + v.remove_child_op(op) + + def operations_for_vars(self, end_vs): + """ + Inputs: + + end_vs: list[Operation]. + + Return: + + list[Operation] which are subset of self.operations that are ancestors + of `end_vs`. Also do recursion into nested blocks. + """ + used_vars = set(end_vs) + used_ops = [] + for op in reversed(self.operations): + # if none of op's output is used, delete op + if not set(op.outputs).intersection(used_vars): + continue + + used_ops.append(op) # append in reverse topological order + + # recursively search for nested blocks + ops_to_check = [] + for b in op.blocks: + ops_to_check += b.operations_for_vars(b.outputs) + ops_to_check.append(op) + + # mark used vars + for op_to_check in ops_to_check: + # mark all op's inputs to used + for _, input_var in op_to_check.inputs.items(): + if isinstance(input_var, (tuple, list)): + used_vars.update(list(input_var)) + else: + used_vars.add(input_var) + + return used_ops[::-1] + + def indented_str(self, indent=None): + if indent is None: + indent = "" + s = ( + indent + + self.name + + "(" + + ", ".join(["%" + var.name for var in self._block_inputs]) + ) + s += ") {\n" + for op in self.operations: + s += op.indented_str(indent + SPACES * 1) + s += indent + "} -> (" + if self._outputs is not None: + s += ", ".join(["%" + v.name for v in self._outputs]) + s += ")\n" + return s + + def __repr__(self): + return self.__str__() + + def __str__(self): + return self.indented_str() + + def get_dot_string( + self, + function_name="main", + prefix_id=0, + highlight_debug_op_types=None, + highlight_debug_op_names=None, + ): + """ + Return the dot string that can be used to show the block + with dot. Const ops are not added to the dot string. 
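+        Nodes are colored as follows: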
+ + * Input vars : yellow + * output vars : goldenrod2 + * op names that user wants to highlight, provided in "highlight_debug_op_names": cyan + * op types that user wants to highlight, provided in "highlight_debug_op_types": green + + Examples + -------- + >>> import graphviz + >>> graphviz.Source(block.get_dot_string()).view() + >>> # OR + >>> graphviz.Source(block.get_dot_string()).view(filename='graph.pdf') + """ + if highlight_debug_op_types is None: + highlight_debug_op_types = [] + if highlight_debug_op_names is None: + highlight_debug_op_names = [] + + dotstring = "digraph g {\n" + "\tcompound=true;\n" + + input_var_names = list(self.inputs.keys()) + output_var_names = [v.name for v in self.outputs] + + debug_op_types = [] + if len(highlight_debug_op_types) > 0: + for op in self.operations: + if op.op_type in highlight_debug_op_types: + debug_op_types.append(op.name) + + vis = DotVisitor() + vis.highlight_nodes(input_var_names, "yellow").highlight_nodes( + output_var_names, "goldenrod2" + ).highlight_nodes(highlight_debug_op_names, "cyan").highlight_nodes( + debug_op_types, "green" + ) + + vis.visit_all(self, nodename_prefix=str(prefix_id)) + res = vis.get_result("subgraph", "cluster_" + function_name.replace("/", "_")) + dotstring += "\n".join("\t" + r for r in res.split("\n")) + "\n" + dotstring += "}" + return dotstring + + +class Function(Block): + """ + """ + + def __init__(self, inputs): + """ + inputs: str -> placeholder + """ + self.placeholder_inputs = inputs + # str -> Var + self._input_dict = OrderedDict() + for k, v in self.placeholder_inputs.items(): + v.set_name(k) # set to user input name + self._input_dict[k] = v.outputs[0] + self.function_inputs = tuple(self._input_dict.values()) + + global k_used_symbols + global k_num_internal_syms + for inp in self.function_inputs: + if types.is_tensor(inp.dtype): + shapes = inp.dtype.get_shape() + for s in shapes: + if is_symbolic(s): + k_used_symbols.add(s) + super(Function, self).__init__() + + # Override Block's input + @property + def inputs(self): + return self._input_dict + + def __repr__(self): + return self.__str__() + + def __str__(self): + return self.to_str("function") + + def to_str(self, func_name="function"): + if len(self._input_dict) == 0: + s = func_name + "() {" + else: + inputs = [(in_name, ph) for in_name, ph in self._input_dict.items()] + s = func_name + "(" + str(inputs[0][1]) + for in_name, ph in inputs[1:]: + s += ",\n" + " " * (len(func_name) + 1) + str(ph) + s += ") {\n" + s += self.indented_str(SPACES) + s += "}\n" + return s diff --git a/coremltools/converters/mil/mil/builder.py b/coremltools/converters/mil/mil/builder.py new file mode 100644 index 000000000..0d7b2fe32 --- /dev/null +++ b/coremltools/converters/mil/mil/builder.py @@ -0,0 +1,243 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from collections import defaultdict +import copy +import logging +import six +import numbers +import numpy as np + +from coremltools.converters.mil.mil.types.symbolic import any_symbolic + +from . 
import curr_block, Program, Function, Placeholder, is_internal_input
+from .input_type import (
+    _InputType,
+    InternalStringInputType,
+    InternalScalarOrTensorInputType,
+    ScalarOrTensorInputType,
+    TupleInputType,
+    InputSpec,
+    InternalInputType,
+    PyFunctionInputType,
+)
+from .var import InternalVar, Var
+
+
+def get_const_mode(val):
+    # Heuristics to decide between file_value and immediate_value
+    if isinstance(val, (np.ndarray, np.generic)) and val.size > 10:
+        return "file_value"
+    return "immediate_value"
+
+
+def is_python_value(val):
+    return (
+        isinstance(val, (np.generic, np.ndarray))
+        or isinstance(val, numbers.Number)
+        or isinstance(val, six.string_types)
+        or isinstance(val, bool)
+        or (isinstance(val, (tuple, list)) and all(is_python_value(v) for v in val))
+    )
+
+
+class Builder:
+    """
+    Singleton builder.
+
+    Example:
+
+    from coremltools.converters.mil.mil import Builder as mb
+    from coremltools.converters.mil.mil import Program, Function
+
+    prog = Program()
+    func_inputs = {"x": mb.placeholder(_shape=[2,3]),
+                   "y": mb.placeholder(_shape=[2,3])}
+    with Function(func_inputs) as ssa_fun:
+        x, y = ssa_fun.inputs['x'], ssa_fun.inputs['y']
+        res_var = mb.add(x=x, y=y) # created within ssa_fun block
+        ssa_fun.set_outputs([res_var])
+    prog.add_function("main", ssa_fun)
+    """
+
+    name_count = defaultdict(int)
+
+    @classmethod
+    def _get_free_name(cls, name):
+        new_name = name + "_" + str(cls.name_count[name])
+        cls.name_count[name] += 1
+        return new_name
+
+    @classmethod
+    def _maybe_set_name(cls, kwargs, op_type):
+        if "name" not in kwargs:
+            kwargs["name"] = cls._get_free_name(op_type)
+        return kwargs
+
+    @classmethod
+    def _add_const(cls, val, name, before_op):
+        if not is_python_value(val):
+            raise ValueError("Cannot add const {}".format(val))
+        if any_symbolic(val):
+            msg = (
+                "Python native vals (list, tuple), np.array that are "
+                + "operation inputs cannot have symbolic values. Consider feeding "
+                + "symbolic shape in through placeholder and use mb.shape() "
+                + "operator. Input {}: {}"
+            )
+            raise ValueError(msg.format(name, val))
+        const_name = cls._get_free_name(name)
+        mode = get_const_mode(val)
+        logging.debug("Adding const op '{}'".format(const_name))
+        output_var = cls.const(mode=mode, val=val, name=const_name, before_op=before_op)
+        return output_var
+
+    @classmethod
+    def _create_input_vars(cls, input_spec, op_name, op_cls, before_op, kwargs):
+        """
+        1. Create a Var for each optional input that is not specified in
+        kwargs, using its default value.
+
+        2. Convert python primitive types to Var.
+
+        Inputs:
+
+        input_spec (InputSpec)
+        op_name (str): op name.
+        before_op: all created vars / const ops are inserted right before
+        `before_op` in the block's order. None to append at the end.
+        """
+        update_dict = {}
+        for in_name, in_type in input_spec.input_types.items():
+            new_var_name = op_name + "_" + in_name
+            if not in_type.optional and in_name not in kwargs:
+                raise ValueError(
+                    "Input {} is required for op {}.".format(in_name, op_cls.__name__)
+                )
+
+            if in_name in kwargs and isinstance(kwargs[in_name], Var):
+                # check const
+                if in_type.const and kwargs[in_name].val is None:
+                    msg = "Input '{}' of op '{}' ({}) must be const at compile time."
+                    raise ValueError(msg.format(in_name, op_name, op_cls.__name__))
+
+            elif in_name in kwargs:
+                # Provided value is not Var. Create a Var from kwargs[in_name]
+                val = kwargs[in_name]
+                # create Var for numpy / python primitive
+                if isinstance(in_type, InternalInputType):
+                    # Shove all internal inputs to InternalVar (unknown type).
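+                    # Internal inputs (argument names starting with "_") live
+                    # outside the type system: no const op is created for them,
+                    # e.g. the python functions passed as _true_fn / _false_fn.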
+ var = InternalVar(val, name=new_var_name) + curr_block().add_internal_var(var) + else: + if isinstance(in_type, TupleInputType): + var = [] + for i, v in enumerate(val): + if isinstance(v, Var): + var.append(v) + continue + var.append( + cls._add_const(v, new_var_name + str(i), before_op) + ) + elif isinstance(in_type, ScalarOrTensorInputType): + var = cls._add_const(val, new_var_name, before_op) + else: + msg = "Cannot convert input {} of type {} to Var (op: {})" + raise ValueError( + msg.format(in_name, type(in_type).__name__, op_name) + ) + update_dict[in_name] = var + + elif in_name not in kwargs and in_type.default is not None: + if isinstance(in_type, PyFunctionInputType): + msg = "Default value is not allowed for PyFunctionInputType" + raise ValueError(msg) + # Create a Var from the default value. + if is_internal_input(in_name): + var = InternalVar(in_type.default, name=new_var_name) + curr_block().add_internal_var(var) + elif isinstance(in_type, TupleInputType): + var = tuple( + cls._add_const(v, new_var_name + str(i), before_op) + for i, v in enumerate(in_type.default) + ) + else: + var = cls._add_const(in_type.default, new_var_name, before_op) + update_dict[in_name] = var + + kwargs.update(update_dict) + + return kwargs + + @classmethod + def _add_op(cls, op_cls, **kwargs): + """ + Add an op of type `op_cls` (e.g., convolution) to current block. + """ + kwargs = cls._maybe_set_name(kwargs, op_cls.__name__) + logging.info( + "Adding op '{}' of type {}".format(kwargs["name"], op_cls.__name__) + ) + before_op = kwargs.get("before_op", None) + kwargs = {k: v for k, v in kwargs.items() if v is not None} + kwargs = cls._create_input_vars( + op_cls.input_spec, kwargs["name"], op_cls, before_op, kwargs + ) + new_op = op_cls(**kwargs) + curr_block()._insert_op_before(new_op, before_op=before_op) + new_op.build_nested_blocks() + new_op.type_value_inference() + if len(new_op.outputs) == 1: + return new_op.outputs[0] + return new_op.outputs + + @staticmethod + def placeholder(shape, dtype=None): + return Placeholder(shape, dtype) + + @staticmethod + def TensorSpec(shape, dtype=None): + return Placeholder(shape, dtype) + + @staticmethod + def program(input_specs=None): + """ + Usage: + + @mb.program(input_specs=[mb.TensorSpec(shape=(1,2))]) + def prog(a): + return mb.add(x=a, y=2) + """ + if input_specs is None: + input_specs = [] + + def wrapper(main_block): + program = Program() + num_args = main_block.__code__.co_argcount + arg_names = list(main_block.__code__.co_varnames)[:num_args] + if len(input_specs) != num_args: + msg = "{} expects {} inputs: {}. Got {} input_specs." + raise ValueError( + msg.format( + main_block.__name__, num_args, arg_names, len(input_specs) + ) + ) + input_spec_dict = {k: v for k, v in zip(arg_names, input_specs)} + with Function(input_spec_dict) as func: + input_vars = [func.inputs[a] for a in arg_names] + outputs = main_block(*input_vars) + if isinstance(outputs, tuple): + outputs = list(outputs) + elif not isinstance(outputs, list): + outputs = [outputs] + func.set_outputs(outputs) + program.add_function("main", func) + return program + + return wrapper + + +"""importing ops triggers installation of all ops into Builder""" +from .ops import defs as _ops diff --git a/coremltools/converters/mil/mil/input_type.py b/coremltools/converters/mil/mil/input_type.py new file mode 100644 index 000000000..d51af358e --- /dev/null +++ b/coremltools/converters/mil/mil/input_type.py @@ -0,0 +1,347 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from coremltools.converters.mil.mil import types
+from .var import InternalVar
+from collections import OrderedDict
+
+
+class InputSpec(object):
+    def __init__(self, **kwargs):
+        # Since python 3.6, kwargs preserves the input order. See
+        # https://docs.python.org/3/whatsnew/3.6.html#whatsnew36-pep468
+        self._input_types = [(k, v) for k, v in kwargs.items()]
+        self._ordered_dict = OrderedDict()
+        for k, v in self._input_types:
+            self._ordered_dict[k] = v
+
+    def __add__(self, input_spec):
+        self._input_types.extend(input_spec._input_types)
+        for k, v in input_spec._input_types:
+            self._ordered_dict[k] = v
+        return self
+
+    @property
+    def input_types(self):
+        """
+        Ordered dict[str, _InputType] (name, input_type)
+        """
+        return self._ordered_dict
+
+    def parse_inputs(self, kwargs):
+        """ Parse and extract (name, value) pairs from kwargs according to the spec.
+
+        Args:
+            kwargs: must contain a Var of a compatible type for each
+                1) required _InputType
+                2) optional _InputType with default value
+
+        Return:
+            out: List[(name, Var or None)]
+                The list has the same length as the `input_types`.
+                `(k, None)` is in the list iff input_type of `k`
+                is optional, has no default value, and
+                `k` is not specified in the input.
+
+        Raise:
+            TypeError if a value's type is incompatible
+            ValueError if a required input is missing
+        """
+        ret = []
+        no_check_var_visibility = kwargs.get("no_check_var_visibility", False)
+        for name, input_type in self.input_types.items():
+            if name in kwargs:
+                var = kwargs[name]
+                # TODO (jay): we should remove this internal var later as we
+                # further clean up the interface
+                if isinstance(var, InternalVar) or input_type.is_compatible(var):
+                    ret.append((name, var))
+                else:
+                    msg = (
+                        "Input {} has type {} not compatible with "
+                        "expected type {}".format(name, var.sym_type, input_type)
+                    )
+                    raise TypeError(msg)
+            else:
+                # if input is not found in kwargs, it must be optional with no
+                # default value
+                if not input_type.optional or input_type.default:
+                    # Skip check on PyFunctionInput since created while_loop /
+                    # cond ops don't need to rebuild the nested blocks
+                    if no_check_var_visibility or isinstance(
+                        input_type, PyFunctionInputType
+                    ):
+                        continue
+                    raise ValueError("Input {} is required".format(name))
+                else:
+                    assert input_type.default is None
+                    ret.append((name, None))
+        return ret
+
+
+class _InputType(object):
+    """
+    (Untyped) input containing the fundamental properties common to all inputs
+    to an Operation.
+    """
+
+    def __init__(self, const=False, default=None, optional=False):
+        """
+        const (bool):
+            True if the InputType has to be constant / materialized at compile time.
+            Const InputType is semantically equivalent to attribute. By
+            default False. Read-only.
+
+        optional (bool):
+            If default is not None, optional will be set to True
+
+        default:
+            Default value of optional input. InputType is optional if a default
+            is provided or optional == True. default can be int, float,
+            string, np.ndarray etc depending on subclass.
+
+        Note: _InputType should not be directly instantiated. Only its subclasses may
+        be instantiated.
+        """
+        self.default = default
+        self.const = const
+        self.optional = True if default is not None else optional
+
+    def is_compatible(self, v):
+        """
+        Return True if (possibly symbolic) value `v` is compatible. False
+        otherwise.
+
+        Inputs:
+
+        v (Var | ListVar | native python function): input
+
+        Comment: Define is_compatible as an instance method so that the proper
+        subclass method is called.
+        """
+        return self._is_compatible(v)
+
+    def _is_compatible(self, v):
+        return True
+
+    def _get_predefined_datatype(self):
+        """
+        Override this function if datatype can be known without `_default` or
+        `_val`.
+        """
+        return None
+
+    def __str__(self):
+        return type(self).__name__
+
+
+class ListInputType(_InputType):
+    def __init__(self, **kwargs):
+        super(ListInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return types.is_list(v.sym_type)
+
+
+class ScalarOrTensorInputType(_InputType):
+    def __init__(self, **kwargs):
+        super(ScalarOrTensorInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return types.is_scalar(v.dtype) or types.is_tensor(v.dtype)
+
+
+class ListOrScalarOrTensorInputType(_InputType):
+    def __init__(self, **kwargs):
+        super(ListOrScalarOrTensorInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return (
+            types.is_list(v.sym_type)
+            or types.is_scalar(v.dtype)
+            or types.is_tensor(v.dtype)
+        )
+
+
+class IntInputType(ScalarOrTensorInputType):
+    """
+    Int input with _sym_type == types.int32 or _sym_type == types.int64,
+    predefined to be types.int32 by default.
+
+    Raise error when the value set is not an integer.
+    """
+
+    def __init__(self, **kwargs):
+        super(IntInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return v.dtype in {types.int32, types.int64}
+
+    def _get_predefined_datatype(self):
+        return types.int32
+
+
+class BoolInputType(ScalarOrTensorInputType):
+    """
+    Bool input, with _sym_type == types.bool.
+
+    Raise error when the value set is not a boolean.
+    """
+
+    def __init__(self, **kwargs):
+        super(BoolInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return v.dtype == types.bool
+
+    def _get_predefined_datatype(self):
+        return types.bool
+
+
+class FloatInputType(ScalarOrTensorInputType):
+    """
+    fp32 input, with _sym_type == types.fp32.
+
+    Raise error when the value set is not a float.
+    """
+
+    def __init__(self, **kwargs):
+        super(FloatInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return v.dtype == types.fp32
+
+    def _get_predefined_datatype(self):
+        return types.fp32
+
+
+class IntOrFloatInputType(ScalarOrTensorInputType):
+    """
+    Input with _sym_type == types.int32, types.int64, or types.fp32,
+    predefined to be types.fp32 by default.
+    """
+
+    def __init__(self, **kwargs):
+        super(IntOrFloatInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return v.dtype in {types.int32, types.int64, types.fp32}
+
+    def _get_predefined_datatype(self):
+        return types.fp32
+
+
+class TensorInputType(ScalarOrTensorInputType):
+    """
+    TensorInputType must be a numpy ndarray of numeric types. Min rank = 1. (Use
+    ScalarOrTensorInputType for possibly-scalar input.)
+    """
+
+    def __init__(self, **kwargs):
+        super(TensorInputType, self).__init__(**kwargs)
+
+    def _is_compatible(self, v):
+        return types.is_tensor(v.sym_type)
+
+
+class IntTensorInputType(ScalarOrTensorInputType):
+    """
+    Tensor input with int values, _sym_type == types.int32 or
+    _sym_type == types.int64.
+
+    Raise error when the value set is not an integer.
+ """ + + def __init__(self, **kwargs): + super(IntTensorInputType, self).__init__(**kwargs) + + def _is_compatible(self, v): + return types.is_tensor(v.sym_type) and v.dtype in {types.int32, types.int64} + + +class IntOrIntTensorInputType(ScalarOrTensorInputType): + """ + builtins.in32 or Tensor with int values, _sym_type == builtins.int32 or + _sym_type == builtins.int64 + + Raise error when value set is not integer. + """ + + def __init__(self, **kwargs): + super(IntOrIntTensorInputType, self).__init__(**kwargs) + + def _is_compatible(self, v): + return v.dtype in {types.int32, types.int64} + + +class BoolTensorInputType(ScalarOrTensorInputType): + def __init__(self, **kwargs): + super(BoolTensorInputType, self).__init__(**kwargs) + + def _is_compatible(self, v): + return types.is_tensor(v.sym_type) and v.dtype == types.bool + + +class StringInputType(ScalarOrTensorInputType): + def __init__(self, **kwargs): + super(StringInputType, self).__init__(**kwargs) + + def _is_compatible(self, v): + return types.is_str(v.sym_type) + + +class TupleInputType(_InputType): + def __init__(self, **kwargs): + super(TupleInputType, self).__init__(**kwargs) + + def _is_compatible(self, v): + # We don't check the detail types within the tuple. + return isinstance(v, (tuple, list)) + + +class InternalInputType(_InputType): + """ + InternalInputType specifies input types outside of Program's type system. + It allows ops to take, for example, python primitive types, instead of + only the builtin types. + """ + + def __init__(self, **kwargs): + super(InternalInputType, self).__init__(**kwargs) + + def _is_compatible(self, v): + return True # skip type check by default for InternalInputType. + + +class PyFunctionInputType(InternalInputType): + """ + Native python function. + """ + + def __init__(self, **kwargs): + super(PyFunctionInputType, self).__init__(**kwargs) + + # def _is_compatible(self, v): + # return callable(v.val) + + +class InternalStringInputType(InternalInputType): + def __init__(self, **kwargs): + super(InternalStringInputType, self).__init__(**kwargs) + + # def _is_compatible(self, v): + # return types.is_str(v.sym_type) + + +class InternalScalarOrTensorInputType(InternalInputType): + def __init__(self, **kwargs): + super(InternalScalarOrTensorInputType, self).__init__(**kwargs) + + # def _is_compatible(self, v): + # return types.is_scalar(v.dtype) or types.is_tensor(v.dtype) diff --git a/coremltools/converters/mil/mil/operation.py b/coremltools/converters/mil/mil/operation.py new file mode 100644 index 000000000..5f36a2db9 --- /dev/null +++ b/coremltools/converters/mil/mil/operation.py @@ -0,0 +1,453 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import logging +import numpy as np +import six +from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil.types.symbolic import is_symbolic, any_symbolic +from . import SPACES +from .block import curr_block, _check_is_compatible_type +from .input_type import TupleInputType +from .var import Var, InternalVar, ListVar + +VALUE = 1 +SYMBOL = 2 +NONE = 4 +ALL = 7 + + +def _is_compatible_symbolic_array(a, b): + """ + A helper function which check if two numpy array with symbolic value. + For instance, a = np.array([is0, 1]) + b = np.array([is1, 1]) + are considered compatible. + a = np.array([is0, 1]) + b = np.array([is1, -1]) + are not. 
+ """ + assert any_symbolic(a) and any_symbolic(b) + if not a.shape == b.shape: + return False + a = a.flatten() + b = b.flatten() + for t, v in zip(a, b): + if not is_symbolic(t) and not is_symbolic(v): + if t != v: + return False + elif not is_symbolic(t) or not is_symbolic(v): + return False + return True + + +def precondition(allow=ALL): + """ + A helper decorator for value_inference method. + Decorate value_inference with parameter VALUE/SYMBOL/NONE or ALL. + For VALUE/SYMBOL/NONE use logical or ( | ) for multiple allowance. + Note that: + 1. ALL == VALUE | SYMBOL | NONE + 2. Chosen flag (some or all VALUE/SYMBOL/NONE) must be satisfied + by EVERY INPUTS for the precondition to be satisfied. + + The meaning for each flag is: + VALUE: value that can be materialized during compile time + SYMBOL: value that cannot be materialized by exist as a symbol value + NONE: a None value + + Usage: + @precondition(allow=VALUE|SYMBOL) + def value_inference(self): + '''some value_inference implementation''' + """ + ALLOW_VALUE = allow & VALUE + ALLOW_SYMBOL = allow & SYMBOL + ALLOW_NONE = allow & NONE + + def process(v, has_value, has_symbol, has_none): + """ + v: Var + + Return updated has_value, has_symbol, has_none + """ + if any_symbolic(v.sym_val): + return has_value, True, has_none + elif v.val is None: + return has_value, has_symbol, True + return True, has_symbol, has_none + + def decorator(func): + def wrapper(self): + HAS_VALUE = False + HAS_SYMBOL = False + HAS_NONE = False + for in_name, in_type in self._input_types.items(): + if in_type.optional: + # Optional inputs are not required to invoke value_inference() + continue + + if isinstance(in_type, TupleInputType): + for v in self._input_vars[in_name]: + HAS_VALUE, HAS_SYMBOL, HAS_NONE = process( + v, HAS_VALUE, HAS_SYMBOL, HAS_NONE + ) + else: + HAS_VALUE, HAS_SYMBOL, HAS_NONE = process( + self._input_vars[in_name], HAS_VALUE, HAS_SYMBOL, HAS_NONE + ) + + if HAS_VALUE and not ALLOW_VALUE: + msg = "Implementation of value_inference() for op {} doesn't support input with VALUE" + raise NotImplementedError(msg.format(self.op_type)) + elif HAS_SYMBOL and not ALLOW_SYMBOL: + msg = "Implementation of value_inference() for op {} doesn't support input with SYMBOL" + raise NotImplementedError(msg.format(self.op_type)) + elif HAS_NONE and not ALLOW_NONE: + msg = "Implementation of value_inference() for op {} doesn't support input with NONE" + raise NotImplementedError(msg.format(self.op_type)) + else: + return func(self) + + return wrapper + + return decorator + + +def is_internal_input(arg_name): + return arg_name[0] == "_" + + +class Operation(object): + """ + Represents Operation in MIL. + + # Properties + name (str): + The name of the operation + + input_types (InputSpec, class attr): + Read-only named input types from all subclasses. Input types are used + to validate `inputs`. + + inputs [_input_vars] (dict of str --> Var): + An Operation (subclass of Operation) only has access to input Var, + which is already validated against `input_spec`. + + outputs [_output_vars] (list of Var): + List of output var based on type inference. 
Read-only + """ + + def __init__(self, **kwargs): + self._input_types = self.input_spec.input_types + self.name = kwargs.get("name", None) + + self._output_vars = None + self._input_vars = {} + self.blocks = [] + self.enclosing_block = curr_block() + self._validate_and_set_inputs(**kwargs) + + def set_inputs(self, **kwargs): + self._validate_and_set_inputs(**kwargs) + if not kwargs.get("no_check_var_types", False): + self.type_value_inference() + + def get_flattened_inputs(self): + """ + Returns: + list[Var]. Flatten all tuple inputs + """ + flat_inputs = [] + for v in self.inputs.values(): + if isinstance(v, (list, tuple)): + flat_inputs.extend(v) + else: + flat_inputs.append(v) + return flat_inputs + + def type_value_inference(self, overwrite_output=False): + """ + Perform type inference and auto_val computation based on new input Vars + in kwargs. If self._output_vars is None then we generate _output_vars; + otherwise no new Var is created, but type inference result is verified + against existing _output_vars, if overwrite_output is False. + + If overwrite_output is True, then the type inference result overwrites the + existing _output_vars + """ + output_types = self.type_inference() + if not isinstance(output_types, tuple): + output_types = (output_types,) + output_vals = self._auto_val(output_types) + try: + output_names = self.output_names() + if not isinstance(output_names, tuple): + output_names = (output_names,) + except NotImplementedError as e: + if len(output_types) > 1: + output_names = tuple(str(i) for i, _ in enumerate(output_types)) + else: + output_names = ("",) # output name same as op name. + + # Combine (output_names, output_types, output_vals) to create output + # Vars. + if self._output_vars is None: + self._output_vars = [] + for i, (n, sym_type, sym_val) in enumerate( + zip(output_names, output_types, output_vals) + ): + name = self.name + ":" + n if n != "" else self.name + if types.is_list(sym_type): + new_var = ListVar( + name, + elem_type=sym_type.T[0], + init_length=sym_type.T[1], + dynamic_length=sym_type.T[2], + op=self, + op_output_idx=i, + ) + else: + new_var = Var(name, sym_type, sym_val, op=self, op_output_idx=i) + self._output_vars.append(new_var) + else: + # Check new inference result against existing self._output_vars. + for i, (n, sym_type, sym_val) in enumerate( + zip(output_names, output_types, output_vals) + ): + out_var = self._output_vars[i] + # Check type inference + if overwrite_output: + out_var._sym_type = sym_type + elif not _check_is_compatible_type(sym_type, out_var.sym_type): + msg = "Output Var {} in op {} type changes with new input Vars" + raise ValueError(msg.format(out_var.name, self.name)) + + # Check value inference + if sym_val is not None and out_var.sym_val is None: + if overwrite_output: + out_var._sym_val = sym_val + + if sym_val is not None and out_var.sym_val is not None: + if np.any(sym_val.val != out_var.sym_val): + if overwrite_output: + out_var._sym_val = sym_val + else: + msg = "value_inference differs for var {} in op {}" + if not any_symbolic(sym_val.val): + raise ValueError(msg.format(out_var.name, self.name)) + elif not _is_compatible_symbolic_array( + sym_val.val, out_var.sym_val + ): + raise ValueError(msg.format(out_var.name, self.name)) + + def _auto_val(self, output_types): + """ + # Evaluation is two stage: + # + # Stage 1: Check whether the method value_inference() is implemented + # + # Stage 2: Check if there's an value_inference() implementation + # for given input types. 
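+        #
+        # (A stage-2 failure shows up as the NotImplementedError raised by the
+        # @precondition wrapper around value_inference(); both stages are
+        # handled by the same try/except below.)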
+ # + # Suppose input are all SYMBOL: + # Case 1: No value_inference() implemented => fail at stage 1 + # Case 2: If value_inference() implemented, but requires all VALUE not + # SYMBOL => fail at stage 2 + # Case 3: If value_inference() implemented, and has no restriction on + # input types => Success + # + # If either stage fails, outputs[i].val is None. + # Otherwise, output[i].sym_val is not None. + + output_types: tuple of builtin types + + Returns: + output_vals: tuple of builtin type with value, or tuple of None + """ + do_auto_val = True + + if do_auto_val: + # Is self.value_inference implemented for corresponding input? + try: + vals = self.value_inference() + except NotImplementedError as e: + do_auto_val = False + + if not do_auto_val: + # No auto_val possible. + return tuple(None for _ in output_types) + + if not isinstance(vals, (tuple, list)): + vals = (vals,) + for val in vals: + if val is None: + do_auto_val = False + if not do_auto_val: + # No auto_val possible. + return tuple(None for _ in output_types) + + auto_val = [] + for t, v in zip(output_types, vals): + builtin_val = t() + builtin_val.val = v + auto_val.append(builtin_val) + return auto_val + + def value_inference(self): + """ + Optional Python implementation of the op based on (materialized) values + in `self.input_var`. Return a builtin value (single output) or a tuple of + builtin values (multi-outputs) of the same length as returned by ` + type_inference` + """ + msg = "value_inference() is not implemented by op {}" + raise NotImplementedError(msg.format(self.op_type)) + + def output_names(self): + """ + Optional. If implemented, we set the output var i name as + self.name + "/" + output_names[i] + + Returns a string (single output) or tuple of strings + """ + msg = "output_names() is not implemented by op {}" + raise NotImplementedError(msg.format(self.op_type)) + + def type_inference(self): + """ + Return (builtin_type, builtin_val) pair from type inference. + builtin_val may be None if symbolic_value is not attainable at compile + time. + """ + raise NotImplementedError("This function must be implemented by each op") + + def build_nested_blocks(self): + """ + Build nested blocks (for cond and while_loop and other composite + blocks) + """ + pass + + def _validate_and_set_inputs(self, **kwargs): + non_attributes = [ + "name", + "symbolic_datatype", + "datatype", + "symbolic_value", + "value", + "version", + "before_op", + "no_check_var_visibility", # no_check_var_visibility==True to deviate from SSA + "no_check_var_types", # no_check_var_types==True to force set inputs, even if type does not match with earlier ones + ] + op_inputs = list(self._input_types.keys()) + legal_args = op_inputs + non_attributes + no_check_var_visibility = kwargs.get("no_check_var_visibility", False) + no_check_var_types = kwargs.get("no_check_var_types", False) + + for key in kwargs.keys(): + if key not in legal_args: + raise RuntimeError( + "Unknown input '{}' for op '{}'".format(key, self.op_type) + ) + + def check_and_detach(v_new, v_old, op, no_check_var_types): + # Check new var's sym_type is compatible with the + # existing's sym_type. 
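+            # Detaching only removes `op` from v_old.child_ops; v_old and its
+            # producing op stay in the block and can be cleaned up later (e.g.
+            # by a dead-code pass) if nothing else uses them.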
+ if ( + not _check_is_compatible_type(v_new.sym_type, v_old.sym_type) + and not no_check_var_types + ): + msg = "New var type {} not a subtype of " + "existing var type {}" + raise ValueError(msg.format(v_new.sym_type, v_old.sym_type)) + v_old.remove_child_op(op, no_check_var_types) + + parsed_inputs = self.input_spec.parse_inputs(kwargs) + for (name, var) in parsed_inputs: + setattr(self, name, var) + if var is not None and not isinstance(var, InternalVar): + # Remove this operation itself from existing input Var's child_ops + existing_input_var = self._input_vars.get(name, None) + if existing_input_var is not None: + if isinstance(existing_input_var, (list, tuple)): + for v_old, v_new in zip(existing_input_var, var): + check_and_detach(v_new, v_old, self, no_check_var_types) + else: + check_and_detach( + var, existing_input_var, self, no_check_var_types + ) + + # Set var as input_var + if isinstance(var, Var): + var.add_child_op(self) + elif isinstance(var, (tuple, list)): + for v in var: + v.add_child_op(self) + # ignore function inputs + self._input_vars[name] = var + + @property + def inputs(self): + return self._input_vars + + @property + def outputs(self): + return self._output_vars + + @property + def op_type(self): + return type(self).__name__ + + def remove_from_block(self): + """ + Remove / detach itself from the enclosing block. See Block.remove_ops + for details. + """ + self.enclosing_block.remove_ops([self]) + + @staticmethod + def var_to_str(v): + if isinstance(v, (tuple, list)): + return "(" + ", ".join(["%" + s.name for s in v]) + ")" + else: + return "%" + v.name + + def indented_str(self, indent=""): + s = indent + if self.outputs is not None: + s += ", ".join([str(o) for o in self.outputs]) + s += " = " + self.op_type + "(" + if self.op_type == "const": + if self.mode.val == "immediate_value": + if isinstance(self.val.sym_val, (np.generic, np.ndarray)): + val_str = str(self.val.sym_val.tolist()) + else: + val_str = ( + '"' + self.val.sym_val + '"' + if isinstance(self.val.sym_val, six.string_types) + else str(self.val.sym_val) + ) + s += "val=" + val_str + else: + s += "val=(file_value)" + else: + s += ", ".join( + [ + k + "=" + Operation.var_to_str(self.inputs[k]) + for k in self._input_types.keys() + if k in self.inputs and not is_internal_input(k) + ] + ) + s += ', name="{}")\n'.format(self.name) + for b in self.blocks: + s += b.indented_str(indent=indent + SPACES) + return s + + def __repr__(self): + return str(self) + + def __str__(self): + return self.indented_str(SPACES) diff --git a/coremltools/converters/mil/mil/ops/__init__.py b/coremltools/converters/mil/mil/ops/__init__.py new file mode 100644 index 000000000..d968a5ffe --- /dev/null +++ b/coremltools/converters/mil/mil/ops/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from ..builder import get_const_mode diff --git a/coremltools/converters/mil/mil/ops/defs/__init__.py b/coremltools/converters/mil/mil/ops/defs/__init__.py new file mode 100644 index 000000000..3173bbad7 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from .activation import * +from .control_flow import * +from .conv import * +from .elementwise_binary import * +from .elementwise_unary import * +from .image_resizing import * +from .linear import * +from .normalization import * +from .pool import * +from .random import * +from .recurrent import * +from .reduction import * +from .slicend import * +from .scatter_gather import * +from .tensor_operation import * +from .tensor_transformation import * diff --git a/coremltools/converters/mil/mil/ops/defs/_op_reqs.py b/coremltools/converters/mil/mil/ops/defs/_op_reqs.py new file mode 100644 index 000000000..916b46c69 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/_op_reqs.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from coremltools.converters.mil.mil import types +from coremltools.converters.mil.mil import Operation, precondition, VALUE +from coremltools.converters.mil.mil.input_type import * +from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry + +register_op = SSAOpRegistry.register_op diff --git a/coremltools/converters/mil/mil/ops/defs/_utils.py b/coremltools/converters/mil/mil/ops/defs/_utils.py new file mode 100644 index 000000000..7856b0582 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/_utils.py @@ -0,0 +1,237 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import math + +import coremltools.converters +import sympy as sm + +from coremltools.converters.mil.mil.types.symbolic import is_symbolic +from ._op_reqs import * + + +def broadcast_shapes(shape_x, shape_y): + """ + Check and broadcast given input shapes. + :param shape_x: tuple of int or symbols + Shape of the first tensor (possibly symbolic). + :param shape_y: tuple of int or symbols + Shape of the second tensor (possibly symbolic). + :return: tuple of int or symbols + Result from broadcast. + """ + shape_x = tuple(shape_x) + shape_y = tuple(shape_y) + if len(shape_x) < len(shape_y): + shape_x = tuple([1] * (len(shape_y) - len(shape_x))) + shape_x + if len(shape_y) < len(shape_x): + shape_y = tuple([1] * (len(shape_x) - len(shape_y))) + shape_y + + ret_shapes = list() + for i in range(len(shape_x)): + x_unknown = is_symbolic(shape_x[i]) + y_unknown = is_symbolic(shape_y[i]) + if shape_x[i] == 1: + ret_shapes.append(shape_y[i]) + elif shape_y[i] == 1: + ret_shapes.append(shape_x[i]) + elif not y_unknown and shape_y[i] > 1: + if not x_unknown and shape_x[i] != shape_y[i]: + raise ValueError( + "Incompatible dim {} in shapes {} vs. {}".format( + i, shape_x, shape_y + ) + ) + ret_shapes.append(shape_y[i]) + elif not x_unknown and shape_x[i] > 1: + if not y_unknown and shape_x[i] != shape_y[i]: + raise ValueError( + "Incompatible dim {} in shapes {} vs. 
{}".format( + i, shape_x, shape_y + ) + ) + ret_shapes.append(shape_x[i]) + elif x_unknown or y_unknown: + ret_shapes.append(sm.functions.Max(shape_x[i], shape_y[i])) + else: + assert shape_x[i] == shape_y[i] + ret_shapes.append(shape_x[i]) + + return tuple(ret_shapes) + + +def promoted_primitive_type(type1, type2): + """ + Given a pair of tensor or primitive types, find the smallest type that can store an instance + of their primitive type. + """ + ptype1 = type1.get_primitive() if types.is_tensor(type1) else type1 + ptype2 = type2.get_primitive() if types.is_tensor(type2) else type2 + return types.promote_types(ptype1, ptype2) + + +def effective_kernel(kernel_shape, dilations): + """ + + Args: + kernel_shape: tuple[int] representing the kernel shape in each + given dimension. + dilations: tuple[int] representing the dilation of the kernel + in each given dimension. Must be the same length as + kernel_shape, and is assumed to give the dimensions in + the same order as kernel_shape + + Returns: tuple[int] representing the effective shape of the kernel + in each given dimension, with each dimension in the order given, + taking into account dilation. + See http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions + Note that a dilation of 1 is equivalent to having no dilation. + + """ + if len(kernel_shape) != len(dilations): + raise ValueError( + "kernel_shape ({}) and dilations ({}) must be the same length".format( + len(kernel_shape), len(dilations) + ) + ) + return [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations)] + + +def aggregated_pad( + pad_type, + kernel_shape, + input_shape=None, + strides=None, + dilations=None, + custom_pad=None, +): + """ + Args + pad_type: string. Must be one of ('same', 'valid', 'custom') + + kernel_shape: [kH, kW, ...]: spatial kernel dims (excluding channels) + + input_shape: [iH, iW, ...]: spatial input dims (excluding channels) + Required iff pad_type == 'same' + + strides: [sH, sW, ...]: spatial strides (excluding channels) + Required iff pad_type == 'same' + + dilations: [dH, dW, ...]: dilations (excluding channels) + If not provided, defaults to [1, 1, ...], effectively no dilation. + + custom_pad: Required iff pad_type == 'custom'. + custom_pad[2*i], custom_pad[2*i+1] are before/after custom padding + for spatial dim i. + + + Returns: + A list of total (before + after) padding for each spatial dimension in kernel_shape. 
+ """ + num_spatial_dims = len(kernel_shape) + if dilations is None: + dilations = [1] * num_spatial_dims + elif len(dilations) != num_spatial_dims: + raise ValueError( + "dilations must have same length as kernel_shape ({}, but got {})".format( + num_spatial_dims, len(dilations) + ) + ) + if pad_type == "same": + if input_shape is None or len(input_shape) != num_spatial_dims: + raise ValueError( + "For SAME padding input_shape must not be None and must have " + "same length as kernel_shape ({}, but got {})".format( + num_spatial_dims, + len(input_shape) if input_shape is not None else "None", + ) + ) + if strides is None or len(strides) != num_spatial_dims: + raise ValueError( + "For SAME padding strides must not be None and must have " + "same length as kernel_shape ({}, but got {})".format( + num_spatial_dims, len(strides) if strides is not None else "None" + ) + ) + effective_ks = effective_kernel(kernel_shape, dilations) + return [ + int(max(0, s * math.ceil(float(i) / float(s)) - i + k - s)) + for i, k, s in zip(input_shape, effective_ks, strides) + ] + if pad_type == "valid": + return [0] * num_spatial_dims + if pad_type == "custom": + if custom_pad is None or len(custom_pad) != 2 * num_spatial_dims: + raise ValueError("Invalid custom_pad.") + return [ + custom_pad[2 * d] + custom_pad[2 * d + 1] for d in range(num_spatial_dims) + ] + raise ValueError('Invalid padding pad_type "{}"'.format(pad_type)) + + +def spatial_dimensions_out_shape( + pad_type, input_shape, kernel_shape, strides, dilations=None, custom_pad=None +): + """ + Args + pad_type: string. Must be one of ('same', 'valid', 'custom') + + input_shape: [iH, iW, ...]: spatial input dims (excluding channels) + Required iff pad_type == 'same' + + kernel_shape: [kH, kW, ...]: spatial kernel dims (excluding channels) + + strides: [sH, sW, ...]: spatial strides (excluding channels) + Required iff pad_type == 'same' + + dilations: [dH, dW, ...]: dilations (excluding channels) + If not provided, defaults to [1, 1, ...], effectively no dilation. + + custom_pad: Required iff pad_type == 'custom'. + custom_pad[2*i], custom_pad[2*i+1] are before/after custom padding + for spatial dim i. + + + Returns: + A list of spatial output sizes for each spatial dimension of kernel_shape. + + """ + num_spatial_dims = len(kernel_shape) + if dilations is None: + dilations = [1] * num_spatial_dims + if custom_pad is None: + custom_pad = [0] * num_spatial_dims * 2 + if not ( + len(input_shape) + == len(kernel_shape) + == len(strides) + == len(dilations) + == len(custom_pad) / 2 + ): + raise ValueError( + "input_shape (length {}), kernel_shape (length {}), " + "strides (length {}), dilations (length {}), and " + "custom_pad (length {}) divided by two must all be " + "the same length".format( + len(input_shape), + len(kernel_shape), + len(strides), + len(dilations), + len(custom_pad), + ) + ) + pad = aggregated_pad( + pad_type=pad_type, + kernel_shape=kernel_shape, + input_shape=input_shape, + strides=strides, + dilations=dilations, + custom_pad=custom_pad, + ) + effective_ks = effective_kernel(kernel_shape, dilations) + return [ + (input_shape[r] + pad[r] - effective_ks[r]) // strides[r] + 1 + for r in range(num_spatial_dims) + ] diff --git a/coremltools/converters/mil/mil/ops/defs/activation.py b/coremltools/converters/mil/mil/ops/defs/activation.py new file mode 100644 index 000000000..12f4c0590 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/activation.py @@ -0,0 +1,601 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import scipy
+from ._op_reqs import *
+from .elementwise_unary import elementwise_unary
+
+
+@register_op(doc_str="")
+class clamped_relu(Operation):
+    """
+    Returns elementwise ``min(beta, x)`` if ``x >= 0``, ``min(beta, alpha * x)`` otherwise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    alpha: const fp32 (Required)
+    beta: const fp32 (Required)
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same type and shape as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(),
+        alpha=FloatInputType(const=True),
+        beta=FloatInputType(const=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(clamped_relu, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        x = np.minimum(np.maximum(self.x.val, 0), self.beta.val)
+        y = np.minimum(np.minimum(self.x.val, 0) * self.alpha.val, self.beta.val)
+        return x + y
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class elu(Operation):
+    """
+    Returns elementwise ``x`` if ``x > 0``, ``alpha * (e^x - 1)`` otherwise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    alpha: const fp32 (Optional)
+        * Defaults to ``1``.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(), alpha=FloatInputType(const=True, default=1),
+    )
+
+    def __init__(self, **kwargs):
+        super(elu, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        b = np.copy(self.x.val)
+        b[b < 0] = self.alpha.val * (np.exp(b[b < 0]) - 1)
+        return b
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class gelu(Operation):
+    """
+    Returns the elementwise Gaussian error linear unit activation on ``x``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    mode: const str (Optional)
+        * Defaults to ``'EXACT'``.
+        * Can take values:
+          * "EXACT": ``f(x) = 0.5x\left ( 1+\rm{erf}\left ( \frac{x}{\sqrt{2}} \right ) \right )``
+          * "TANH_APPROXIMATION": ``f(x) = 0.5x\left ( 1+\rm{tanh}\left ( \sqrt{2/\pi}\left ( x + 0.044715x^3 \right ) \right ) \right )``
+          * "SIGMOID_APPROXIMATION": ``f(x) = x*\rm{sigmoid}(1.702x)``
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(), mode=StringInputType(const=True, default="EXACT"),
+    )
+
+    def __init__(self, **kwargs):
+        super(gelu, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        if self.mode.val == "TANH_APPROXIMATION":
+            a = np.sqrt(2 / np.pi) * (self.x.val + 0.044715 * np.power(self.x.val, 3))
+            return 0.5 * self.x.val * (1 + np.tanh(a))
+        elif self.mode.val == "SIGMOID_APPROXIMATION":
+            return self.x.val * (1 / (1 + np.exp(-(1.702 * self.x.val))))
+        else:
+            return 0.5 * self.x.val * (1 + scipy.special.erf(self.x.val / np.sqrt(2)))
+
+    def type_inference(self):
+        allowed_values = {"EXACT", "TANH_APPROXIMATION", "SIGMOID_APPROXIMATION"}
+        if self.mode.val not in allowed_values:
+            msg = '"gelu" op: unrecognized value of mode: "{}". Allowed values are {}'
+            raise ValueError(msg.format(self.mode.val, allowed_values))
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class leaky_relu(Operation):
+    """
+    Elementwise apply ``x`` if ``x >= 0``, else ``alpha * x``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    alpha: const fp32 (Optional)
+        * Defaults to ``0.01``.
+
+    Returns
+    -------
+    tensor<*?, f32>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(), alpha=FloatInputType(const=True, default=0.01),
+    )
+
+    def __init__(self, **kwargs):
+        super(leaky_relu, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        b = np.copy(self.x.val)
+        b[b < 0] *= self.alpha.val
+        return b
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class linear_activation(Operation):
+    """
+    Applies elementwise ``x * alpha + beta``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    alpha: const fp32 (Required)
+    beta: const fp32 (Optional)
+        * Defaults to ``0``.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(),
+        alpha=FloatInputType(const=True),
+        beta=FloatInputType(const=True, default=0.0),
+    )
+
+    def __init__(self, **kwargs):
+        super(linear_activation, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return self.alpha.val * self.x.val + self.beta.val
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class prelu(Operation):
+    """
+    Returns ``x_i`` if ``x_i > 0``, ``alpha_i * x_i`` otherwise, where ``i = 1 ... C``.
+
+    Parameters
+    ----------
+    x: tensor<[b, C, n, m], T> (Required)
+    alpha: const tensor<[C], T> (Required)
+
+    Returns
+    -------
+    tensor<[b, C, n, m], f32>
+        * a tensor of the same shape as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=TensorInputType(), alpha=TensorInputType(const=True),)
+
+    def __init__(self, **kwargs):
+        super(prelu, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        alpha_br = self.alpha.val
+        for i in range(1, len(self.x.shape)):
+            alpha_br = np.expand_dims(alpha_br, i)
+        x_pos = np.maximum(self.x.val, 0)
+        b = np.minimum(self.x.val, 0)
+        return x_pos + b * alpha_br
+
+    def type_inference(self):
+        if len(self.x.shape) < 3:
+            raise ValueError("x should be at least rank 3")
+        if len(self.alpha.val.shape) != 1:
+            raise ValueError("alpha should be rank 1")
+        if self.x.shape[-3] != self.alpha.val.shape[0]:
+            raise ValueError(
+                "Size of dimension 0 of alpha should be the same as "
+                + "the size of dimension -3 of x."
+            )
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class relu(elementwise_unary):
+    """
+    Returns elementwise applied rectified linear activation: ``max(x, 0)``.
+
+    Parameters
+    ----------
+    x: tensor<*?, f32> (Required)
+
+    Returns
+    -------
+    tensor<*?, f32>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    def __init__(self, **kwargs):
+        super(relu, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.maximum(self.x.val, 0)
+
+
+@register_op(doc_str="")
+class relu6(elementwise_unary):
+    """
+    Returns elementwise applied rectified linear activation: ``min(max(x, 0), 6)``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    def __init__(self, **kwargs):
+        super(relu6, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.minimum(np.maximum(self.x.val, 0), 6)
+
+
+@register_op(doc_str="")
+class scaled_tanh(Operation):
+    """
+    Returns ``alpha * tanh(beta * x)`` element-wise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Input range is ``(-inf, inf)``.
+    alpha: const f32 (Optional)
+        * Defaults to ``1``.
+    beta: const f32 (Optional)
+        * Defaults to ``1``.
+
+    Returns
+    -------
+    tensor<*?, f32>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(),
+        alpha=FloatInputType(const=True, default=1),
+        beta=FloatInputType(const=True, default=1),
+    )
+
+    def __init__(self, **kwargs):
+        super(scaled_tanh, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return self.alpha.val * np.tanh(self.x.val * self.beta.val)
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class sigmoid(elementwise_unary):
+    """
+    Returns ``sigmoid(x)`` element-wise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same shape as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    def __init__(self, **kwargs):
+        super(sigmoid, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return 1 / (1 + np.exp(-self.x.val))
+
+
+@register_op(doc_str="")
+class sigmoid_hard(Operation):
+    """
+    Returns ``min( max( alpha * x + beta, 0 ), 1 )`` elementwise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    alpha: const f32 (Optional)
+        * Defaults to ``0.2``.
+    beta: const f32 (Optional)
+        * Defaults to ``0.5``.
+
+    Returns
+    -------
+    tensor<*?, f32>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(),
+        alpha=FloatInputType(const=True, default=0.2),
+        beta=FloatInputType(const=True, default=0.5),
+    )
+
+    def __init__(self, **kwargs):
+        super(sigmoid_hard, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.minimum(
+            np.maximum((self.alpha.val * self.x.val) + self.beta.val, 0), 1
+        )
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class softplus(elementwise_unary):
+    """
+    Returns ``log( 1 + e^x )`` elementwise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same shape and type as ``x``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    def __init__(self, **kwargs):
+        super(softplus, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.log(1 + np.exp(-np.abs(self.x.val))) + np.maximum(self.x.val, 0)
+
+
+@register_op(doc_str="")
+class softplus_parametric(Operation):
+    """
+    Returns ``alpha_i * log( 1 + e^( beta_i * x_i ) )``, where ``i = 1 ... C``.
+
+    Parameters
+    ----------
+    x: tensor<[b, C, n, m], T> (Required)
+    alpha: const tensor<[C], f32> (Required)
+    beta: const tensor<[C], f32> (Required)
+
+    Returns
+    -------
+    tensor<[b, C, n, m], T>
+        * a tensor of the same shape as ``x``.
+ + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + x=TensorInputType(), + alpha=TensorInputType(const=True), + beta=TensorInputType(const=True), + ) + + def __init__(self, **kwargs): + super(softplus_parametric, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + alpha_br = np.copy(self.alpha.val) + beta_br = np.copy(self.beta.val) + for i in range(1, len(self.x.val.shape)): + alpha_br = np.expand_dims(alpha_br, i) + beta_br = np.expand_dims(beta_br, i) + return alpha_br * np.log(1 + np.exp(self.x.val * beta_br)) + + def type_inference(self): + if len(self.x.shape) < 3: + raise ValueError("x should be at least rank 3") + if len(self.alpha.val.shape) != 1: + raise ValueError("alpha should be rank 1") + if self.x.shape[-3] != self.alpha.val.shape[0]: + raise ValueError( + "Size of dimension 0 of alpha should be the same as " + + "the size of dimension -3 of x." + ) + if len(self.beta.val.shape) != 1: + raise ValueError("beta should be rank 1") + if self.x.shape[-3] != self.beta.val.shape[0]: + raise ValueError( + "Size of dimension 0 of beta should be the same as " + + "the size of dimension -3 of x." + ) + return self.x.sym_type + + +@register_op(doc_str="") +class softmax(Operation): + """ + Returns ``exp(x) / tf.reduce_sum(tf.exp(x), axis)``. + + Parameters + ---------- + x: tensor<*?, T> (Required) + axis: const i32 (Optional) + * Default to ``-1``. + + Returns + ------- + tensor<*?, f32>, a tensor of the same shape and type as ``x``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + logit=TensorInputType(), axis=IntInputType(const=True, default=-1), + ) + + def __init__(self, **kwargs): + super(softmax, self).__init__(**kwargs) + + def type_inference(self): + return self.logit.sym_type + + @precondition(allow=VALUE) + def value_inference(self): + x = self.logit.val + axis = self.axis.val + return scipy.special.softmax(x, axis=axis) + + +@register_op(doc_str="") +class softsign(elementwise_unary): + """ + Returns ``x / ( 1 + |x| )`` applied elementwise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor of the same shape and type as ``x``. + """ + + def __init__(self, **kwargs): + super(softsign, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return self.x.val / (1 + np.abs(self.x.val)) + + +@register_op(doc_str="") +class thresholded_relu(Operation): + """ + Returns ``x`` if ``x >= alpha``, ``0`` otherwise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + alpha: const f32 (Optional) + * Default to ``1``. + + Returns + ------- + tensor<*, T> + * a tensor of the same shape and type as ``x``. + """ + + input_spec = InputSpec( + x=ScalarOrTensorInputType(), alpha=FloatInputType(const=True, default=1), + ) + + def __init__(self, **kwargs): + super(thresholded_relu, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + @precondition(allow=VALUE) + def value_inference(self): + return np.maximum(self.x.val - self.alpha.val, 0) diff --git a/coremltools/converters/mil/mil/ops/defs/control_flow.py b/coremltools/converters/mil/mil/ops/defs/control_flow.py new file mode 100644 index 000000000..ba027d983 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/control_flow.py @@ -0,0 +1,433 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import six
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil.types.type_mapping import (
+    numpy_val_to_builtin_val,
+    is_subtype,
+)
+from coremltools.converters.mil.mil import Block, SYMBOL, NONE
+from coremltools.converters.mil.mil.var import Var
+from coremltools.converters.mil.mil import get_new_symbol
+from ._op_reqs import *
+import logging
+
+
+@register_op(doc_str="")
+class cond(Operation):
+    """
+    Conditional execution. The return types must be identical between the true
+    and false branches.
+
+    Parameters
+    ----------
+    pred: tensor<[], bool> (Required)
+        * 0D tensor (scalar) predicate to switch between the true and false branches.
+
+    _true_fn: Python function (Required)
+        * A Python function that will be executed if ``cond`` evaluates to ``True``. It must take no inputs and return one or more values, whose types will be taken to be the return types of the operation.
+
+    _false_fn: Python function (Required)
+        * A Python function to be executed if ``cond`` evaluates to ``False``. It must take no inputs, and its return types must match those of ``_true_fn``.
+
+    Returns
+    -------
+    Python tuple
+        * Tuple of ``Variables`` from one of the branches.
+    """
+
+    input_spec = InputSpec(
+        pred=BoolInputType(),
+        _true_fn=PyFunctionInputType(),
+        _false_fn=PyFunctionInputType(),
+    )
+
+    def __init__(self, **kwargs):
+        super(cond, self).__init__(**kwargs)
+
+    def build_nested_blocks(self):
+        # Cond block
+        true_block_name = self.name + "_true"
+        with Block(name=true_block_name, outer_op=self) as true_block:
+            true_func = self._true_fn.val
+            true_ret_vars = true_func()
+            if isinstance(true_ret_vars, tuple):
+                true_ret_vars = list(true_ret_vars)
+            if not isinstance(true_ret_vars, list):
+                true_ret_vars = [true_ret_vars]
+            true_block.set_outputs(true_ret_vars)
+            self.blocks.append(true_block)
+
+        false_block_name = self.name + "_false"
+        with Block(name=false_block_name, outer_op=self) as false_block:
+            false_func = self._false_fn.val
+            false_ret_vars = false_func()
+            if isinstance(false_ret_vars, tuple):
+                false_ret_vars = list(false_ret_vars)
+            if not isinstance(false_ret_vars, list):
+                false_ret_vars = [false_ret_vars]
+            false_block.set_outputs(false_ret_vars)
+            self.blocks.append(false_block)
+
+    def type_inference(self):
+        true_ret_vars = self.blocks[0].outputs
+        false_ret_vars = self.blocks[1].outputs
+        # Verify true_ret_vars has the same types as false_ret_vars
+        for i, (vt, vf) in enumerate(zip(true_ret_vars, false_ret_vars)):
+            if vt.sym_type != vf.sym_type:
+                msg = (
+                    "true branch output {} type {} mismatches false branch"
+                    + " output type {}"
+                )
+                raise ValueError(msg.format(vt.name, vt.sym_type, vf.sym_type))
+
+        return tuple(v.sym_type for v in true_ret_vars)
+
+
+@register_op(doc_str="")
+class const(Operation):
+    input_spec = InputSpec(
+        mode=InternalStringInputType(const=True, default="immediate_value"),
+        val=InternalScalarOrTensorInputType(const=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(const, self).__init__(**kwargs)
+
+    def type_inference(self):
+        builtin_type, _ = self._get_type_val(self.val.val)
+        return builtin_type
+
+    def value_inference(self):
+        _, val = self._get_type_val(self.val.val)
+        return val
+
+    def _get_type_val(self, value):
+
+        if isinstance(value, (float, np.float64)):
+            value = np.float32(value)
+        elif isinstance(value, bool):
+            value = np.bool(value)
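+        # Python ints and int64/float64 containers are narrowed to
+        # int32/float32 below, the default MIL precisions.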
+
+
+@register_op(doc_str="")
+class const(Operation):
+    input_spec = InputSpec(
+        mode=InternalStringInputType(const=True, default="immediate_value"),
+        val=InternalScalarOrTensorInputType(const=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(const, self).__init__(**kwargs)
+
+    def type_inference(self):
+        builtin_type, _ = self._get_type_val(self.val.val)
+        return builtin_type
+
+    def value_inference(self):
+        _, val = self._get_type_val(self.val.val)
+        return val
+
+    def _get_type_val(self, value):
+
+        if isinstance(value, (float, np.float64)):
+            value = np.float32(value)
+        elif isinstance(value, bool):
+            value = np.bool(value)
+        elif isinstance(value, (six.integer_types, np.int64)):
+            value = np.int32(value)
+        elif isinstance(value, (tuple, list, np.ndarray)):
+            value = np.array(value)
+            if value.dtype == np.int64:
+                # We use int32 by default.
+                value = value.astype(np.int32)
+
+            if value.dtype == np.float64:
+                # We use float32 by default.
+                value = value.astype(np.float32)
+
+        if not isinstance(value, (np.generic, np.ndarray, six.string_types, bool)):
+            raise ValueError("Unknown value for constant: {}".format(value))
+
+        _, builtin_type = numpy_val_to_builtin_val(value)
+        return builtin_type, value
+
+
+# Internal const can have symbolic value (for testing purpose)
+@register_op(doc_str="")
+class _const_symbolic(const):
+    def __init__(self, **kwargs):
+        super(_const_symbolic, self).__init__(**kwargs)
+
+    def type_inference(self):
+        builtin_type, _ = self._get_type_val(self.val.sym_val)
+        return builtin_type
+
+    def value_inference(self):
+        # We allow symbolic values in _const_symbolic
+        _, val = self._get_type_val(self.val.sym_val)
+        return val
+
+
+@register_op(doc_str="")
+class select(Operation):
+    """
+    Return the elements selected from either ``a`` or ``b``, depending on
+    ``cond``. The shapes of ``cond``, ``a``, ``b`` must be broadcastable.
+
+    ``a, b`` must be provided together, or neither is provided. If neither is
+    provided, return the indices of ``cond`` that are ``True``.
+
+    Parameters
+    ----------
+    cond: tensor<[*D1], T> (Required)
+        * When True (non-zero), select the element from ``a``; otherwise, from ``b``.
+
+    a: tensor<[*D2], T> (Optional. Default to None)
+        * Values selected at indices where ``cond`` is True.
+
+    b: tensor<[*D3], T> (Optional. Default to None)
+        * Values selected at indices where ``cond`` is False.
+
+    Returns
+    -------
+    tensor<[*D_out], T> or tensor<[n, len(D1)], int32>
+        * If ``a, b`` are both provided, the return shape is determined by broadcast rules from ``cond, a, b``. If ``a, b`` are both ``None``, the return shape is 2D, where the first dimension ``n`` is the number of matching indices in ``cond`` and ``len(D1)`` is the rank of ``cond``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        cond=TensorInputType(), a=TensorInputType(), b=TensorInputType()
+    )
+
+    def __init__(self, **kwargs):
+        super(select, self).__init__(**kwargs)
+
+    def type_inference(self):
+        a_type = self.a.sym_type
+        b_type = self.b.sym_type
+        if all([a_type, b_type]):
+            compatible, ret_type = types.is_tensor_and_is_compatible_general_shape(
+                a_type, b_type
+            )
+            if compatible:
+                return ret_type
+            elif a_type == b_type:
+                return a_type
+            else:
+                raise ValueError("Type mismatch {} vs. {}".format(a_type, b_type))
+        return a_type if a_type is not None else b_type
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.where(self.cond.val, self.a.val, self.b.val)
+
+
+@register_op(doc_str="")
+class while_loop(Operation):
+    """
+    Perform ``_body`` repeatedly while the condition ``_cond`` is true.
+
+    Parameters
+    ----------
+    _cond: Python function (Required)
+        * A Python function that takes ``loop_vars`` as positional arguments. The function must return a bool Var.
+
+    _body: Python function (Required)
+        * A Python function that takes ``loop_vars`` as positional arguments. The function must return the same number of output vars as ``loop_vars``, with the same types.
+
+    loop_vars: Python tuple (Required)
+        * Python tuple of ``Variables``.
+
+    Returns
+    -------
+    Python tuple
+        * Same types as ``loop_vars``.
+    """
+
+    input_spec = InputSpec(
+        # arg name with underscore prefix won't be printed.
+ _cond=PyFunctionInputType(), + _body=PyFunctionInputType(), + loop_vars=TupleInputType(), + ) + + def __init__(self, **kwargs): + super(while_loop, self).__init__(**kwargs) + + def build_nested_blocks(self): + # self.loop_vars is python tuple of Vars + # Cond block + block_name = self.name + "_block" + with Block( + block_inputs=self.loop_vars, outer_op=self, name=block_name + ) as block: + # Body func + body_func = self._body.val + exit_vars = body_func(*block.inputs) + + # Cond func: + cond_func = self._cond.val + cond_var = cond_func(*block.inputs) + cond_vars = cond_var if isinstance(cond_var, list) else [cond_var] + + # Concatenate the outputs + block.set_outputs(cond_vars + list(exit_vars)) + self.blocks.append(block) + + # Verify exit_vars has the same types as loop_vars + for v_in, v_out in zip(self.loop_vars, exit_vars): + if not is_subtype(v_out.sym_type, v_in.sym_type): + msg = ( + "loop_vars '{}' changes in the body of " + "while_loop '{}':\n {} -> {}" + ) + raise ValueError( + msg.format(v_in.name, self.name, v_in.sym_type, v_out.sym_type) + ) + + def type_inference(self): + # Skip the conditional var + return tuple(v.sym_type for v in self.blocks[0].outputs[1:]) + + +# identity is used for renaming and is rarely necessary. See +# `loop_invariant_elimination` pass for a rare use case. +@register_op(doc_str="") +class identity(Operation): + input_spec = InputSpec(x=ListOrScalarOrTensorInputType()) + + def __init__(self, **kwargs): + super(identity, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + @precondition(allow=VALUE | SYMBOL) + def value_inference(self): + return self.x.sym_val + + +@register_op(doc_str="") +class make_list(Operation): + input_spec = InputSpec( + init_length=IntInputType(optional=True, default=1), + dynamic_length=BoolInputType(optional=True, default=True), + elem_shape=TensorInputType(const=True), + dtype=StringInputType(const=True, optional=True, default="fp32"), + ) + + def __init__(self, **kwargs): + super(make_list, self).__init__(**kwargs) + + def type_inference(self): + builtin_dtype = types.string_to_builtin(self.dtype.val) + if builtin_dtype is None: + raise ValueError("Unsupported dtype {}".format(self.dtype.val)) + elem_type = types.tensor(builtin_dtype, self.elem_shape.sym_val) + return types.list( + elem_type, + init_length=self.init_length.val, + dynamic_length=self.dynamic_length.val, + ) + + +@register_op(doc_str="") +class list_length(Operation): + input_spec = InputSpec(ls=ListInputType(),) + + def __init__(self, **kwargs): + super(list_length, self).__init__(**kwargs) + + def type_inference(self): + return types.int32 + + @precondition(allow=VALUE | SYMBOL | NONE) + def value_inference(self): + if not self.ls.dynamic_length: + return self.ls.init_length + raise NotImplementedError() + + +@register_op(doc_str="") +class list_write(Operation): + input_spec = InputSpec( + ls=ListInputType(), index=IntInputType(), value=TensorInputType(), + ) + + def __init__(self, **kwargs): + super(list_write, self).__init__(**kwargs) + + def type_inference(self): + list_elem_type = self.ls.elem_type + value_type = self.value.sym_type + dynamic_length = self.ls.dynamic_length + init_length = self.ls.init_length + + if list_elem_type is None: + # fill in the elem type using value's type info. + return types.list( + value_type, init_length=init_length, dynamic_length=dynamic_length + ) + if list_elem_type == types.unknown: + msg = "Input ls elem type unknown. 
Override with {}" + logging.warning(msg.format(value_type)) + return types.list( + value_type, init_length=init_length, dynamic_length=dynamic_length + ) + if not types.is_subtype(value_type, list_elem_type): + msg = "Elem type mismatch: ls elem type {} vs " + "value type {}" + raise ValueError(msg.format(list_elem_type, value_type)) + return self.ls.sym_type + + +@register_op(doc_str="") +class list_read(Operation): + input_spec = InputSpec(ls=ListInputType(), index=IntInputType(),) + + def __init__(self, **kwargs): + super(list_read, self).__init__(**kwargs) + + def type_inference(self): + list_elem_type = self.ls.elem_type + if list_elem_type is None: + msg = ( + "Unknown element type. The List might not have been " + + "written to ({})" + ) + raise ValueError(msg.format(self.name)) + return list_elem_type + + +@register_op(doc_str="") +class list_gather(Operation): + input_spec = InputSpec(ls=ListInputType(), indices=IntTensorInputType(),) + + def __init__(self, **kwargs): + super(list_gather, self).__init__(**kwargs) + + def type_inference(self): + list_elem_type = self.ls.elem_type + if list_elem_type == types.unknown: + msg = ( + "Unknown element type. The List might not have been " + + "written to ({})" + ) + raise ValueError(msg.format(self.name)) + elem_shape = list_elem_type.get_shape() + dtype = list_elem_type.get_primitive() + ret_shape = [self.indices.shape[0]] + list(elem_shape) + return types.tensor(dtype, tuple(ret_shape)) + + +@register_op(doc_str="") +class list_scatter(Operation): + input_spec = InputSpec( + ls=ListInputType(), indices=IntTensorInputType(), value=TensorInputType(), + ) + + def __init__(self, **kwargs): + super(list_scatter, self).__init__(**kwargs) + + def type_inference(self): + num_indices = self.indices.shape[0] + num_values = self.value.shape[0] + if num_values != num_indices: + raise ValueError( + "Cannot scatter {} values to {} indices".format(num_values, num_indices) + ) + list_elem_type = self.ls.elem_type + value_type = self.value.sym_type + dynamic_length = self.ls.dynamic_length + init_length = self.ls.init_length + + elem_type = types.tensor(value_type.get_primitive(), value_type.get_shape()[1:]) + if list_elem_type == types.unknown: + # fill in the elem type using value's type info. + return types.list( + elem_type, dynamic_length=dynamic_length, init_length=init_length + ) + if not types.is_subtype(elem_type, list_elem_type): + msg = "Elem type mismatch: ls elem type {} vs " + "value type {}" + raise ValueError(msg.format(list_elem_type, elem_type)) + return self.ls.sym_type diff --git a/coremltools/converters/mil/mil/ops/defs/conv.py b/coremltools/converters/mil/mil/ops/defs/conv.py new file mode 100644 index 000000000..246ad04dd --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/conv.py @@ -0,0 +1,267 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.ops.defs._utils import spatial_dimensions_out_shape +from ._op_reqs import * + + +@register_op(doc_str="") +class conv(Operation): + """ + Perform convolution over input, currently supporting only 1D and 2D + convolution. + + Parameters + ---------- + x: tensor<[n, C_in, *d_in], T> (Required) + + * ``d_in`` are (possibly runtime-determined) spatial dimensions (e.g., ``d_in = [224, 224]`` for 2D convolution). ``1 <= len(d_in) <= 2`` (i.e., only 1D and 2D convolution) . 
+    * ``C_in`` is the number of input channels or depth dimensions.
+    * ``n`` is the batch dimension.
+
+    weight: tensor<[C_out, C_in/groups, *K], T> (Required)
+
+    * Filter weights. ``C_in`` is the number of input channels. ``C_in`` must be divisible by ``groups``.
+    * ``K`` are kernel sizes (e.g. ``K = [KH, KW]`` for 2D conv).
+    * When ``dilations`` is not all 1, ``weight`` has to be const at compile time.
+
+    strides: const tensor<[S], i32> (Optional. Defaults to a vector of 1s whose length equals the number of spatial dimensions)
+    * Strides along each of the spatial dimensions.
+    * ``S == len(d_in)``
+
+    pad_type: const str (Required)
+
+    * Must be one of the following:
+
+        * ``valid``: No padding. This is equivalent to custom pad with ``pad[2*i] == pad[2*i+1] == 0, for i=0,...,len(d_in)-1``.
+
+        * ``same``: Input is padded such that the output spatial shapes are ``d_out[i] = ceil(d_in[i] / strides[i])``. Specifically, for ``i = 0, ..., len(d_in)-1``, the equivalent paddings are
+
+            * When the dilated kernel size ``(K[i]-1)*dilations[i]+1`` is even:
+                * ``pad[2*i] = ceil[((K[i]-1)*dilations[i]+1)/2]``
+                * ``pad[2*i+1] = floor[((K[i]-1)*dilations[i]+1)/2]``
+            * Otherwise, ``pad[2*i] = pad[2*i+1] = (K[i]-1) * dilations[i] / 2``
+
+        * ``custom``: Specify custom padding in the parameter ``pad``.
+
+    pad: const tensor<[P], i32> (Optional. Default to all zeros)
+    * ``len(P) = 2 * len(d_in)``
+    * ``pad`` should be specified if and only if ``pad_type == custom``. Errors otherwise.
+    * ``pad`` represents the number of elements to pad before and after each dimension. Specifically, ``pad[0], pad[1]`` are the pad sizes before / after spatial dimension 0, ``pad[2], pad[3]`` are the pad sizes before / after spatial dimension 1, etc.
+
+    dilations: const tensor<[S], i32> (Optional. Default to all 1s)
+    * Dilation value along each spatial dimension in ``d_in``. See `visualization `_
+    * ``S == len(d_in)``
+
+    groups: const tensor<[], i32> (Optional. Default to 1)
+    * Input and output channels are split by ``groups``.
+
+    * ``C_in`` must be divisible by ``groups``. The maximum value for ``groups`` is ``C_in``, in which case it is a depthwise convolution.
+
+    * For examples (assuming ``C_in = 16, C_out = 32``):
+
+        * ``groups == 1``, ``weight`` has shape ``[32, 16, KH, KW]``: all input channels are convolved with the ``weight`` kernel to produce all output channels.
+
+        * ``groups == 2``, ``weight`` has shape ``[32, 8, KH, KW]``: input channels 0~7 are convolved with half of the ``weight`` kernel to produce output channels 0~15. Similarly, input channels 8~15 are convolved with the other half of ``weight`` to produce output channels 16~31.
+
+        * ``groups == C_in``, ``weight`` has shape ``[32, 1, KH, KW]``: each input channel is convolved with its own set of filters and each produces ``C_out / C_in = 2`` channels. This is equivalent to depthwise convolution.
+
+    bias: const tensor<[C_out],T> (Optional. Default to all 0)
+    * Bias along output channels.
+
+
+    Returns
+    -------
+    tensor<[n, C_out, *d_out], T>
+        * Output activation has the same rank and spatial dimension as the input (i.e., ``len(d_out) == len(d_in)``).
+        * For ``i=0,...,len(d_in)-1``: ``d_out[i] = floor[(d_in[i] + pad[2*i] + pad[2*i+1] - (K[i]-1)*dilations[i] - 1) / strides[i]] + 1``
+
+    Attributes
+    ----------
+    T: fp32
+
+    See Also
+    --------
+    conv_transpose
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        weight=TensorInputType(),
+        bias=TensorInputType(const=True, optional=True, default=None),
+        strides=IntTensorInputType(const=True, optional=True, default=None),
+        pad_type=StringInputType(const=True, optional=True, default="valid"),
+        pad=IntTensorInputType(const=True, optional=True, default=None),
+        dilations=IntTensorInputType(const=True, optional=True, default=None),
+        groups=IntInputType(const=True, optional=True, default=1),
+    )
+
+    def __init__(self, **kwargs):
+        super(conv, self).__init__(**kwargs)
+
+    def type_inference(self):
+        inshape = self.x.shape
+        f_shape = self.weight.shape
+        kernel_shape = f_shape[2:]
+        num_dims = len(inshape) - 2
+        C_out = f_shape[0]
+        C_in = self.x.shape[1]
+        groups = self.groups.val
+
+        if self.bias is not None and self.bias.val.shape[0] != C_out:
+            msg = "# of bias values {} not equal to # output channels {}"
+            raise ValueError(msg.format(self.bias.val.shape[0], C_out))
+        if C_in % groups != 0:
+            msg = "# of input channels {} not divisible by groups {}"
+            raise ValueError(msg.format(C_in, groups))
+        if C_in // groups != self.weight.shape[1]:
+            msg = "C_in / groups = {}/{} != weight[1] ({})"
+            raise ValueError(msg.format(C_in, groups, self.weight.shape[1]))
+
+        strides = [1] * num_dims if self.strides is None else self.strides.val
+        dilations = [1] * num_dims if self.dilations is None else self.dilations.val
+        custom_pad = None if self.pad is None else self.pad.val
+        N = inshape[0]
+        # spatial dimensions
+        d_out_shape = spatial_dimensions_out_shape(
+            pad_type=self.pad_type.val,
+            input_shape=inshape[2:],
+            kernel_shape=kernel_shape,
+            strides=strides,
+            dilations=dilations,
+            custom_pad=custom_pad,
+        )
+        retshape = [N, C_out] + d_out_shape
+        return types.tensor(self.x.dtype, tuple(retshape))
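+
+# A sanity check on the d_out formula above (plain Python, independent of the
+# spatial_dimensions_out_shape helper; `conv_out_shape` is an illustrative
+# name, not an API):
+#
+#     import math
+#
+#     def conv_out_shape(d_in, K, strides, dilations, pad):
+#         # d_out[i] = floor((d_in[i] + pad[2i] + pad[2i+1]
+#         #                   - (K[i]-1)*dilations[i] - 1) / strides[i]) + 1
+#         return [
+#             math.floor(
+#                 (d_in[i] + pad[2 * i] + pad[2 * i + 1]
+#                  - (K[i] - 1) * dilations[i] - 1) / strides[i]
+#             ) + 1
+#             for i in range(len(d_in))
+#         ]
+#
+#     # 224x224 input, 3x3 kernel, stride 2, 1 pixel of padding per side
+#     assert conv_out_shape([224, 224], [3, 3], [2, 2], [1, 1],
+#                           [1, 1, 1, 1]) == [112, 112]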
+
+
+@register_op(doc_str="")
+class conv_transpose(Operation):
+    """
+    Perform transposed convolution (also known as deconvolution or fractionally
+    strided convolution) over the input. ``conv_transpose`` can also be used to
+    compute the gradient of conv. Currently supports 1D and 2D only.
+
+    Parameters
+    ----------
+
+    x: tensor<[n,C_in,*D_in],T> (Required)
+        * Input data. ``D_in`` are spatial dimensions. ``1 <= len(D_in) <= 2`` and ``C_in`` is the number of input channels.
+
+    weight: const tensor<[C_out,C_in/groups,*K], T> (Required)
+        * Filter weights. ``C_in, C_out`` are the number of input and output channels respectively.
+        * ``K`` are kernel sizes. ``1 <= len(K) <= 2``
+
+    bias: const tensor<[C_out],T> (Optional. Default to all 0)
+        * Bias added along output channels.
+
+    pad: const tensor<[P],i32> (Optional. Default to all 0s)
+        * Number of elements to pad before and after each dimension.
+        * ``P == 2 * len(D_in)``
+        * ``pad[2*i], pad[2*i+1]`` are the pad sizes before and after dimension ``i``, where ``0 <= i < len(D_in)``.
+
+    output_shape: const tensor<[P],i32> (Optional. Default None)
+        * Expected output shape.
+        * Padding is computed using ``pad_type`` and ``output_shape`` if provided.
+
+    pad_type: const str (Optional. Default to valid)
+        * One of ``same``, ``valid``, or ``custom``.
+
+    strides: const tensor<[S],i32> (Optional. Default to all 1s)
+        * Stride along each of the spatial dimensions. ``S == len(D_in)``
+
+    dilations: const tensor<[S],i32> (Optional. Default to all 1s)
+        * Dilation value along each spatial dimension in ``D_in``. See ``conv``.
+        * ``S == len(D_in)``
+
+    groups: const tensor<[], i32> (Optional. Default to 1)
+        * Input and output channels are separated into ``groups``. ``C_in`` and ``C_out`` must be divisible by the number of groups. See ``conv`` for examples.
+
+    Returns
+    -------
+    tensor<[n,C_out,*D_out],T>
+        * For ``pad_type == custom``: ``D_out[i] = (D_in[i]-1) * strides[i] + (K[i]-1) * dilations[i] + 1 - pad[2*i] - pad[2*i+1]`` for ``i = 0, ..., len(D_in)-1``.
+
+    Attributes
+    ----------
+    T: fp32
+
+    See Also
+    --------
+    conv
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),  # [n, C_in, spatial_dims]
+        weight=TensorInputType(const=True),  # [C_out, C_in, spatial_dims]
+        bias=TensorInputType(const=True, optional=True, default=None),
+        pad=IntTensorInputType(const=True, optional=True, default=None),
+        output_shape=IntTensorInputType(const=True, optional=True, default=None),
+        pad_type=StringInputType(const=True, optional=True, default="valid"),
+        strides=TensorInputType(const=True, optional=True, default=None),
+        dilations=TensorInputType(const=True, optional=True, default=None),
+        groups=IntInputType(const=True, optional=True, default=1),
+    )
+
+    def __init__(self, **kwargs):
+        super(conv_transpose, self).__init__(**kwargs)
+
+    def type_inference(self):
+        # Input shape is [n, C_in, spatial_dims]
+        in_shape = self.x.shape
+        # Weight shape is [C_out, C_in, spatial_dims]
+        f_shape = self.weight.shape
+        kernel_shape = f_shape[2:]
+        spatial_dim_rank = len(in_shape) - 2
+        N = in_shape[0]
+        C_in = self.x.shape[1]
+        groups = self.groups.val
+        C_out = f_shape[0] * groups
+
+        if self.bias is not None and self.bias.val.shape[0] != C_out:
+            msg = "# of bias values {} not equal to # output channels {}"
+            raise ValueError(msg.format(self.bias.val.shape[0], C_out))
+        if C_in % groups != 0:
+            msg = "# of input channels {} not divisible by groups {}"
+            raise ValueError(msg.format(C_in, groups))
+
+        # If output shape is given, return it
+        if self.output_shape is not None:
+            return types.tensor(
+                self.x.dtype, tuple([N, C_out] + list(self.output_shape.val))
+            )
+
+        strides = [1] * spatial_dim_rank if self.strides is None else self.strides.val
+        dilations = (
+            [1] * spatial_dim_rank if self.dilations is None else self.dilations.val
+        )
+        kernel_shape = [
+            (kernel_shape[r] - 1) * dilations[r] + 1 for r in range(spatial_dim_rank)
+        ]
+
+        D_in = in_shape[2:]  # spatial dimensions
+
+        # Deconv's output shape is non-deterministic, we follow TF shape logic here.
+        if self.pad_type.val == "same":
+            d_out_shape = [strides[r] * D_in[r] for r in range(spatial_dim_rank)]
+        elif self.pad_type.val == "valid":
+            d_out_shape = [
+                strides[r] * D_in[r] + kernel_shape[r] - 1
+                for r in range(spatial_dim_rank)
+            ]
+        elif self.pad_type.val == "custom":
+            if self.pad is None:
+                raise ValueError("self.pad must exist if pad_type is custom")
+            pad = self.pad.val
+            d_out_shape = [
+                strides[r] * (D_in[r] - 1)
+                + kernel_shape[r]
+                - pad[2 * r]
+                - pad[2 * r + 1]
+                for r in range(spatial_dim_rank)
+            ]
+        else:
+            raise ValueError("Unrecognized pad_type {}".format(self.pad_type.val))
+
+        retshape = [N, C_out] + d_out_shape
+        return types.tensor(self.x.dtype, tuple(retshape))
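+
+# A hedged standalone re-statement of the pad_type arithmetic above (mirrors
+# type_inference; `deconv_out_shape` is an illustrative name, not an API):
+#
+#     def deconv_out_shape(d_in, kernel, strides, dilations, pad_type, pad=None):
+#         # effective (dilated) kernel sizes, as computed in type_inference
+#         k = [(kernel[r] - 1) * dilations[r] + 1 for r in range(len(d_in))]
+#         if pad_type == "same":
+#             return [strides[r] * d_in[r] for r in range(len(d_in))]
+#         if pad_type == "valid":
+#             return [strides[r] * d_in[r] + k[r] - 1 for r in range(len(d_in))]
+#         # custom
+#         return [
+#             strides[r] * (d_in[r] - 1) + k[r] - pad[2 * r] - pad[2 * r + 1]
+#             for r in range(len(d_in))
+#         ]
+#
+#     # e.g. a 4x4 map, 3x3 kernel, stride 2, "same" padding -> 8x8
+#     assert deconv_out_shape([4, 4], [3, 3], [2, 2], [1, 1], "same") == [8, 8]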
diff --git a/coremltools/converters/mil/mil/ops/defs/elementwise_binary.py b/coremltools/converters/mil/mil/ops/defs/elementwise_binary.py
new file mode 100644
index 000000000..416f172a9
--- /dev/null
+++ b/coremltools/converters/mil/mil/ops/defs/elementwise_binary.py
@@ -0,0 +1,575 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import operator
+from ._op_reqs import *
+from ._utils import promoted_primitive_type, broadcast_shapes
+
+"""
+Elementwise Binary Op Superclass
+"""
+
+
+class elementwise_binary(Operation):
+    input_spec = InputSpec(x=ScalarOrTensorInputType(), y=ScalarOrTensorInputType(),)
+
+    def __init__(self, **kwargs):
+        super(elementwise_binary, self).__init__(**kwargs)
+
+    def type_inference(self):
+        typea = self.x.sym_type
+        typeb = self.y.sym_type
+        primitive_type = promoted_primitive_type(typea, typeb)
+        if primitive_type is None:
+            raise ValueError("Incompatible primitive types in broadcast operation")
+        primitive_type = self.get_dtype(primitive_type)
+
+        # broadcast
+        if not types.is_tensor(typea) and not types.is_tensor(typeb):
+            # both typea and typeb are not tensors
+            return primitive_type
+        if types.is_tensor(typea) and not types.is_tensor(typeb):
+            # a is tensor, b is not
+            return types.tensor(primitive_type, typea.get_shape())
+        if not types.is_tensor(typea) and types.is_tensor(typeb):
+            # a is not tensor, b is
+            return types.tensor(primitive_type, typeb.get_shape())
+
+        # both a, b are tensors
+        shapea = list(typea.get_shape())
+        shapeb = list(typeb.get_shape())
+        ret_shape = broadcast_shapes(shapea, shapeb)
+        return types.tensor(primitive_type, ret_shape)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return self._cast_check_value_inference(self.x.val, self.y.val)
+
+    def get_operator(self):
+        """
+        All subclasses have to implement this.
+        """
+        raise NotImplementedError()
+
+    def get_dtype(self, promoted_dtype):
+        """
+        Override if the output primitive type differs from the input types
+        (e.g., less, greater).
+        """
+        return promoted_dtype
+
+    def _cast_check_value_inference(self, a, b):
+        """
+        If one of the inputs is a tensor, cast the result to a tensor.
+        """
+        to_cast = any([isinstance(x, np.ndarray) for x in [a, b]])
+        result = self.get_operator()(a, b)
+        return result if not to_cast else np.array(result)
+
+
+"""
+Elementwise Binary Op Implementation(s)
+"""
+
+
+@register_op(doc_str="")
+class add(elementwise_binary):
+    """
+    Add two inputs element-wise.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+    y: tensor<*?, T> (Required)
+
+    Returns
+    -------
+    tensor<*?, T>
+        * a tensor of the same type and shape as the inputs.
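+
+    For example (illustrative; ``add`` broadcasts like numpy)::
+
+        add(x=[[1., 2., 3.]], y=1.)  # -> [[2., 3., 4.]]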
+ + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(add, self).__init__(**kwargs) + + def get_operator(self): + return operator.add + + +@register_op(doc_str="") +class equal(elementwise_binary): + """ + Return ``x==y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(equal, self).__init__(**kwargs) + + def get_operator(self): + return np.equal + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class floor_div(elementwise_binary): + """ + Return the floor_div values of two inputs element-wise. + That is the largest integer ``t``, and ``t <= x/y``. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor of the same type and shape as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(floor_div, self).__init__(**kwargs) + + def get_operator(self): + return operator.floordiv + + +@register_op(doc_str="") +class greater(elementwise_binary): + """ + Return ``x > y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(greater, self).__init__(**kwargs) + + def get_operator(self): + return operator.gt + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class greater_equal(elementwise_binary): + """ + Return ``x >= y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(greater_equal, self).__init__(**kwargs) + + def get_operator(self): + return operator.ge + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class less(elementwise_binary): + """ + Return ``x < y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(less, self).__init__(**kwargs) + + def get_operator(self): + return operator.lt + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class less_equal(elementwise_binary): + """ + Return ``x <= y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(less_equal, self).__init__(**kwargs) + + def get_operator(self): + return operator.le + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class logical_and(elementwise_binary): + """ + Return ``x & y`` element-wise. 
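+
+    For example (illustrative)::
+
+        logical_and(x=[True, False], y=[True, True])  # -> [True, False]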
+ + Parameters + ---------- + x: tensor<*?, bool> (Required) + y: tensor<*?, bool> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + """ + + def __init__(self, **kwargs): + super(logical_and, self).__init__(**kwargs) + + def get_operator(self): + return np.logical_and + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class logical_or(elementwise_binary): + """ + Return ``x || y`` element-wise. + + Parameters + ---------- + x: tensor<*?, bool> (Required) + y: tensor<*?, bool> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + """ + + def __init__(self, **kwargs): + super(logical_or, self).__init__(**kwargs) + + def get_operator(self): + return np.logical_or + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class logical_xor(elementwise_binary): + """ + Return ``x ^ y`` element-wise. + + Parameters + ---------- + x: tensor<*?, bool> (Required) + y: tensor<*?, bool> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. + """ + + def __init__(self, **kwargs): + super(logical_xor, self).__init__(**kwargs) + + def get_operator(self): + return np.logical_xor + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class maximum(elementwise_binary): + """ + Return ``max(x,y)`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(maximum, self).__init__(**kwargs) + + def get_operator(self): + return np.maximum + + +@register_op(doc_str="") +class minimum(elementwise_binary): + """ + Return ``min(x,y)`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(minimum, self).__init__(**kwargs) + + def get_operator(self): + return np.minimum + + +@register_op(doc_str="") +class mod(elementwise_binary): + """ + Return ``x % y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(mod, self).__init__(**kwargs) + + def get_operator(self): + return operator.mod + + +@register_op(doc_str="") +class mul(elementwise_binary): + """ + Return ``x * y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(mul, self).__init__(**kwargs) + + def get_operator(self): + return operator.mul + + +@register_op(doc_str="") +class not_equal(elementwise_binary): + """ + Return ``x != y`` element-wise. + + Parameters + ---------- + x: tensor<*?, bool> (Required) + y: tensor<*?, bool> (Required) + + Returns + ------- + tensor<*?, bool> + * a bool tensor with the same shape as inputs. 
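+
+    For example (illustrative)::
+
+        not_equal(x=[1., 2.], y=[1., 3.])  # -> [False, True]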
+ """ + + def __init__(self, **kwargs): + super(not_equal, self).__init__(**kwargs) + + def get_operator(self): + return operator.ne + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class real_div(elementwise_binary): + """ + Return the true division ``x / y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(real_div, self).__init__(**kwargs) + + def get_operator(self): + return operator.truediv + + def get_dtype(self, promoted_dtype): + return types.float + + +@register_op(doc_str="") +class pow(elementwise_binary): + """ + Return ``pow(x,y)`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(pow, self).__init__(**kwargs) + + def get_operator(self): + return operator.pow + + +@register_op(doc_str="") +class sub(elementwise_binary): + """ + Return ``x - y`` element-wise. + + Parameters + ---------- + x: tensor<*?, T> (Required) + y: tensor<*?, T> (Required) + + Returns + ------- + tensor<*?, T> + * a tensor with the same shape and type as inputs. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(sub, self).__init__(**kwargs) + + def get_operator(self): + return operator.sub diff --git a/coremltools/converters/mil/mil/ops/defs/elementwise_unary.py b/coremltools/converters/mil/mil/ops/defs/elementwise_unary.py new file mode 100644 index 000000000..fed2df668 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/elementwise_unary.py @@ -0,0 +1,820 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import scipy +from ._op_reqs import * + +""" +Elementwise Unary Op Superclass +""" + + +class elementwise_unary(Operation): + input_spec = InputSpec(x=ScalarOrTensorInputType(),) + + def __init__(self, **kwargs): + super(elementwise_unary, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + +""" +Elementwise unary op implmentation(s) +""" + + +@register_op(doc_str="") +class abs(elementwise_unary): + """ + Returns the absolute values of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(abs, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.abs(self.x.val) + + +@register_op(doc_str="") +class acos(elementwise_unary): + """ + Returns the inverse cosine values of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. 
+ + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(acos, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.arccos(self.x.val) + + +@register_op(doc_str="") +class asin(elementwise_unary): + """ + Returns the inverse sine of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(asin, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.arcsin(self.x.val) + + +@register_op(doc_str="") +class atan(elementwise_unary): + """ + Returns the inverse tangent of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(atan, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.arctan(self.x.val) + + +@register_op(doc_str="") +class atanh(elementwise_unary): + """ + Returns the inverse hyperbolic tangent values of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(atanh, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.arctanh(self.x.val) + + +@register_op(doc_str="") +class ceil(elementwise_unary): + """ + Returns the ceil values of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(ceil, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.ceil(self.x.val) + + +@register_op(doc_str="") +class clip(Operation): + """ + Clip the values in the input ``x`` to ``[alpha, beta]``, element-wise. + Any values less than ``alpha`` are set to ``alpha``, and any values greater + than ``beta`` are set to ``beta``. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + alpha: const f32 (Required) + beta: const f32 (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + x=ScalarOrTensorInputType(), + alpha=FloatInputType(const=True), + beta=FloatInputType(const=True), + ) + + def __init__(self, **kwargs): + super(clip, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + @precondition(allow=VALUE) + def value_inference(self): + return np.minimum(np.maximum(self.x.val, self.alpha.val), self.beta.val) + + +@register_op(doc_str="") +class cos(elementwise_unary): + """ + Returns cosine of ``x`` element-wise. Input domain is ``(-inf, inf)`` and output + range is ``[-1,1]``. 
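+
+    For example (illustrative)::
+
+        cos(x=[0.0, 3.1415927])  # -> [1.0, -1.0]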
+ + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], T> + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(cos, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.cos(self.x.val) + + +@register_op(doc_str="") +class cosh(elementwise_unary): + """ + Returns hyperbolic cosine of the input ``x``, element-wise. + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], T> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(cosh, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.cosh(self.x.val) + + +@register_op(doc_str="") +class erf(elementwise_unary): + """ + Returns the gauss error function of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(erf, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return scipy.special.erf(self.x.val) + + +@register_op(doc_str="") +class exp(elementwise_unary): + """ + Returns the exponential values of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(exp, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.exp(self.x.val) + + +@register_op(doc_str="") +class exp2(elementwise_unary): + """ + Returns the exponential values of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(exp2, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.exp2(self.x.val) + + +@register_op(doc_str="") +class floor(elementwise_unary): + """ + Returns the floor of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(floor, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.floor(self.x.val) + + +@register_op(doc_str="") +class inverse(elementwise_unary): + """ + Returns the reciprocal value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(inverse, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.reciprocal(self.x.val) + + +@register_op(doc_str="") +class log(elementwise_unary): + """ + Returns the natural logarithm value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. 
+ + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(log, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.log(self.x.val) + + +@register_op(doc_str="") +class logical_not(elementwise_unary): + """ + Returns the value of NOT the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(logical_not, self).__init__(**kwargs) + + def get_operator(self): + return np.logical_not + + def get_dtype(self, promoted_dtype): + return types.bool + + +@register_op(doc_str="") +class round(elementwise_unary): + """ + Returns the round value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(round, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.round(self.x.val) + + +@register_op(doc_str="") +class rsqrt(elementwise_unary): + """ + Returns the reciprocal value of the square root of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(rsqrt, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return 1.0 / np.sqrt(self.x.val) + + +@register_op(doc_str="") +class sign(elementwise_unary): + """ + Returns the sign value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(sign, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.sign(self.x.val) + + +@register_op(doc_str="") +class sin(elementwise_unary): + """ + Returns the sine value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(sin, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.sin(self.x.val) + + +@register_op(doc_str="") +class sinh(elementwise_unary): + """ + Returns the hyperbolic sine value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(sinh, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.sinh(self.x.val) + + +@register_op(doc_str="") +class sqrt(elementwise_unary): + """ + Returns the square root value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. 
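+
+    For example (illustrative)::
+
+        sqrt(x=[4.0, 9.0])  # -> [2.0, 3.0]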
+ + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(sqrt, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.sqrt(self.x.val) + + +@register_op(doc_str="") +class square(elementwise_unary): + """ + Returns the square value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(square, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.square(self.x.val) + + +@register_op(doc_str="") +class tan(elementwise_unary): + """ + Returns the tangent value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(tan, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.tan(self.x.val) + + +@register_op(doc_str="") +class tanh(elementwise_unary): + """ + Returns the hyperbolic tangent value of the input ``x``, element-wise. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + def __init__(self, **kwargs): + super(tanh, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + return np.tanh(self.x.val) + + +@register_op(doc_str="") +class threshold(Operation): + """ + Set a lower bound ``alpha`` to the values in the input ``x``, element-wise. + Any values less than ``alpha`` are set to ``alpha``. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + alpha: const f32 (Required) + + Returns + ------- + tensor<[*d], f32> + * a tensor of the same shape as ``x``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + x=ScalarOrTensorInputType(), alpha=FloatInputType(const=True), + ) + + def __init__(self, **kwargs): + super(threshold, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + @precondition(allow=VALUE) + def value_inference(self): + return np.maximum(self.x.val, self.alpha.val) + + +@register_op(doc_str="") +class cast(Operation): + """ + Cast the input ``x`` to the new type ``dtype``. + + Parameters + ---------- + x: tensor<[*d], T> (Required) + dtype: const str (Required) + * Can be one of the following types: int32, int64, fp32, fp64. + + Returns + ------- + tensor<[*d], dtype> + * a tensor of the same shape as ``x``, with type ``dtype``. + + Attributes + ---------- + T: i32, i64, fp32, fp64, bool. + """ + + input_spec = InputSpec( + x=ScalarOrTensorInputType(), dtype=StringInputType(const=True) + ) + + def __init__(self, **kwargs): + super(cast, self).__init__(**kwargs) + + def type_inference(self): + type_map = { + "int32": types.int32, + "int64": types.int64, + "fp32": types.fp32, + "fp64": types.fp64, + } + + if self.dtype.val not in type_map.keys(): + raise NotImplementedError( + "Parameter dtype of the cast operation can be one of the {}. 
" + "Provided {}".format(type_map.keys(), self.dtype.val) + ) + + if not types.is_tensor(self.x.sym_type): + return type_map[self.dtype.val] + + ret_shape = self.x.shape + return types.tensor(type_map[self.dtype.val], ret_shape) + + @precondition(allow=VALUE) + def value_inference(self): + type_map = { + "int32": np.int32, + "int64": np.int64, + "fp32": np.float32, + "fp64": np.float64, + } + + if self.dtype.val not in type_map.keys(): + raise NotImplementedError( + "Parameter dtype of the cast operation can be one of the {}. " + "Provided {}".format(type_map.keys(), self.dtype.val) + ) + + if not types.is_tensor(self.x.sym_type): + return self.x.val.astype(dtype=type_map[self.dtype.val]) + else: + return np.array(self.x.val).astype(dtype=type_map[self.dtype.val]) diff --git a/coremltools/converters/mil/mil/ops/defs/image_resizing.py b/coremltools/converters/mil/mil/ops/defs/image_resizing.py new file mode 100644 index 000000000..baee1bda0 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/image_resizing.py @@ -0,0 +1,191 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from ._op_reqs import * + + +@register_op( + doc_str="TODO (rdar://58622145), https://quip-apple.com/1mNfAW4JhWR9#PDe9CARK6vT" +) +class upsample_nearest_neighbor(Operation): + input_spec = InputSpec( + x=TensorInputType(), + upscale_factor_height=IntInputType(const=True, default=1), + upscale_factor_width=IntInputType(const=True, default=1), + ) + + def __init__(self, **kwargs): + super(upsample_nearest_neighbor, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank < 3: + raise ValueError( + 'input to the "upsample_nearest_neighbor" op must have rank at least 3' + ) + + ret_shape = list(self.x.shape) + ret_shape[-1] *= self.upscale_factor_width.val + ret_shape[-2] *= self.upscale_factor_height.val + return types.tensor(self.x.dtype, ret_shape) + + +@register_op( + doc_str="TODO (rdar://58622145), https://quip-apple.com/1mNfAW4JhWR9#PDe9CA9aGcP" +) +class upsample_bilinear(Operation): + input_spec = InputSpec( + x=TensorInputType(), + scale_factor_height=IntOrFloatInputType(const=True, default=1), + scale_factor_width=IntOrFloatInputType(const=True, default=1), + align_corners=BoolInputType(const=True, default=True), + ) + + def __init__(self, **kwargs): + super(upsample_bilinear, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank < 3: + raise ValueError( + 'input to the "upsample_bilinear" op must have rank at least 3' + ) + + ret_shape = list(self.x.shape) + ret_shape[-1] = np.floor(self.scale_factor_width.val * ret_shape[-1]) + ret_shape[-2] = np.floor(self.scale_factor_height.val * ret_shape[-2]) + return types.tensor(self.x.dtype, ret_shape) + + +@register_op( + doc_str="TODO (rdar://58622145), https://quip-apple.com/1mNfAW4JhWR9#PDe9CAaiOQP" +) +class resize_bilinear(Operation): + input_spec = InputSpec( + x=TensorInputType(), + target_size_height=IntInputType(const=True, default=1), + target_size_width=IntInputType(const=True, default=1), + sampling_mode=StringInputType(const=True, default="DEFAULT"), + ) + + def __init__(self, **kwargs): + super(resize_bilinear, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank < 3: + raise ValueError( + 'input to the "resize_bilinear" op must have rank at least 3' + ) + + if self.sampling_mode.val not in { + "STRICT_ALIGN_CORNERS", + 
"ALIGN_CORNERS", + "DEFAULT", + "OFFSET_CORNERS", + }: + raise ValueError( + '"resize_bilinear" op: unrecognized sampling mode "{}"'.format( + self.sampling_mode.val + ) + ) + + ret_shape = list(self.x.shape) + ret_shape[-1] = self.target_size_width.val + ret_shape[-2] = self.target_size_height.val + return types.tensor(self.x.dtype, ret_shape) + + +@register_op(doc_str="https://quip-apple.com/1mNfAW4JhWR9#PDe9CAHTGW7") +class crop_resize(Operation): + input_spec = InputSpec( + x=TensorInputType(), + roi=TensorInputType(), + target_height=IntInputType(const=True, default=1), + target_width=IntInputType(const=True, default=1), + normalized_coordinates=BoolInputType(const=True, default=False), + spatial_scale=FloatInputType(const=True, default=1.0), + box_coordinate_mode=StringInputType( + const=True, default="CONRNERS_HEIGHT_FIRST" + ), + sampling_mode=StringInputType(const=True, default="STRICT_ALIGN_CORNERS"), + ) + + def __init__(self, **kwargs): + super(crop_resize, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank != 4: + raise ValueError( + 'input to the "crop_resize" op must be of rank 4. Provided {}'.format( + self.x.rank + ) + ) + + if self.roi.rank != 5: + raise ValueError( + 'ROI input to the "crop_resize" op must be of rank 5, provided {}'.format( + self.roi.rank + ) + ) + + if self.sampling_mode.val not in { + "STRICT_ALIGN_CORNERS", + "ALIGN_CORNERS", + "DEFAULT", + "OFFSET_CORNERS", + }: + raise ValueError( + '"crop_resize" op: unrecognized sampling mode "{}"'.format( + self.sampling_mode + ) + ) + + # ret_shape: [N] + [B, C, h_out, w_out] + N, B, C = self.roi.shape[0], self.x.shape[0], self.x.shape[1] + ret_shape = [N, B, C, self.target_height.val, self.target_width.val] + return types.tensor(self.x.dtype, ret_shape) + + +@register_op(doc_str="TODO") +class crop(Operation): + input_spec = InputSpec( + x=TensorInputType(), + crop_height=IntTensorInputType(const=True), + crop_width=IntTensorInputType(const=True), + ) + + def __init__(self, **kwargs): + super(crop, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank < 2: + raise ValueError( + 'input to the "crop" op must at least be of rank 2. Provided {}'.format( + self.x.rank + ) + ) + + crop_height = self.crop_height.val + crop_width = self.crop_width.val + + if len(crop_height.flatten()) != 2: + raise ValueError( + "crop_height must have 2 elements. Provided {}".format( + len(crop_height.flatten()) + ) + ) + + if len(crop_width.flatten()) != 2: + raise ValueError( + "crop_width must have 2 elements. Provided {}".format( + len(crop_width.flatten()) + ) + ) + + input_shape = list(self.x.shape) + ret_shape = ( + input_shape[:-2] + + [input_shape[-2] - crop_height[0] - crop_height[1]] + + [input_shape[-1] - crop_width[0] - crop_width[1]] + ) + return types.tensor(self.x.dtype, ret_shape) diff --git a/coremltools/converters/mil/mil/ops/defs/linear.py b/coremltools/converters/mil/mil/ops/defs/linear.py new file mode 100644 index 000000000..af845ea61 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/linear.py @@ -0,0 +1,108 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.types.symbolic import is_symbolic +from ._op_reqs import * +from ._utils import broadcast_shapes + + +@register_op( + doc_str=""" +Performs x*weight.T + bias where weight and bias are const at compile time. + +Inputs + +* x: <*D,D_in,T> (Required) + * 1 <= rank <= 3 + * 0 <= rank(*D) <= 2 +* weight: const (Required) +* bias: const (Optional. Default to 0) + +Outputs + +* <*D,D_out,T> + * same rank as the input + +Type Domains + +* T: f32 +""" +) +class linear(Operation): + input_spec = InputSpec( + x=TensorInputType(), + weight=TensorInputType(const=True), + bias=TensorInputType(const=True, optional=True), + ) + + def __init__(self, **kwargs): + super(linear, self).__init__(**kwargs) + + def type_inference(self): + x_type = self.x.dtype + x_shape = self.x.shape + weight_shape = self.weight.shape + assert len(weight_shape) == 2 + + shape = list(x_shape) + shape[-1] = weight_shape[0] + return types.tensor(x_type, tuple(shape)) + + @precondition(allow=VALUE) + def value_inference(self): + res = np.matmul(self.x.val, np.transpose(self.weight.val)) + if self.bias is not None: + res += self.bias.val + return res + + +# rdar://58622145 +@register_op(doc_str="TODO") +class matmul(Operation): + input_spec = InputSpec( + x=TensorInputType(), + y=TensorInputType(), + transpose_x=BoolInputType(const=True, default=False), + transpose_y=BoolInputType(const=True, default=False), + ) + + def __init__(self, **kwargs): + super(matmul, self).__init__(**kwargs) + + def type_inference(self): + # rdar://58621799 TODO: handle 1D x, y + x_type = self.x.dtype + x_shape = list(self.x.shape) + y_shape = list(self.y.shape) + + if self.transpose_x.val: + x_shape = list(x_shape) + x_shape[-1], x_shape[-2] = x_shape[-2], x_shape[-1] + x_shape = tuple(x_shape) + if self.transpose_y.val: + y_shape = list(y_shape) + y_shape[-1], y_shape[-2] = y_shape[-2], y_shape[-1] + y_shape = tuple(y_shape) + if not ( + x_shape[-1] == y_shape[-2] + or is_symbolic(x_shape[-1]) + or is_symbolic(y_shape[-2]) + ): + msg = "Op {} (matmul): x {}, y {} are not broadcastable" + raise ValueError(msg.format(self.name, self.x.shape, self.y.shape)) + + ret_shape = list(broadcast_shapes(x_shape[:-2], y_shape[:-2])) + ret_shape += [x_shape[-2], y_shape[-1]] + return types.tensor(x_type, tuple(ret_shape)) + + @precondition(allow=VALUE) + def value_inference(self): + x = self.x.val + if self.transpose_x.val: + x = np.transpose(x) + y = self.y.val + if self.transpose_y.val: + y = np.transpose(y) + return np.matmul(x, y) diff --git a/coremltools/converters/mil/mil/ops/defs/normalization.py b/coremltools/converters/mil/mil/ops/defs/normalization.py new file mode 100644 index 000000000..3c16329c9 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/normalization.py @@ -0,0 +1,108 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from ._op_reqs import * + + +@register_op(doc_str="TODO") +class batch_norm(Operation): + input_spec = InputSpec( + x=TensorInputType(), + mean=TensorInputType(const=True), + variance=TensorInputType(const=True), + gamma=TensorInputType(const=True, optional=True), + beta=TensorInputType(const=True, optional=True), + epsilon=FloatInputType(const=True, default=1e-5), + ) + + def __init__(self, **kwargs): + super(batch_norm, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + +@register_op(doc_str="TODO") +class instance_norm(Operation): + input_spec = InputSpec( + x=TensorInputType(), + gamma=TensorInputType(const=True, optional=True), + beta=TensorInputType(const=True, optional=True), + epsilon=FloatInputType(const=True, default=1e-5), + ) + + def __init__(self, **kwargs): + super(instance_norm, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + +@register_op(doc_str="TODO") +class l2_norm(Operation): + input_spec = InputSpec( + x=TensorInputType(), + axes=IntTensorInputType(), + epsilon=FloatInputType(const=True, default=1e-12), + ) + + def __init__(self, **kwargs): + super(l2_norm, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + +@register_op(doc_str="TODO") +class layer_norm(Operation): + input_spec = InputSpec( + x=TensorInputType(), + axes=IntTensorInputType(const=True, optional=True), + gamma=TensorInputType(const=True, optional=True), + beta=TensorInputType(const=True, optional=True), + epsilon=FloatInputType(const=True, default=1e-5), + ) + + def __init__(self, **kwargs): + super(layer_norm, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type + + @precondition(allow=VALUE) + def value_inference(self): + def np_layer_norm(x, axes, gamma, beta, epsilon=1e-5): + normalized_shape = x.shape[-len(axes) :] + gamma = np.ones(shape=normalized_shape) if gamma is None else gamma + beta = np.zeros(shape=normalized_shape) if beta is None else beta + num = x - np.mean(x, axis=tuple(axes), keepdims=True) + dem = np.sqrt( + np.sum(np.square(num), axis=tuple(axes), keepdims=True) + / np.prod(normalized_shape) + + epsilon + ) + return num / dem * gamma + beta + + _axes = self.x.shape if self.axes is None else self.axes.val + _gamma = None if self.gamma is None else self.gamma.val + _beta = None if self.beta is None else self.beta.val + return np_layer_norm(self.x.val, _axes, _gamma, _beta, self.epsilon.val) + + +@register_op(doc_str="TODO") +class local_response_norm(Operation): + input_spec = InputSpec( + x=TensorInputType(), + size=IntInputType(const=True), + alpha=FloatInputType(const=True, default=1e-4), + beta=FloatInputType(const=True, default=0.75), + k=FloatInputType(const=True, default=1.0), + ) + + def __init__(self, **kwargs): + super(local_response_norm, self).__init__(**kwargs) + + def type_inference(self): + return self.x.sym_type diff --git a/coremltools/converters/mil/mil/ops/defs/pool.py b/coremltools/converters/mil/mil/ops/defs/pool.py new file mode 100644 index 000000000..2a0e57e80 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/pool.py @@ -0,0 +1,180 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from coremltools.converters.mil.mil.ops.defs._utils import spatial_dimensions_out_shape
+
+from ._op_reqs import *
+
+"""
+Pooling Op Superclass
+"""
+
+
+class Pooling(Operation):
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        kernel_sizes=IntTensorInputType(const=True),
+        strides=IntTensorInputType(const=True, optional=True),
+        pad_type=StringInputType(const=True),
+        pad=IntTensorInputType(const=True, optional=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(Pooling, self).__init__(**kwargs)
+
+    def type_inference(self):
+        ksize = self.kernel_sizes.val
+        x_shape = self.x.shape
+        D_in_rank = len(x_shape) - 2
+
+        strides = [1] * D_in_rank if self.strides is None else self.strides.val
+        pad_type = "valid" if self.pad_type is None else self.pad_type.val.lower()
+        pad = None if self.pad is None else self.pad.val
+        D_in = x_shape[2:]  # spatial dimensions
+        D_out_shape = spatial_dimensions_out_shape(
+            pad_type=pad_type,
+            input_shape=D_in,
+            kernel_shape=ksize,
+            strides=strides,
+            custom_pad=pad,
+        )
+        ret_shape = list(x_shape[:2]) + D_out_shape
+        return types.tensor(self.x.dtype, tuple(ret_shape))
+
+
+@register_op(doc_str="")
+class avg_pool(Pooling):
+    """
+    Perform average pooling. Currently only 1D and 2D pooling are supported.
+
+    Parameters
+    ----------
+    x: tensor<[n,C_in,*D_in],T> (Required)
+        * ``D_in`` are spatial dimensions, ``1 <= len(D_in) <= 2``
+        * ``C_in`` is the number of input channels or depth dimensions
+        * ``n`` is the batch dimension
+
+    kernel_sizes: const tensor<[K],i32> (Required)
+        * The size of the window for each spatial dimension ``D_in`` of the input tensor.
+        * ``K == len(D_in)``
+
+    strides: const tensor<[S],i32> (Optional. Default to all 1s)
+        * Stride along each of the spatial dimensions. ``S == len(D_in)``
+
+    pad_type: const str (Required)
+        * Must be one of the following:
+        * ``valid``: No padding. This is equivalent to custom pad with ``pad[i] = 0, for all i``
+        * ``custom``: Specify custom padding in the parameter ``pad``. Note that ``same`` padding (with stride 1) is equivalent to custom padding with ``pad[2*i] + pad[2*i+1] = kernel_size[i] - 1``
+
+    pad: const<[P],i32> (Optional. Default to all 0s)
+        * ``pad`` represents the number of elements to pad before and after each dimension: ``pad[2*i], pad[2*i+1]`` are the pad sizes before and after spatial dimension ``i``.
+        * ``P = 2 * len(D_in)``
+        * ``pad`` should be specified if and only if ``pad_type == custom``
+
+    exclude_padding_from_average: const tensor<[], bool> (Optional, Default to False)
+        * If true, padded values (0s) are excluded from the denominator count when computing the average over the kernel window
+
+    Returns
+    -------
+    tensor<[n, C_out,*D_out],T>
+        * Same rank as ``x``
+        * ``D_out[i] = floor[(D_in[i] + pad[2*i] + pad[2*i+1] - kernel_sizes[i]) / strides[i]] + 1, for i = 0, .., len(D_in) - 1``
+        * When all parameters involved are integers, this is mathematically the same as ``D_out[i] = ceil[(D_in[i] + pad[2*i] + pad[2*i+1] - kernel_sizes[i] + 1) / strides[i]], for i = 0, .., len(D_in) - 1``
+
+    Attributes
+    ----------
+    T: fp32
+
+    See Also
+    --------
+    l2_pool, max_pool
+    """
+
+    input_spec = (
+        InputSpec(exclude_padding_from_average=BoolInputType(const=True, default=False))
+        + Pooling.input_spec
+    )
+
+    def __init__(self, **kwargs):
+        super(avg_pool, self).__init__(**kwargs)
+
+
+@register_op(doc_str="")
+class l2_pool(Pooling):
+    """
+    Perform L2 pooling. 
Currently only support 1D and 2D. + + Parameters + ---------- + x: tensor<[n,C_in,*D_in],T> (Required) + * See ``avg_pool`` + + kernel_sizes: const tensor<[K],T> (Required) + * See ``avg_pool`` + + strides: const tensor<[S],i32> (Optional. Default to all 1s) + * See ``avg_pool`` + + pad_type: const str (Required) + * See ``avg_pool`` + + pad: const<[P],i32> (Optional. Default to all 0s) + * See ``avg_pool`` + + Returns + ------- + tensor<[n, C_out,*D_out],T> + * See ``avg_pool`` + + Attributes + ---------- + T: fp32 + + See Also + -------- + avg_pool, max_pool + """ + + def __init__(self, **kwargs): + super(l2_pool, self).__init__(**kwargs) + + +@register_op(doc_str="") +class max_pool(Pooling): + """ + Perform max pooling. Currently only support 1D and 2D. + + Parameters + ---------- + x: tensor<[n,C_in,*D_in],T> (Required) + * See ``avg_pool`` + + kernel_sizes: const tensor<[K],T> (Required) + * See ``avg_pool`` + + strides: const tensor<[S],i32> (Optional. Default to all 1s) + * See ``avg_pool`` + + pad_type: const str (Required) + * See ``avg_pool`` + + pad: const<[P],i32> (Optional. Default to all 0s) + * See ``avg_pool`` + + Returns + ------- + tensor<[n, C_out,*D_out],T> + * See ``avg_pool`` + + Attributes + ---------- + T: fp32 + + See Also + -------- + avg_pool, l2_pool + """ + + def __init__(self, **kwargs): + super(max_pool, self).__init__(**kwargs) diff --git a/coremltools/converters/mil/mil/ops/defs/random.py b/coremltools/converters/mil/mil/ops/defs/random.py new file mode 100644 index 000000000..05f74350a --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/random.py @@ -0,0 +1,214 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.types.symbolic import any_symbolic +from coremltools.converters.mil.mil import get_new_symbol, get_new_variadic_symbol +from ._op_reqs import * + +""" +Random Op Superclass +""" + + +class RandomDistribution(Operation): + input_spec = InputSpec(shape=IntTensorInputType(),) + + def __init__(self, **kwargs): + super(RandomDistribution, self).__init__(**kwargs) + + def type_inference(self): + if any_symbolic(self.shape.shape): + # We can't infer any shape if shape has variable length. + return types.tensor(types.fp32, (get_new_variadic_symbol(),)) + + # shape has fixed length here. + if self.shape.sym_val is None: + shape = tuple([get_new_symbol() for _ in range(self.shape.shape[0])]) + return types.tensor(types.fp32, shape) + + return types.tensor(types.fp32, tuple(self.shape.sym_val.tolist())) + + +""" +Random Op Implementation(s) +""" + + +@register_op( + doc_str=r""" +Returns a tensor with specified shape with random values from a Bernoulli distribution. + +.. math:: + + f(k) = \begin{cases}1-p &\text{if } k = 0\\ + p &\text{if } k = 1\end{cases} + +for :math:`k` in :math:`\{0, 1\}`. + +Parameters +---------- +shape: , required + Target output tensor shape. + K is the rank of the output tensor. shape[k] > 0 for k = 0,..., K-1. +prob: const, optional + The probability of sampling 1. Defaults to 0.5. +seed: const, optional + Seed to create a reproducible sequence of values across multiple invokes. + +Returns +------- +<*, T>, a tensor of given target output shape filled with random values. 
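+
+Example
+-------
+A NumPy sketch of the sampling semantics (illustrative only; the generator
+usage is an assumption, not this op's implementation):
+
+.. code-block:: python
+
+    import numpy as np
+
+    rng = np.random.default_rng(42)  # fixed seed for reproducibility
+    shape, prob = (2, 3), 0.5
+    # 1 with probability `prob`, else 0
+    samples = (rng.random(shape) < prob).astype(np.float32)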
+
+See Also
+--------
+random_categorical, random_normal, random_uniform
+"""
+)
+class random_bernoulli(RandomDistribution):
+    input_spec = (
+        InputSpec(
+            shape=IntTensorInputType(),
+            prob=FloatInputType(const=True, default=0.5),
+            seed=IntInputType(const=True, default=-1),
+        )
+        + RandomDistribution.input_spec
+    )
+
+    def __init__(self, **kwargs):
+        super(random_bernoulli, self).__init__(**kwargs)
+
+
+@register_op(
+    doc_str=r"""
+Returns random values from a categorical distribution.
+
+Parameters
+----------
+x: <*D_in, T>
+    N-dimensional tensor, one of logits (event log-probabilities) or probs
+    (event probabilities). The first N - 1 dimensions specify distributions;
+    the last dimension represents a vector of probabilities.
+mode: const<str>, optional
+    One of ['logits', 'probs']. Defaults to 'logits'.
+size: const<i32>, optional
+    Number of samples to draw. Defaults to 1.
+seed: const<i32>, optional
+    Seed to create a reproducible sequence of values across multiple invocations.
+
+Returns
+-------
+<*D_in[:-1] + [size], T>, a tensor of the given target output shape filled with random values.
+
+See Also
+--------
+random_bernoulli, random_normal, random_uniform
+"""
+)
+class random_categorical(Operation):
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        mode=StringInputType(const=True, default="logits"),
+        size=IntInputType(const=True, default=1),
+        seed=IntInputType(const=True, default=-1),
+    )
+
+    def __init__(self, **kwargs):
+        super(random_categorical, self).__init__(**kwargs)
+
+    def type_inference(self):
+        output_shape = self.x.shape[:-1] + (self.size.val,)
+        return types.tensor(types.fp32, output_shape)
+
+
+@register_op(
+    doc_str=r"""
+Returns a tensor with the specified shape, with random values from a normal distribution.
+
+.. math::
+
+    f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}
+
+for a real number :math:`x`.
+
+Parameters
+----------
+shape: <K, i32>, required
+    Target output tensor shape.
+    K is the rank of the output tensor. shape[k] > 0 for k = 0,..., K-1.
+mean: const<f32>, optional
+    The mean (center) of the normal distribution. Defaults to 0.0.
+stddev: const<f32>, optional
+    The standard deviation (width) of the normal distribution. Defaults to 1.0.
+seed: const<i32>, optional
+    Seed to create a reproducible sequence of values across multiple invocations.
+
+Returns
+-------
+<*, T>, a tensor of the given target output shape filled with random values.
+
+See Also
+--------
+random_categorical, random_bernoulli, random_uniform
+"""
+)
+class random_normal(RandomDistribution):
+    input_spec = (
+        InputSpec(
+            shape=IntTensorInputType(),
+            mean=FloatInputType(const=True, default=0.0),
+            stddev=FloatInputType(const=True, default=1.0),
+            seed=IntInputType(const=True, default=-1),
+        )
+        + RandomDistribution.input_spec
+    )
+
+    def __init__(self, **kwargs):
+        super(random_normal, self).__init__(**kwargs)
+
+
+@register_op(
+    doc_str=r"""
+Returns a tensor with the specified shape, with random values from a uniform distribution.
+
+.. math::
+
+    p(x) = \frac{1}{high - low}
+
+for a real number :math:`x` in the interval ``[low, high)``.
+
+Parameters
+----------
+shape: <K, i32>, required
+    Target output tensor shape.
+    K is the rank of the output tensor. shape[k] > 0 for k = 0,..., K-1.
+low: const<f32>, optional
+    Lower boundary of the output interval (inclusive). Defaults to 0.0.
+high: const<f32>, optional
+    Upper boundary of the output interval (exclusive). Defaults to 1.0.
+seed: const<i32>, optional
+    Seed to create a reproducible sequence of values across multiple invocations.
+
+Returns
+-------
+<*, T>, a tensor of the given target output shape filled with random values.
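+
+Example
+-------
+A NumPy sketch of the sampling semantics (illustrative only, not this op's
+implementation):
+
+.. code-block:: python
+
+    import numpy as np
+
+    rng = np.random.default_rng(42)
+    low, high = 0.0, 1.0
+    samples = rng.uniform(low, high, size=(2, 3)).astype(np.float32)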
+ +See Also +-------- +random_categorical, random_bernoulli, random_normal +""" +) +class random_uniform(RandomDistribution): + input_spec = ( + InputSpec( + shape=IntTensorInputType(), + low=FloatInputType(const=True, default=0.0), + high=FloatInputType(const=True, default=1.0), + seed=IntInputType(const=True, default=-1), + ) + + RandomDistribution.input_spec + ) + + def __init__(self, **kwargs): + super(random_uniform, self).__init__(**kwargs) diff --git a/coremltools/converters/mil/mil/ops/defs/recurrent.py b/coremltools/converters/mil/mil/ops/defs/recurrent.py new file mode 100644 index 000000000..3f492fa10 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/recurrent.py @@ -0,0 +1,189 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import get_new_symbol +from ._op_reqs import * + + +@register_op(doc_str="TODO") +class gru(Operation): + input_spec = InputSpec( + x=TensorInputType(), + initial_h=TensorInputType(), + weight=TensorInputType(const=True), + bias=TensorInputType(const=True, optional=True, default=None), + direction=StringInputType(const=True, default="forward"), + output_sequence=BoolInputType(const=True, default=False), + activations=TupleInputType(const=True, default=("sigmoid", "tanh")), + ) + + def __init__(self, **kwargs): + super(gru, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank != 3: + raise ValueError( + "Invalid input shape. Expecting Rank 3 input, got {}".format( + len(self.x.shape) + ) + ) + + sequence_length, batch_size, input_size = self.x.shape + + if self.weight.rank != 2: + raise ValueError( + "Invalid weight shape. Expecting Rank 2 input, got {}".format( + len(self.weight.shape) + ) + ) + + input_hidden_size, hidden_dim = self.weight.shape + hidden_size = input_hidden_size - input_size + + direction = self.direction.val + valid_directions = {"forward", "reverse"} + if direction not in valid_directions: + raise ValueError( + "Direction {} not supported. Supported directions: {}".format( + direction, valid_directions + ) + ) + + dim_factor = 3 + if hidden_size != (hidden_dim // dim_factor): + raise ValueError( + "Incorrect weight matrix: hidden dim size mismatch. \ + Provided {}. Expecting ".format( + self.weight.shape + ) + ) + + out_seq_len = sequence_length if self.output_sequence.val else 1 + output_shape = [out_seq_len, batch_size, hidden_size] + output_h_shape = [batch_size, hidden_size] + return ( + types.tensor(self.x.dtype, tuple(output_shape)), + types.tensor(self.x.dtype, tuple(output_h_shape)), + ) + + +@register_op(doc_str="TODO") +class lstm(Operation): + input_spec = InputSpec( + x=TensorInputType(), + initial_h=TensorInputType(), + initial_c=TensorInputType(), + weight=TensorInputType(const=True), # ifoz layout + bias=TensorInputType(const=True, optional=True, default=None), # ifoz layout + direction=StringInputType(const=True, default="forward"), + output_sequence=BoolInputType(const=True, default=False), + activations=TupleInputType(const=True, default=("sigmoid", "tanh", "tanh")), + peephole=TensorInputType(const=True, optional=True, default=None), # ifo layout + clip=FloatInputType(const=True, optional=True, default=None), + ) + + def __init__(self, **kwargs): + super(lstm, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank != 3: + raise ValueError( + "Invalid input shape. 
Expecting Rank 3 input, got {}".format( + len(self.x.shape) + ) + ) + + sequence_length, batch_size, input_size = self.x.shape + + if self.weight.rank != 2: + raise ValueError( + "Invalid weight shape. Expecting Rank 2 input, got {}".format( + len(self.weight.shape) + ) + ) + + input_hidden_size, hidden_dim = self.weight.shape + hidden_size = input_hidden_size - input_size + + direction = self.direction.val + valid_directions = {"forward", "reverse", "bidirectional"} + if direction not in valid_directions: + raise ValueError( + "Direction {} not supported. Supported directions: {}".format( + direction, valid_directions + ) + ) + + dim_factor = 8 if direction == "bidirectional" else 4 + if hidden_size != (hidden_dim // dim_factor): + raise ValueError( + "Incorrect weight matrix: hidden dim size mismatch. \ + Provided {}. Expecting ".format( + self.weight.shape + ) + ) + + out_seq_len = sequence_length if self.output_sequence.val else 1 + num_directions = dim_factor // 4 + output_shape = [out_seq_len, batch_size, num_directions * hidden_size] + output_h_shape = [batch_size, num_directions * hidden_size] + output_c_shape = [batch_size, num_directions * hidden_size] + return ( + types.tensor(self.x.dtype, tuple(output_shape)), + types.tensor(self.x.dtype, tuple(output_h_shape)), + types.tensor(self.x.dtype, tuple(output_c_shape)), + ) + + +@register_op(doc_str="TODO") +class rnn(Operation): + input_spec = InputSpec( + x=TensorInputType(), + initial_h=TensorInputType(), + weight=TensorInputType(const=True), + bias=TensorInputType(const=True, optional=True, default=None), + direction=StringInputType(const=True, default="forward"), + output_sequence=BoolInputType(const=True, default=False), + activation=StringInputType(const=True, default="tanh"), + ) + + def __init__(self, **kwargs): + super(rnn, self).__init__(**kwargs) + + def type_inference(self): + if self.x.rank != 3: + raise ValueError( + "Invalid input shape. Expecting Rank 3 input, got {}".format( + len(self.x.shape) + ) + ) + + sequence_length, batch_size, input_size = self.x.shape + + if self.weight.rank != 2: + raise ValueError( + "Invalid weight shape. Expecting Rank 2 input, got {}".format( + len(self.weight.shape) + ) + ) + + _, hidden_size = self.weight.shape + + direction = self.direction.val + valid_directions = {"forward", "reverse"} + if direction not in valid_directions: + raise ValueError( + "Direction {} not supported. Supported directions: {}".format( + direction, valid_directions + ) + ) + + out_seq_len = sequence_length if self.output_sequence.val else 1 + output_shape = [out_seq_len, batch_size, hidden_size] + output_h_shape = [batch_size, hidden_size] + return ( + types.tensor(self.x.dtype, tuple(output_shape)), + types.tensor(self.x.dtype, tuple(output_h_shape)), + ) diff --git a/coremltools/converters/mil/mil/ops/defs/reduction.py b/coremltools/converters/mil/mil/ops/defs/reduction.py new file mode 100644 index 000000000..8bbbdbcef --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/reduction.py @@ -0,0 +1,228 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import scipy +from ._op_reqs import * + +""" +Reduction Op Superclasses +""" + + +class ReductionAxes(Operation): + input_spec = InputSpec( + x=TensorInputType(), + axes=IntTensorInputType(const=True, optional=True, default=None), + keep_dims=BoolInputType(const=True, optional=True, default=False), + ) + + def __init__(self, **kwargs): + super(ReductionAxes, self).__init__(**kwargs) + + def type_inference(self): + x_type = self.x.dtype + x_shape = self.x.shape + axes = self.axes.val if self.axes is not None else None + if axes is None: + axes = range(self.x.rank) + keep_dims = self.keep_dims.val + + reduced_shape = list(x_shape) + if keep_dims: + for i in axes: + reduced_shape[i] = 1 + else: + # sort reverse so we can delete shape elements back to front + axes = [axis if axis >= 0 else axis + len(reduced_shape) for axis in axes] + for i in sorted(axes)[::-1]: + reduced_shape.pop(i) + if len(reduced_shape) == 0: + return x_type # scalar + + return types.tensor(x_type, tuple(reduced_shape)) + + @precondition(allow=VALUE) + def value_inference(self): + axes = tuple(self.axes.val) if self.axes is not None else None + return self.get_operator()(self.x.val, axis=axes, keepdims=self.keep_dims.val) + + def get_operator(self): + raise NotImplementedError() + + +class ReductionAxis(Operation): + input_spec = InputSpec( + x=TensorInputType(), + axis=IntInputType(const=True, optional=True, default=-1), + keep_dims=BoolInputType(const=True, optional=True, default=False), + ) + + def __init__(self, **kwargs): + super(ReductionAxis, self).__init__(**kwargs) + + def type_inference(self): + x_type = self.x.dtype + x_shape = self.x.shape + axis = self.axis.val + + reduced_shape = list(x_shape) + axis = axis if axis >= 0 else axis + len(reduced_shape) + if self.keep_dims.val: + reduced_shape[axis] = 1 + else: + reduced_shape.pop(axis) + + return types.tensor(x_type, tuple(reduced_shape)) + + @precondition(allow=VALUE) + def value_inference(self): + return self.get_operator()(self.x.val, axis=self.axis.val) + + def get_operator(self): + raise NotImplementedError() + + +""" +Reduction op implementations +""" + + +@register_op(doc_str="TODO") +class reduce_arg(ReductionAxis): + def __init__(self, **kwargs): + super(reduce_arg, self).__init__(**kwargs) + + def type_inference(self): + x_shape = self.x.shape + axis = self.axis.val + + reduced_shape = list(x_shape) + axis = axis if axis >= 0 else axis + len(reduced_shape) + if self.keep_dims.val: + reduced_shape[axis] = 1 + else: + reduced_shape.pop(axis) + + return types.tensor(types.int32, tuple(reduced_shape)) + + +@register_op(doc_str="TODO") +class reduce_argmax(reduce_arg): + def __init__(self, **kwargs): + super(reduce_argmax, self).__init__(**kwargs) + + def get_operator(self): + return np.argmax + + +@register_op(doc_str="TODO") +class reduce_argmin(reduce_arg): + def __init__(self, **kwargs): + super(reduce_argmin, self).__init__(**kwargs) + + def get_operator(self): + return np.argmin + + +@register_op(doc_str="TODO") +class reduce_l1_norm(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_l1_norm, self).__init__(**kwargs) + + def get_operator(self): + def l1_norm(x, axis=None, keepdims=False): + return np.sum(np.abs(x), axis=axis, keepdims=keepdims) + + return l1_norm + + +@register_op(doc_str="TODO") +class reduce_l2_norm(ReductionAxes): + def __init__(self, **kwargs): + 
super(reduce_l2_norm, self).__init__(**kwargs) + + def get_operator(self): + def l2_norm(x, axis=None, keepdims=False): + return np.sqrt(np.sum(np.square(x), axis=axis, keepdims=keepdims)) + + return l2_norm + + +@register_op(doc_str="TODO") +class reduce_log_sum(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_log_sum, self).__init__(**kwargs) + + def get_operator(self): + def log_sum(x, axis=None, keepdims=False): + return np.log(np.sum(x, axis=axis, keepdims=keepdims)) + + return log_sum + + +@register_op(doc_str="TODO") +class reduce_log_sum_exp(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_log_sum_exp, self).__init__(**kwargs) + + def get_operator(self): + return scipy.special.logsumexp + + +@register_op(doc_str="TODO") +class reduce_max(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_max, self).__init__(**kwargs) + + def get_operator(self): + return np.max + + +@register_op(doc_str="TODO") +class reduce_mean(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_mean, self).__init__(**kwargs) + + def get_operator(self): + return np.mean + + +@register_op(doc_str="TODO") +class reduce_min(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_min, self).__init__(**kwargs) + + def get_operator(self): + return np.min + + +@register_op(doc_str="TODO") +class reduce_prod(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_prod, self).__init__(**kwargs) + + def get_operator(self): + return np.prod + + +@register_op(doc_str="TODO") +class reduce_sum(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_sum, self).__init__(**kwargs) + + def get_operator(self): + return np.sum + + +@register_op(doc_str="TODO") +class reduce_sum_square(ReductionAxes): + def __init__(self, **kwargs): + super(reduce_sum_square, self).__init__(**kwargs) + + def get_operator(self): + def sum_squre(x, axis=None, keepdims=False): + return np.sum(np.square(x), axis=axis, keepdims=keepdims) + + return sum_squre diff --git a/coremltools/converters/mil/mil/ops/defs/scatter_gather.py b/coremltools/converters/mil/mil/ops/defs/scatter_gather.py new file mode 100644 index 000000000..d63c09ebd --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/scatter_gather.py @@ -0,0 +1,453 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import SYMBOL, VALUE +from coremltools.converters.mil.mil.types.symbolic import is_compatible_symbolic_vector +from ._op_reqs import * +import numbers + + +@register_op(doc_str="") +class gather(Operation): + """ + Gather slices from input ``x`` along dimension ``axis`` according to ``indices``, similar to ``tf.gather``. + + If ``indices`` is scalar (0-D): + ``output[p_0, ..., p_{axis-1}, ,p_{axis+1}, ..., p_{rank(x)-1}] + = x[p_0, ..., p_{axis-1}, indices, p_{axis+1}, ..., p_{rank(x)-1}]`` + where ``rank(x)`` is the rank of ``x``.\n + The output has rank ``rank(x) - 1``. + + If indices is 1-D tensor: + ``output[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_{rank(*D)-1}] + = x[p_0, ..., p_{axis-1}, indices[i], p_{axis+1}, ..., p_{rank(*D)-1}]``. + The output has rank ``rank(x)``. + + In general, + ``output[p_0, ..., p_{axis-1}, i_0, ..., i_{M-1} , p_{axis+1}, ..., p_{rank(x)-1}] + = x[p_0, ..., p_{axis-1}, indices[i_0, ..., i_{M-1}], p_{axis+1}, ..., p_{rank(x)-1}]``. + + where ``M = rank(x)``. 
+ + Parameters + ---------- + * x: tensor<*D,T> (Required) + * indices: tensor<*N,i32> (Required) + * Indices values may be negative. More precisely, ``-D[axis]<= v < D[axis]`` for ``v`` in ``indices``. + * axis: const i32 (Optional) + * Default to ``0``. + + Returns + ------- + tensor<*K,T> + * where ``K = D[:axis] + N + D[axis+1:]``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + x=TensorInputType(), + indices=IntOrIntTensorInputType(), + axis=IntInputType(const=True, default=0), + ) + + def __init__(self, **kwargs): + super(gather, self).__init__(**kwargs) + + @precondition(allow=VALUE | SYMBOL) + def value_inference(self): + x = self.x.sym_val + indices = self.indices.val + if indices is None: + # only allow x to be symbolic. indices cannot. + return None + scalar_indices = isinstance(indices, numbers.Integral) + axis = self.axis.val + if scalar_indices: + res = np.take(x, [indices], axis) + res2 = np.squeeze(res, axis=axis) + if isinstance(res2, np.ndarray) and len(res2.shape) == 0: + # res2 is a scalar, but represented as np.array(symbol, + # dtype=np.object) which np.squeeze can't remove. + return res2.item() + return res2 + return np.take(x, indices, axis) + + def type_inference(self): + out_type = self.x.dtype + + if self.axis.val < -self.x.rank or self.axis.val >= self.x.rank: + raise IndexError( + "Axis value {} is out of bounds for {} node {}".format( + self.axis.val, self.op_type, self.name + ) + ) + + output_rank = self.x.rank - 1 + self.indices.rank + if output_rank == 0: + # output scalar + return out_type + + axis = self.axis.val + axis = axis if axis >= 0 else axis + self.x.rank + out_shape = self.x.shape[:axis] + self.indices.shape + self.x.shape[axis + 1 :] + return types.tensor(out_type, out_shape) + + +@register_op(doc_str="") +class scatter(Operation): + """ + Scatter ``updates`` to ``data`` at locations ``indices`` at dimension ``axis`` by operation ``mode``. + ``mode == update`` for instance. + `` + output[p_0, ..., p_{axis-1}, indice[i], p_{axis+1}, ..., p_D] = + updates[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] + `` for ``i`` in ``[0, len(indices)]``. + Otherwise + `` + output[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] = + data[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] + `` + + ``mode == add`` for instance. + `` + output[p_0, ..., p_{axis-1}, indice[i], p_{axis+1}, ..., p_D] = + updates[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] + + x[p_0, ..., p_{axis-1}, indice[i], p_{axis+1}, ..., p_D] + `` for ``i`` in ``[0, len(indices)] + Otherwise + `` + output[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] = + data[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] + `` + + Parameters + ---------- + data: tensor<*D, T> (Required) + indices: tensor<[C],T> (Required) + * 1D tensor. + updates: tensor<*K, T> (Required) + * ``K = data.shape[:axis] + [len(indices)] + data.shape[axis+1:]``. + axis: const i32 (Optional) + * Default to ``0``. + mode: const string (Optional) + * Can be the following modes: + * update + * add + * sub + * mul + * div + * max + * min + + Returns + ------- + tensor<*D, T> + * with the same type and shape as input ``x``. 
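+
+    Example
+    -------
+    A NumPy sketch of ``mode == add`` with ``axis = 0`` (an illustrative
+    reference, not this op's implementation):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        data = np.zeros((3, 2), dtype=np.float32)
+        indices = np.array([1, 1])
+        updates = np.ones((2, 2), dtype=np.float32)
+        np.add.at(data, indices, updates)  # row 1 accumulates both updates
+        # data is now [[0., 0.], [2., 2.], [0., 0.]]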
+ + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + data=TensorInputType(), + indices=IntTensorInputType(), + updates=TensorInputType(), + axis=IntInputType(const=True, default=0), + mode=StringInputType(const=True, default="add"), + ) + + def __init__(self, **kwargs): + super(scatter, self).__init__(**kwargs) + + def type_inference(self): + if self.axis.val < -self.data.rank or self.axis.val >= self.data.rank: + raise IndexError( + "Axis value {} is out of bounds for {} node {}".format( + self.axis.val, self.op_type, self.name + ) + ) + + axis = self.axis.val + axis = axis if axis >= 0 else axis + self.data.rank + expected_updates_shape = ( + self.data.shape[:axis] + self.indices.shape + self.data.shape[axis + 1 :] + ) + np.testing.assert_equal(self.updates.shape, np.array(expected_updates_shape)) + + return self.data.sym_type + + +@register_op(doc_str="") +class gather_along_axis(Operation): + """ + Take the values along ``axis`` at locations ``indices``. + `` + idx = indices[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D], + output[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] = + = x[p_0, ..., p_{axis-1}, idx, p_{axis+1}, ..., p_D] + `` + + Parameters + ---------- + x: tensor<*D, T> (Required) + indices: tensor<*K, T> (Required) + * ``rank(indices) == rank(x)``. + axis: const i32 (Optional): + * Default to ``0``. + + Returns + ------- + tensor<*D, T>: + * output tensor has the same shape as ``indices``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + x=TensorInputType(), + indices=IntTensorInputType(), + axis=IntInputType(const=True, default=0), + ) + + def __init__(self, **kwargs): + super(gather_along_axis, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + x = self.x.val + indices = self.indices.val + axis = self.axis.val + return np.take_along_axis(x, indices, axis) + + def type_inference(self): + + if self.x.rank != self.indices.rank: + raise ValueError( + "Rank mismatch between input and indices. \ + Input rank: {}, indices rank: {}".format( + self.x.rank, self.indices.rank + ) + ) + + if self.axis.val < -self.x.rank or self.axis.val >= self.x.rank: + raise IndexError( + "Axis value {} is out of bounds for {} node {}".format( + self.axis.val, self.op_type, self.name + ) + ) + + axis = self.axis.val + axis = axis if axis >= 0 else axis + self.x.rank + + for i in range(self.x.rank): + if i != axis: + assert self.x.shape[i] == self.indices.shape[i] + + return types.tensor(self.x.dtype, self.indices.shape) + + +@register_op(doc_str="") +class scatter_along_axis(Operation): + """ + Scatter ``updates`` to ``data`` at locations ``indices`` at dimension ``axis`` by operation ``mode``. + ``mode == update`` for instance. + `` + idx = indices[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] + output[p_0, ..., p_{axis-1}, idx, p_{axis+1}, ..., p_D] = + updates[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] + `` for ``i`` in ``[0, len(indices)]``. + Otherwise + `` + output[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] = + data[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] + `` + + ``mode == add`` for instance. 
+ `` + idx = indices[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] + output[p_0, ..., p_{axis-1}, idx, p_{axis+1}, ..., p_D] = + updates[p_0, ..., p_{axis-1}, i, p_{axis+1}, ..., p_D] + + x[p_0, ..., p_{axis-1}, indice[i], p_{axis+1}, ..., p_D] + `` for ``i`` in ``[0, len(indices)] + Otherwise + `` + output[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] = + data[p_0, ..., p_{axis-1}, j, p_{axis+1}, ..., p_D] + `` + + Parameters + ---------- + data: tensor<*D, T> (Required) + indices: tensor<*K,T> (Required) + * ``rank(indices) == rank(data)``. + updates: tensor<*K, T> (Required) + * must be the same shape as ``indices``. + axis: const i32 (Optional) + * Default to ``0``. + mode: const string (Optional) + * Default to ``add``. + * Can be the following modes: + * ``update`` + * ``add`` + * ``sub`` + * ``mul`` + * ``div`` + * ``max`` + * ``min`` + + Returns + ------- + tensor<*D, T> + * with the same type and shape as input ``x``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + data=TensorInputType(), + indices=IntTensorInputType(), + updates=TensorInputType(), + axis=IntInputType(const=True, default=0), + mode=StringInputType(const=True, default="add"), + ) + + def __init__(self, **kwargs): + super(scatter_along_axis, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + data = np.copy(self.data.val) + indices = self.indices.val + updates = self.updates.val + axis = self.axis.val + np_output = data + np.put_along_axis(np_output, indices, updates, axis=axis) + return np_output + + def type_inference(self): + if self.axis.val < -self.data.rank or self.axis.val >= self.data.rank: + raise IndexError( + "Axis value {} is out of bounds for {} node {}".format( + self.axis.val, self.op_type, self.name + ) + ) + + axis = self.axis.val + axis = axis if axis >= 0 else axis + self.data.rank + + assert self.indices.shape == self.updates.shape + assert self.data.rank == self.indices.rank + for i in range(self.data.rank): + if i != axis: + assert self.data.shape[i] == self.indices.shape[i] + + return self.data.sym_type + + +@register_op(doc_str="") +class gather_nd(Operation): + """ + Gather slices from ``x`` according to ``indices``, similar to ``tf.gather_nd``. + + ``indices`` is an K-dim tensor, where ``indices[i_0,...,i_{K-2}]`` defines a slice of ``x``: + + ``output[i_0, ..., i_{K-2}]= x[indices[i_0, ..., i_{K-2}]]`` + + where ``K = rank(indices)`` , and ``x[indices[i_0, ..., i_{K-2}]]`` has rank ``rank(x) - indices.shape[-1]``. + + Parameters + ---------- + * x: tensor<*D,T> (Required) + * indices: tensor<*K,i32> (Required) + + Returns + ------- + * tensor<*V,T> + * V = K[:-1] + D[K[-1]:], where ``D = x.shape`` and ``K = indices.shape``. 
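+
+    Example
+    -------
+    A NumPy sketch of the indexing semantics (illustrative only):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        x = np.arange(12).reshape(3, 4)
+        indices = np.array([[0, 1], [2, 3]])
+        # each row of `indices` addresses one element of x
+        out = x[tuple(indices.T)]  # -> array([ 1, 11])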
+ + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec(x=TensorInputType(), indices=IntTensorInputType(),) + + def __init__(self, **kwargs): + super(gather_nd, self).__init__(**kwargs) + + def type_inference(self): + assert self.indices.shape[-1] <= self.x.rank + out_type = self.x.dtype + out_shape = self.indices.shape[:-1] + self.x.shape[self.indices.shape[-1] :] + return types.tensor(out_type, out_shape) + + +@register_op(doc_str="") +class scatter_nd(Operation): + """ + Scatter ``updates`` to ``data`` at locations ``indices``.\n + + ``indices`` is an K-dim tensor, where ``indices[i_0,...,i_{K-2}]`` defines a slice of ``data``, + where ``K = rank(indices)`` , and ``data[indices[i_0, ..., i_{K-2}]]`` has rank ``rank(data) - indices.shape[-1]``.\n + + Take ``mode == update`` for instance: + ``output`` is set to ``data`` initially, and the op updates ``output`` as following: + ``output[indices[i_0, ..., i_{K-2}]]= updates[indices[i_0, ..., i_{K-2}]]``, + One more example for ``mode == add``, the update rule with be: + ``output[indices[i_0, ..., i_{K-2}]] += updates[indices[i_0, ..., i_{K-2}]]`` + + Parameters + ---------- + data: tensor<*D,T> (Required) + indices: tensor<*K,i32> (Required) + updates: tensor<*K, T> (Required) + * must be the shape as ``K[:-1]+data.shape[K[-1]:]``. + mode: const string (Optional) + * Default to ``add``. + * Can be the following modes: + * ``update`` + * ``add`` + * ``sub`` + * ``mul`` + * ``div`` + * ``max`` + * ``min`` + Returns + ------- + tensor<*D,T> + * a tensor with the same shape and type as ``data``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + data=TensorInputType(), + indices=IntTensorInputType(), + updates=TensorInputType(), + mode=StringInputType(const=True, default="add"), + ) + + def __init__(self, **kwargs): + super(scatter_nd, self).__init__(**kwargs) + + def type_inference(self): + assert self.indices.shape[-1] <= self.data.rank + expected_updates_shape = ( + self.indices.shape[:-1] + self.data.shape[self.indices.shape[-1] :] + ) + assert is_compatible_symbolic_vector( + self.updates.shape, tuple(expected_updates_shape) + ) + return self.data.sym_type diff --git a/coremltools/converters/mil/mil/ops/defs/slicend.py b/coremltools/converters/mil/mil/ops/defs/slicend.py new file mode 100644 index 000000000..2a25b1275 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/slicend.py @@ -0,0 +1,211 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import division + +from coremltools.converters.mil.mil.types.symbolic import is_symbolic +from coremltools.converters.mil.mil import get_new_symbol +from ._op_reqs import * + + +@register_op(doc_str="") +class slice_by_index(Operation): + """ + Method for numpy style indexing and slicing. + Suppose we have a tensor ``x``, this method achieves: + ``result = x[begin[0]: end[0]: stride[0], begin[1]: end[1]: stride[1], ...]`` + Note this method does not support pure indexing. You would need to do squeeze if indexing is intended. + + Parameters + ---------- + x: tensor<*?, T> (Required) + * Input tensor + begin: tensor<[rank], i32> (Required) + * Starting index for the dimension of slicing. + end: tensor<[rank(x)], i32> (Required) + * Ending index for the dimension of slicing. + stride: tensor<[rank(x)], i32> (Optional) + * Default as all ``1``s. 
+ * Stride for the dimension of slicing. + begin_mask: tensor<[rank(x)], bool> (Optional) + * Default to all ``False``. + * If ``begin_mask[i]==True``, neglect ``begin[i]``, and set ``begin[i]`` to ``0``. + end_mask: tensor<[rank(x)], bool> (Optional) + * Default to all ``False``. + * If ``end_mask[i]==True``, neglect ``end[i]``, and set ``end[i]`` to ``x.shape[i]``. + squeeze_mask: tensor<[rank(x)], bool> (Optional) + * Default to all ``False``. + * If ``squeeze_mask[i]==true``, neglect ``end[i]``, and do the pure index at ``begin[i]``. + + Returns + ------- + tensor<*?, T> + - Scalar or tensor. + + Attributes + ---------- + T: fp32 + + """ + + input_spec = InputSpec( + x=TensorInputType(), + begin=IntTensorInputType(), + end=IntTensorInputType(), + stride=IntTensorInputType(const=True, optional=True), + begin_mask=BoolTensorInputType(const=True, optional=True), + end_mask=BoolTensorInputType(const=True, optional=True), + squeeze_mask=BoolTensorInputType(const=True, optional=True), + ) + + def __init__(self, **kwargs): + super(slice_by_index, self).__init__(**kwargs) + + def type_inference(self): + + # get tensor and set default value + begin = self.begin.val + end = self.end.val + x_rank = self.x.rank + stride = self.stride.val if self.stride is not None else [1] * x_rank + begin_mask = ( + self.begin_mask.val if self.begin_mask is not None else [False] * x_rank + ) + end_mask = self.end_mask.val if self.end_mask is not None else [False] * x_rank + squeeze_mask = ( + self.squeeze_mask.val if self.squeeze_mask is not None else [False] * x_rank + ) + + # solve shape + x_shape = self.x.shape + ret_shape = [] + + if begin is None or len(begin) == 0: + begin = [None] * len(x_shape) + if end is None or len(end) == 0: + end = [None] * len(x_shape) + + # solve for shape inference + for idx in range(len(x_shape)): + # skip if we want to squeeze the dimension + if squeeze_mask[idx]: + continue + + # for those a[:] cases + if begin_mask[idx] and end_mask[idx]: + if is_symbolic(x_shape[idx]): + if stride[idx] == -1 or stride[idx] == 1: + ret_shape.append(x_shape[idx]) + else: + ret_shape.append(get_new_symbol()) + continue + else: + num = np.ceil(float(x_shape[idx]) / abs(stride[idx])).astype( + np.int32 + ) + ret_shape.append(num) + continue + + # for symbolic case + if is_symbolic(x_shape[idx]): + ret_shape.append(get_new_symbol()) + continue + + # when begin and end are not determined + if begin[idx] is None and not begin_mask[idx]: + ret_shape.append(get_new_symbol()) + continue + if end[idx] is None and not end_mask[idx]: + ret_shape.append(get_new_symbol()) + continue + + # parse negative dimention + if begin[idx] is not None and begin[idx] < 0: + begin[idx] = max(0, begin[idx] + x_shape[idx]) + if end[idx] is not None and end[idx] < 0: + end[idx] = max(0, end[idx] + x_shape[idx]) + + # compute shape + low, high = [0, x_shape[idx]] if stride[idx] > 0 else [-1, x_shape[idx] - 1] + begin_idx, end_idx = ( + [begin[idx], end[idx]] if stride[idx] > 0 else [end[idx], begin[idx]] + ) + is_begin_mask, is_end_mask = ( + [begin_mask[idx], end_mask[idx]] + if stride[idx] > 0 + else [end_mask[idx], begin_mask[idx]] + ) + if is_begin_mask: + begin_idx = low + end_idx = high if is_end_mask else min(end_idx, high) + num = np.ceil(float(end_idx - begin_idx) / abs(stride[idx])).astype( + np.int32 + ) + ret_shape.append(max(0.0, num)) + + if len(ret_shape) == 0: + # Scalar case. 
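+            # (All dimensions were squeezed away, e.g. x[0, 1, 2] on a
+            # rank-3 input, so the result is a scalar of x's dtype.)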
+ return self.x.dtype + else: + return types.tensor(self.x.dtype, tuple(ret_shape)) + + def value_inference(self): + if self.x.sym_val is None or self.begin.val is None or self.end.val is None: + return None + x_shape = self.x.shape + begin = [int(i) for i in list(self.begin.val[:])] + end = [int(i) for i in list(self.end.val[:])] + stride = [1] * self.x.rank if self.stride is None else self.stride.val + begin_mask = ( + [False] * self.x.rank if self.begin_mask is None else self.begin_mask.val + ) + end_mask = [False] * self.x.rank if self.end_mask is None else self.end_mask.val + squeeze_mask = ( + [False] * self.x.rank + if self.squeeze_mask is None + else self.squeeze_mask.val + ) + + slices = [] + for idx, mask in enumerate(begin_mask): + if mask: + begin[idx] = None + for idx, mask in enumerate(end_mask): + if mask: + end[idx] = None + squeeze_axes = [] + for idx, mask in enumerate(squeeze_mask): + if mask: + end[idx] = None + stride[ + idx + ] = 2147483647 # We slice out only 1 element by setting stride to INF + squeeze_axes.append(idx) + for idx in range(self.x.rank): + slices.append(slice(begin[idx], end[idx], stride[idx])) + + slices = tuple(slices) + res = self.x.sym_val[slices] + + # remove squeezed axes + if len(squeeze_axes) > 0: + if len(squeeze_axes) == len(res.shape): + if len(res) == 0: + logging.warning("%s seems to be a 0 sized tensor", self.name) + return np.array([]) + res = res.tolist()[0] + if is_symbolic(res): + return res + elif self.x.dtype == types.int32 or self.x.dtype == types.int64: + res = np.int32(res) + elif self.x.dtype == types.float or self.x.dtype == types.double: + res = np.float32(res) + else: + raise ValueError( + "Unable to convert type {}".format(self.x.sym_val.dtype) + ) + else: + res = np.squeeze(res, axis=tuple(squeeze_axes)) + return res diff --git a/coremltools/converters/mil/mil/ops/defs/tensor_operation.py b/coremltools/converters/mil/mil/ops/defs/tensor_operation.py new file mode 100644 index 000000000..be8483937 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/defs/tensor_operation.py @@ -0,0 +1,1107 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import math +from coremltools.converters.mil.mil.types.symbolic import ( + is_symbolic, + any_symbolic, + is_compatible_symbolic_vector, +) +from coremltools.converters.mil.mil import ( + get_new_symbol, + get_new_variadic_symbol, + SYMBOL, + VALUE, + NONE, +) +from ._op_reqs import * +from ._utils import promoted_primitive_type + + +@register_op(doc_str="") +class band_part(Operation): + """ + Returns a tensor setting everything outside a center band to zeros for the innermost matrix. Special cases: + + - ``band_part(x, 0, -1)`` returns upper triangular part. + - ``band_part(x, -1, 0)`` returns lower triangular part. + - ``band_part(x, 0, 0)`` returns diagonal. + + Parameters + ---------- + x: tensor<*?, T> (Required) + * Input tensor. + lower: const (Optional) + * Number of lower / below sub-diagonals to keep. If negative, keep entire lower triangle. + * Defaults to ``-1`` (keep the entire lower triangle) + upper: const (Optional) + * Number of upper / above sub-diagonals to keep. If negative, keep entire lower triangle. + * Defaults to ``-1`` (keep the entire upper triangle) + + Returns + ------- + tensor<*?, T> + * Same type and shape as the input tensor. 
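+
+    Example
+    -------
+    A NumPy sketch of the banding semantics (illustrative only):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        x = np.ones((4, 4), dtype=np.float32)
+        lower, upper = 1, 0  # keep main diagonal and one sub-diagonal
+        rows, cols = np.indices(x.shape)
+        keep = (((lower < 0) | (rows - cols <= lower))
+                & ((upper < 0) | (cols - rows <= upper)))
+        out = np.where(keep, x, 0.0)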
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        lower=IntInputType(const=True, default=-1),
+        upper=IntInputType(const=True, default=-1),
+    )
+
+    def __init__(self, **kwargs):
+        super(band_part, self).__init__(**kwargs)
+
+    def type_inference(self):
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class cumsum(Operation):
+    """
+    Returns the cumulative sum of the input along the given axis.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Input tensor.
+    axis: const<i32> (Optional)
+        * Default to ``0``.
+        * Axis for which the cumulative sum is computed.
+    exclusive: const<bool> (Optional)
+        * Default to ``False``.
+        * When set to ``False``, the inclusive cumsum is computed; that is, the first
+          element of the output is identical to the first element of the input.
+        * When set to ``True``, the exclusive cumsum is computed, making the first
+          element of the output ``0``.
+    reverse: const<bool> (Optional)
+        * Default to ``False``.
+        * When set to ``True``, the cumsum is performed in reverse order.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Same type and shape as the input tensor.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        axis=IntInputType(const=True, default=0),
+        exclusive=BoolInputType(const=True, default=False),
+        reverse=BoolInputType(const=True, default=False),
+    )
+
+    def __init__(self, **kwargs):
+        super(cumsum, self).__init__(**kwargs)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        data = np.copy(self.x.val)
+        axis = self.axis.val
+        reverse = self.reverse.val
+        exclusive = self.exclusive.val
+        if reverse:
+            data = np.flip(data, axis=axis)
+        data = np.cumsum(data, axis=axis)
+        if exclusive:
+            zero_shape = np.copy(data.shape)
+            zero_shape[axis] = 1
+            # Shift right by one along `axis` and prepend zeros so that the
+            # first element of the output is 0 (exclusive cumsum).
+            slices = [slice(None)] * data.ndim
+            slices[axis] = slice(None, -1)
+            data = np.concatenate(
+                (np.zeros(zero_shape, dtype=data.dtype), data[tuple(slices)]),
+                axis=axis,
+            )
+        if reverse:
+            data = np.flip(data, axis=axis)
+        return data
+
+    def type_inference(self):
+        # Check range of axis
+        if self.axis.val < -1 or self.axis.val > self.x.rank - 1:
+            raise ValueError(
+                "axis should be in the range [-1, {}]".format(self.x.rank - 1)
+            )
+
+        return self.x.sym_type
+
+
+@register_op(doc_str="")
+class fill(Operation):
+    """
+    Returns a tensor with a given shape filled with a constant value.
+
+    Parameters
+    ----------
+    shape: tensor<[K], i32> (Required)
+        * Target output tensor shape.
+        * ``K`` is the rank of the output tensor. ``shape[k] > 0`` for ``k = 0,..., K-1``.
+    value: const<T> (Optional)
+        * Default to ``0``.
+        * Constant value to fill in.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Tensor with shape determined by the input shape.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        shape=IntTensorInputType(), value=IntOrFloatInputType(const=True, default=0.0),
+    )
+
+    def __init__(self, **kwargs):
+        super(fill, self).__init__(**kwargs)
+
+    def type_inference(self):
+        if any_symbolic(self.shape.shape):
+            # We can't infer any shape if shape has variable length.
+            return types.tensor(types.fp32, (get_new_variadic_symbol(),))
+
+        # shape has fixed length here.
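+        # The output rank equals shape.shape[0]; when the element values of
+        # `shape` are unknown at compile time, emit one fresh symbol per
+        # dimension below.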
+ if self.shape.sym_val is None: + ret_shape = tuple([get_new_symbol() for _ in range(self.shape.shape[0])]) + return types.tensor(types.fp32, ret_shape) + + return types.tensor(self.value.dtype, tuple(self.shape.sym_val.tolist())) + + @precondition(allow=VALUE) + def value_inference(self): + return np.full(shape=self.shape.val, fill_value=self.value.val) + + +@register_op(doc_str="") +class non_maximum_suppression(Operation): + """ + Applies non-maximum suppression (NMS) on the input box coordinates according + to their intersection-over-union (IoU). NMS selects as subset of bounding + boxes with the descending scores. Removes boxes that have high + intersection-over-union (IOU) overlap with previously selected boxes. + + Parameters + --------- + boxes: tensor<[n, B, 4], T> (Required) + * Box coordinates to perform NMS on. + scores: tensor<[n, B, K], T> (Required) + * Scores for each one of the boxes + iou_threshold: const (Required) + * The intersection over union (``IoU``) threshold over which boxes are suppressed. NMS remove all overlapping boxes with ``IoU > iou_threshold``. + score_threshold: const (Required) + * Before IoU suppression is performed, boxes with class scores below this threshold are rejected. + max_boxes: const (Required) + * Maximum number of boxes to select. If the number of surviving boxes are less, output is padded up to this number. + per_class_suppression: const (Optional) + * Default to ``False``. + * If ``True``, suppression is performed independently within boxes of each class. + + Returns + ------- + tensor<[n, max_boxes, 4], T> + * Coordinates of selected boxes. + tensor<[n, max_boxes, K], T> + * Scores of selected boxes. + tensor<[n, max_boxes], i32> + * Indices of selected boxes. + tensor<[n], i32> + * Number of boxes selected for each batch. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + boxes=TensorInputType(), + scores=TensorInputType(), + iou_threshold=FloatInputType(const=True), + score_threshold=FloatInputType(const=True), + max_boxes=IntInputType(const=True), + per_class_suppression=BoolInputType(const=True, default=False), + ) + + def __init__(self, **kwargs): + super(non_maximum_suppression, self).__init__(**kwargs) + + def type_inference(self): + boxes_dtype = self.boxes.dtype + scores_dtype = self.scores.dtype + n_batch, _, n_score = self.scores.shape + max_boxes = self.max_boxes.val + + return ( + types.tensor(boxes_dtype, (n_batch, max_boxes, 4)), + types.tensor(scores_dtype, (n_batch, max_boxes, n_score)), + types.tensor(types.int32, (n_batch, max_boxes)), + types.tensor(types.int32, (n_batch,)), + ) + + +@register_op(doc_str="") +class non_zero(Operation): + """ + Returns the indices of the elements in the given tensor that are non-zero. + + Parameters + ---------- + x: tensor<*?, T> (Required) + * Tensor, values selected at indices where its values is not equal to ``0``. + + Returns + ------- + tensor<[N, R], T> + * 2-dimensional tensor contains indices of elements that are non-zero. Each + row is the index for a non-zero value. + * ``N`` is the number of non-zero elements, ``R`` is the rank of the input. 
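+
+    Example
+    -------
+    A NumPy sketch mirroring this op's value inference:
+
+    .. code-block:: python
+
+        import numpy as np
+
+        x = np.array([[3, 0], [0, 7]], dtype=np.float32)
+        out = np.transpose(np.nonzero(x))  # -> array([[0, 0], [1, 1]])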
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=TensorInputType())
+
+    def __init__(self, **kwargs):
+        super(non_zero, self).__init__(**kwargs)
+
+    def type_inference(self):
+        shape = tuple([get_new_symbol(), self.x.rank])
+        return types.tensor(self.x.dtype, shape)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.transpose(np.nonzero(self.x.val))
+
+
+@register_op(doc_str="")
+class one_hot(Operation):
+    """
+    Returns one-hot vectors: the locations given by ``indices`` take the ``on_value``,
+    while all other locations take the ``off_value``.
+
+    Parameters
+    ----------
+    indices: tensor<[D],i32> (Required)
+        * Tensor of indices; each value marks the location that takes the ``on_value``
+          in the corresponding one-hot vector.
+    one_hot_vector_size: i32 (Required)
+        * The length of each one-hot vector.
+    axis: const i32 (Optional)
+        * Indicates the dimension at which to insert the new axis.
+        * If ``indices`` has rank ``N``, the output tensor will have rank ``N+1``.
+        * Default to ``-1`` (the last dimension).
+    on_value: const i32 (Optional)
+        * Value at the locations selected by ``indices``.
+        * Default to ``1``.
+    off_value: const i32 (Optional)
+        * Default to ``0``.
+
+    Returns
+    -------
+    tensor<*?,T>
+        * A tensor containing the one-hot vectors.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        indices=IntTensorInputType(),
+        one_hot_vector_size=IntInputType(),
+        axis=IntInputType(const=True, default=-1),
+        on_value=IntOrFloatInputType(const=True, default=1),
+        off_value=IntOrFloatInputType(const=True, default=0),
+    )
+
+    def __init__(self, **kwargs):
+        super(one_hot, self).__init__(**kwargs)
+
+    def type_inference(self):
+        on_type = self.on_value.dtype
+        off_type = self.off_value.dtype
+
+        if on_type != off_type:
+            raise TypeError(
+                "Parameters on_value and off_value must have same input types."
+            )
+
+        if self.axis.val < -self.indices.rank - 1 or self.axis.val > self.indices.rank:
+            raise IndexError(
+                "Axis value {} is out of bounds for {} node {}".format(
+                    self.axis.val, self.op_type, self.name
+                )
+            )
+
+        indices_shape = list(self.indices.shape)
+
+        depth_value = self.one_hot_vector_size.sym_val
+        if depth_value is None:
+            depth_value = get_new_symbol()
+        elif depth_value < 0:
+            raise ValueError("Parameter one_hot_vector_size must be non-negative")
+
+        retshape = indices_shape
+
+        if self.axis.val < 0:
+            cut = len(retshape) + self.axis.val + 1
+        else:
+            cut = self.axis.val
+        retshape = retshape[0:cut] + [depth_value] + retshape[cut:]
+
+        return types.tensor(on_type, retshape)
+
+
+@register_op(doc_str="")
+class pad(Operation):
+    """
+    Pad a tensor.
+
+    Parameters
+    ----------
+    x: tensor<[*D_in],T> (Required)
+    pad: const tensor<[2*N],i32> (Required)
+        * ``N <= D_in``: the last ``N`` dimensions of ``x`` are padded as follows:
+        * For each dimension ``i`` of ``x``, if ``i >= D_in - N``:
+            * pad ``pad[2*i]`` elements before ``x[..,i,..]``
+            * pad ``pad[2*i+1]`` elements after ``x[..,i,..]``
+        * If mode is "reflect", then ``pad[2*i]`` and ``pad[2*i+1]`` can be at most ``D[i]-1``.
+        * If mode is "replicate", then ``pad[2*i]`` and ``pad[2*i+1]`` can be at most ``D[i]``.
+    mode: const<str> (Optional)
+        * Default to ``constant``.
+        * Must be one of the following values:
+            * ``constant``
+            * ``reflect``
+            * ``replicate``
+    constant_val: const<f32> (Optional)
+        * Default to ``0``.
+        * Constant value to pad. Ignored if ``mode != constant``.
+
+    Returns
+    -------
+    tensor<[*D_out],T>
+        * Tensor with same type as the input.
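+
+    Example
+    -------
+    A NumPy sketch of constant-mode padding of the last dimension
+    (illustrative only):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        x = np.arange(6, dtype=np.float32).reshape(2, 3)
+        # pad = [1, 1]: one element before and after the last dimension
+        out = np.pad(x, ((0, 0), (1, 1)), mode="constant", constant_values=0.0)
+        # out.shape -> (2, 5)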
+ + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + x=TensorInputType(), + pad=IntTensorInputType(const=True), + mode=StringInputType(const=True, default="constant"), + constant_val=FloatInputType(const=True, default=0.0), + ) + + def __init__(self, **kwargs): + super(pad, self).__init__(**kwargs) + + def type_inference(self): + in_shape = self.x.shape + pad = self.pad.val + ret_shape = list(in_shape) + if len(pad.shape) != 1: + raise ValueError("Pad should be a 1D tensor!") + pad = pad.copy() + pad = pad.reshape(-1, 2) + + for i in range(len(pad)): + ret_shape[-len(pad) + i] = ret_shape[-len(pad) + i] + pad[i][0] + pad[i][1] + + return types.tensor(self.x.dtype, tuple(ret_shape)) + + @precondition(allow=VALUE) + def value_inference(self): + # NumPy `edge` mode is equivalent to `replicate` mode of PyTorch and CoreML + mode = "edge" if self.mode.val == "replicate" else self.mode.val + pad_val = self.pad.val + if len(self.x.val.shape) > (pad_val.shape[0] // 2): + updated_pad = np.zeros(len(self.x.val.shape) * 2) + updated_pad[-pad_val.shape[0] :] = pad_val + pad_val = updated_pad + pad_val = pad_val.reshape(-1, 2).astype(np.int32) + if mode == "constant": + return np.pad( + self.x.val, pad_val, mode, constant_values=self.constant_val.val + ) + # NumPy does not support non-constant mode and constant_values argument + return np.pad(self.x.val, pad_val, mode) + + +@register_op(doc_str="") +class range_1d(Operation): + """ + Returns a numpy-like 1d range sequence. + + Parameters + ---------- + end: (Required) + * The upper limit of the sequence, exclusive. + start: (Required) + * The start point of the sequence. + step: (Required) + * Number that increments ``start``. + + Returns + ------- + tensor + * An 1D tensor. where ``M`` is the length of the sequence. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec( + end=IntOrFloatInputType(), + start=IntOrFloatInputType(), + step=IntOrFloatInputType(), + ) + + def __init__(self, **kwargs): + super(range_1d, self).__init__(**kwargs) + + @precondition(allow=VALUE) + def value_inference(self): + start = self.start.val + end = self.end.val + step = self.step.val + return np.arange(start, end, step).astype(np.int32) + + def type_inference(self): + start = self.start.sym_val + end = self.end.sym_val + step = self.step.sym_val + + if ( + (self.start.dtype != self.end.dtype) + or (self.start.dtype != self.step.dtype) + or (self.end.dtype != self.step.dtype) + ): + raise TypeError( + "All inputs to the range operation must have same input types." + ) + + if all(sym_val is not None for sym_val in (start, end, step)): + shape = (end - start) / step + shape = shape if is_symbolic(shape) else int(math.ceil(shape)) + shape = tuple([shape]) + else: + shape = tuple([get_new_symbol(),]) + + return types.tensor(self.start.dtype, shape) + + +@register_op(doc_str="") +class tile(Operation): + """ + Returns a new tensor by replicating input ``x`` multiples times. + The ``i``th dimention of ``x`` will be replicated ``reps[i]`` times. + + Parameters + ---------- + x: tensor<*?, T> (Required) + * Input tensor. + reps: tensor<[rank(x)], T> (Required) + * A 1D tensor with length ``rank(x)`` which indicates number to replicate the input along each dimension. + + Returns + ------- + tensor<*?, T>: + * An Nd tensor with same type as the input. 
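+
+    Example
+    -------
+    A NumPy sketch mirroring this op's value inference:
+
+    .. code-block:: python
+
+        import numpy as np
+
+        x = np.array([[1, 2], [3, 4]], dtype=np.float32)
+        out = np.tile(x, reps=(1, 2))  # -> [[1, 2, 1, 2], [3, 4, 3, 4]]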
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=TensorInputType(), reps=TensorInputType(),)
+
+    def __init__(self, **kwargs):
+        super(tile, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        x_shape = np.array(self.x.shape)
+        reps = self.reps.val
+        if reps is None:
+            out_shape = tuple([get_new_symbol() for _ in range(self.x.rank)])
+            return types.tensor(x_type, out_shape)
+
+        if len(reps) == 0 or len(reps) > self.x.rank:
+            msg = (
+                "Length of the reps ({}) must be at least 1, and "
+                "not greater than the rank of the input x ({})"
+            )
+            raise ValueError(msg.format(len(reps), self.x.rank))
+
+        if any(i <= 0 for i in reps):
+            raise ValueError("All entries of reps parameter must be greater than 0")
+
+        if len(reps) < self.x.rank:
+            reps = [1] * (self.x.rank - len(reps)) + list(reps)
+
+        out_shape = tuple([reps[i] * x_shape[i] for i in range(len(reps))])
+
+        return types.tensor(x_type, out_shape)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        # Infer a value only when `reps` is known at compile time.
+        if self.reps.val is None:
+            return None
+        return np.tile(self.x.val, reps=self.reps.val)
+
+
+@register_op(doc_str="")
+class argsort(Operation):
+    """
+    Returns a tensor containing the indices of the sorted values along a given axis
+    of the input tensor.
+
+    Parameters
+    ----------
+    x: <*?, T> (Required)
+        * Input tensor.
+    axis: const<i32> (Optional)
+        * Default to ``-1`` (the last dimension).
+        * Axis to perform the operation.
+    ascending: const<bool> (Optional)
+        * ``True`` to sort in ascending order. Default to ``False`` (sort in
+          descending order).
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Tensor containing the indices of the sorted values.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        axis=IntInputType(const=True, default=-1),
+        ascending=BoolInputType(const=True, default=False),
+    )
+
+    def __init__(self, **kwargs):
+        super(argsort, self).__init__(**kwargs)
+
+    def type_inference(self):
+        return types.tensor(types.int32, self.x.shape)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        # np.argsort sorts in ascending order; negate the input to get the
+        # descending order.
+        if self.ascending.val:
+            return np.argsort(self.x.val, axis=self.axis.val)
+        return np.argsort(-self.x.val, axis=self.axis.val)
+
+
+@register_op(doc_str="")
+class topk(Operation):
+    """
+    Returns a tensor containing the top or bottom ``k`` values and the corresponding
+    indices of the input tensor along a given axis.
+
+    Parameters
+    ----------
+    x: <*?, T> (Required)
+        * Input tensor.
+    k: const<i32> (Optional)
+        * Default to ``1``.
+        * Number of values/indices to be computed along the given axis.
+    axis: const<i32> (Optional)
+        * Defaults to ``-1`` (the last dimension).
+        * Axis to perform the operation.
+    ascending: const<bool> (Optional)
+        * Default to ``False`` (return the top ``k`` values in descending order).
+        * If ``True``, return the bottom ``k`` values in ascending order.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Values of the top/bottom ``k`` elements.
+    tensor<*?, i32>
+        * Indices of the top/bottom ``k`` elements along ``axis``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        k=IntInputType(const=True, default=1),
+        axis=IntInputType(const=True, default=-1),
+        ascending=BoolInputType(const=True, default=False),
+    )
+
+    def __init__(self, **kwargs):
+        super(topk, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        x_shape = self.x.shape
+        k = self.k.val
+        axis = self.axis.val
+
+        if not is_symbolic(x_shape[axis]) and k > x_shape[axis]:
+            msg = "K={} is greater than size of the given axis={}"
+            raise ValueError(msg.format(k, axis))
+
+        ret_shape = list(x_shape)
+        ret_shape[axis] = k
+        return types.tensor(x_type, ret_shape), types.tensor(types.int32, ret_shape)
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        indices = np.argsort(self.x.val, axis=self.axis.val)
+        if not self.ascending.val:
+            indices = np.argsort(-self.x.val, axis=self.axis.val)
+        slc = [slice(None)] * self.x.rank
+        slc[self.axis.val] = slice(0, self.k.val)
+        indices = indices[tuple(slc)]
+        values = np.take_along_axis(self.x.val, indices, axis=self.axis.val)
+        return values, indices
+
+
+@register_op(doc_str="")
+class flatten(Operation):
+    """
+    Flattens the input tensor into a 2-D tensor by collapsing the dimensions
+    before and after the provided ``axis``.
+
+    Parameters
+    ----------
+    x: tensor<[*d], T> (Required)
+        * Input tensor.
+    axis: const (Optional)
+        * Defaults to ``1``.
+        * Negative axis values are supported.
+
+    Returns
+    -------
+    tensor<[d_prior, d_post], T>
+        * ``d_prior`` is the product of dimensions ``x[:axis]``.
+        * ``d_post`` is the product of dimensions ``x[axis:]``.
+
+    Examples
+    --------
+    1. ``input_shape = (3, ), axis = -1, output_shape = (1, 3)``
+    2. ``input_shape = (3, ), axis = 1, output_shape = (3, 1)``
+    3. ``input_shape = (4, 3), axis = -1, output_shape = (4, 3)``
+    4. ``input_shape = (2, 3, 2), axis = -1, output_shape = (6, 2)``
+    5. ``input_shape = (5, 5, 2), axis = 1, output_shape = (5, 10)``
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(), axis=IntInputType(const=True, default=1)
+    )
+
+    def __init__(self, **kwargs):
+        super(flatten, self).__init__(**kwargs)
+
+    def type_inference(self):
+        shape = list(self.x.shape)
+        axis = self.axis.val
+        dim_pre_axis = np.prod(shape[:axis])
+        dim_post_axis = np.prod(shape[axis:])
+        new_shape = [dim_pre_axis, dim_post_axis]
+        return types.tensor(self.x.dtype, tuple(new_shape))
+
+    @precondition(allow=VALUE | SYMBOL)
+    def value_inference(self):
+        shape = self.x.shape
+        axis = self.axis.val
+
+        dim_pre_axis = np.prod(shape[:axis])
+        dim_post_axis = np.prod(shape[axis:])
+        return self.x.val.reshape(dim_pre_axis, dim_post_axis)
+
+
+@register_op(doc_str="")
+class shape(Operation):
+    """
+    Returns a 1-dimensional tensor with the shape of the input tensor.
+
+    Parameters
+    ----------
+    x: tensor<[*?], T> (Required)
+        * Input tensor.
+
+    Returns
+    -------
+    tensor<[K], i32>
+        * Shape of the input tensor, where ``K = x.rank``.
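+
+    Examples
+    --------
+    1. ``x.shape = (2, 3, 5), output = [2, 3, 5]``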
+ + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec(x=TensorInputType()) + + def __init__(self, **kwargs): + super(shape, self).__init__(**kwargs) + + def type_inference(self): + input_rank = self.x.rank + return types.tensor(types.int32, tuple([input_rank])) + + def value_inference(self): + if any_symbolic(self.x.shape): + # convert elements in shape to int32 + res = [x if is_symbolic(x) else np.int32(x) for x in self.x.shape] + return np.array(res) + else: + return np.array(self.x.shape).astype(np.int32) + + +@register_op(doc_str="") +class concat(Operation): + """ + Concatenates tensors along a dimension. + + Parameters + ---------- + values: Tuple[tensor<[d0, d1, ..., d_axis_i, ..., d_n],T>] (Required) + * The number of dimensions of the input tensors must match, and all dimensions except ``axis`` must be equal. + * The tensors may be variadic, but the number of tensors must be determined at compile time (i.e. a tuple). + axis: const (Required) + * The dimension along which to concatenate. Must be in the range ``[-rank(values[i]), rank(values[i]))`` for all ``i``. + + Returns + ------- + tensor<[d0, d1,...d_axis_out, ..., d_n],T> + * where ``d_axis_out = sum(d_axis_i)``. + + Attributes + ---------- + T: fp32 + """ + + input_spec = InputSpec(values=TupleInputType(), axis=IntInputType(const=True),) + + def __init__(self, **kwargs): + super(concat, self).__init__(**kwargs) + + def type_inference(self): + concat_dim_len = 0 + if len(self.values) == 0: + raise ValueError("Concat {} got 0 values".format(self.name)) + + # Validate values have the same rank + rank = self.values[0].rank + for v in self.values: + if v.rank != rank: + msg = "Input {} has rank {} != other inputs rank {}" + raise ValueError(msg.format(v.name, v.rank, rank)) + + # Check concat axis is within (-rank, rank) + concat_axis = self.axis.val + if concat_axis < 0: + concat_axis += rank + if rank > 0 and (concat_axis < 0 or concat_axis >= rank): + msg = "In {} of op_type {}: axis out of bound for input " + "(rank {})" + raise ValueError(msg.format(self.name, self.op_type, rank)) + + # Validate primitive types are compatible + dtype = self.values[0].dtype + for v in self.values[1:]: + new_dtype = promoted_primitive_type(v.dtype, dtype) + if new_dtype is None: + msg = "Incompatible primitive types concat: {} vs {}" + raise ValueError(msg.format(v.dtype, dtype)) + dtype = new_dtype + + # validate that non-axis dimensions match + retshape = list(self.values[0].shape) + for v in self.values[1:]: + for i in range(rank): + if is_symbolic(retshape[i]) or is_symbolic(v.shape[i]): + continue + if i != concat_axis and retshape[i] != v.shape[i]: + msg = 'Dimension mismatch in {} ("{}"): shapes {} vs. {}' + raise ValueError( + msg.format(self.op_type, self.name, retshape, v.shape) + ) + + # Get length of concat dim + concat_dim_len = 0 + for v in self.values: + if len(v.shape) == 0: + taxis = 1 + else: + taxis = v.shape[concat_axis] + if is_symbolic(taxis): + concat_dim_len = get_new_symbol() + break + concat_dim_len += taxis + + if len(retshape) == 0: + retshape = [concat_dim_len] + else: + retshape[concat_axis] = concat_dim_len + + return types.tensor(dtype, retshape) + + @precondition(allow=VALUE | SYMBOL | NONE) + def value_inference(self): + + is_all_rank_zero = all([v.rank == 0 for v in self.values]) + values = [ + v.sym_val if v.sym_val is not None else get_new_symbol() + for v in self.values + ] + + # we only infer values for values whose ranks are all zero, + # or don't have symbolic values. 
+        # Note that cases like values = [[1, is0], [2]] fall into neither case.
+        if any([is_symbolic(v) for v in values]) and not is_all_rank_zero:
+            return None
+
+        if not isinstance(values[0], np.ndarray) or values[0].shape == ():
+            return np.stack(values, axis=self.axis.val)
+
+        return np.concatenate(values, axis=self.axis.val)
+
+
+@register_op(doc_str="")
+class split(Operation):
+    """
+    Split a tensor into a tuple of tensors.
+
+    Parameters
+    ----------
+    x: <*?,T> (Required)
+        * The tensor to split.
+    num_splits: (Optional)
+        * If specified, divide ``x`` into ``num_splits`` tensors along ``axis``.
+          Its behavior depends on ``split_sizes``:
+        * If ``split_sizes`` is defined, ``num_splits == S``, and the output
+          sizes may be uneven.
+        * If ``split_sizes`` is not defined, ``x.shape[axis]`` must be divisible
+          by ``num_splits``, and the output sizes are even.
+        * At least one of ``num_splits`` or ``split_sizes`` must be provided.
+          If ``split_sizes`` length ``S`` cannot be determined at compile time,
+          ``num_splits`` must be supplied to determine the number of outputs.
+    split_sizes: const (Optional)
+        * Sizes to split to. The sum of ``split_sizes`` must equal ``x.shape[axis]``.
+    axis: const (Required)
+        * The dimension along which to split. Must be in the range ``[-rank(x), rank(x))``.
+
+    Returns
+    -------
+    Tuple[tensor<*?, T>]
+        * Where the length of the tuple is the number of splits (determined
+          from ``num_splits`` or ``split_sizes``).
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        num_splits=IntInputType(const=True, optional=True),
+        split_sizes=IntTensorInputType(const=True, optional=True),
+        axis=IntInputType(const=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(split, self).__init__(**kwargs)
+
+    def type_inference(self):
+        num_splits, sizes = self._get_num_splits_and_sizes()
+        x_shape = list(self.x.shape)
+        ret_shapes = [x_shape[:] for _ in range(num_splits)]
+        axis = self.axis.val
+        for i, d in enumerate(sizes):
+            ret_shapes[i][axis] = d
+        return tuple([types.tensor(self.x.dtype, s) for s in ret_shapes])
+
+    def _get_num_splits_and_sizes(self):
+        """
+        Return:
+        - num_splits: int
+        - sizes: list of int/symbols. Of length num_splits
+
+        Raise ValueError if num_splits cannot be determined.
+        """
+        if self.num_splits is None and self.split_sizes is None:
+            msg = (
+                "At least one of num_splits and split_sizes "
+                + "must be specified in split op {}"
+            )
+            raise ValueError(msg.format(self.name))
+
+        axis = self.axis.val
+
+        if self.num_splits is not None:
+            num_splits = self.num_splits.val
+            if self.split_sizes is None:
+                # Even split
+                if (
+                    not is_symbolic(self.x.shape[axis])
+                    and self.x.shape[axis] % num_splits != 0
+                ):
+                    msg = "num_split {} does not divide split " + "dim (length = {})"
+                    raise ValueError(msg.format(num_splits, self.x.shape[axis]))
+                if is_symbolic(self.x.shape[axis]):
+                    size = self.x.shape[axis] / num_splits
+                else:
+                    # Integer division keeps concrete output sizes integral.
+                    size = self.x.shape[axis] // num_splits
+                return num_splits, [size] * num_splits
+
+            # self.split_sizes is not None
+            if self.split_sizes.sym_val is not None:
+                return num_splits, self.split_sizes.sym_val
+
+            # self.split_sizes.sym_val is None.
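+            # num_splits is known but the individual sizes are not; emit one
+            # fresh symbol per output so the shapes can be resolved later.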
+            sizes = [get_new_symbol() for _ in range(num_splits)]
+            return num_splits, sizes
+
+        # self.num_splits is None, self.split_sizes is not None
+        if self.split_sizes.sym_val is not None:
+            return len(self.split_sizes.sym_val), self.split_sizes.sym_val
+
+        # self.num_splits is None, self.split_sizes is not None
+        # self.split_sizes.sym_val is None
+        if any_symbolic(self.split_sizes.shape):
+            raise ValueError("Unable to determine number of splits")
+
+        num_splits = self.split_sizes.shape[0]
+        sizes = [get_new_symbol() for _ in range(num_splits)]
+        return num_splits, sizes
+
+    @precondition(allow=VALUE | SYMBOL | NONE)
+    def value_inference(self):
+        num_splits, sizes = self._get_num_splits_and_sizes()
+        if self.x.sym_val is None or any_symbolic(sizes):
+            raise NotImplementedError()
+
+        if num_splits == 1:
+            # No split_indices possible.
+            return self.x.sym_val
+
+        split_indices = np.cumsum(sizes).astype(np.int64)
+        return tuple(np.split(self.x.sym_val, split_indices[:-1], axis=self.axis.val))
+
+
+@register_op(doc_str="")
+class stack(Operation):
+    """
+    Stacks tensors along a new dimension.
+
+    Parameters
+    ----------
+    values: Tuple[tensor<[d0, d1, ..., d_n], T>] (Required)
+        * All tensors must have identical shapes.
+    axis: const (Required)
+        * The new dimension along which to stack. Must be in the range ``[-rank(values[i]), rank(values[i]))`` for all ``i``.
+
+    Returns
+    -------
+    tensor<[d0, d1, ..., N, ..., d_n], T>
+        * Where ``N`` is the number of input tensors; a new dimension of
+          size ``N`` is inserted at ``axis``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(values=TupleInputType(), axis=IntInputType(const=True),)
+
+    def __init__(self, **kwargs):
+        super(stack, self).__init__(**kwargs)
+
+    def type_inference(self):
+
+        num_tensors = len(self.values)
+        if num_tensors == 0:
+            raise ValueError("Cannot stack 0 tensors")
+
+        # get the first value without symbolic shape
+        t_shape = None
+        for value in self.values:
+            if not any_symbolic(value.shape):
+                t_shape = value.shape
+                break
+        t_shape = self.values[0].shape if t_shape is None else t_shape
+
+        # compare all shapes
+        for t in self.values:
+            if not is_compatible_symbolic_vector(t.shape, t_shape):
+                msg = "Component tensor {} has shape {}, others have {}"
+                raise ValueError(msg.format(t.name, t.shape, t_shape))
+        ret_shape = list(t_shape)
+        ret_shape.insert(self.axis.val, num_tensors)
+        return types.tensor(self.values[0].dtype, ret_shape)
+
+    @precondition(allow=VALUE | SYMBOL | NONE)
+    def value_inference(self):
+
+        is_all_rank_zero = all([v.rank == 0 for v in self.values])
+        values = [
+            v.sym_val if v.sym_val is not None else get_new_symbol()
+            for v in self.values
+        ]
+
+        if any([is_symbolic(v) for v in values]) and not is_all_rank_zero:
+            return None
+
+        return np.stack(values, self.axis.val)
+
+
+@register_op(doc_str="")
+class addn(Operation):
+    """
+    Should deprecate this op.
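+
+    Returns the elementwise sum of a tuple of tensors; all inputs must have
+    the same shape and dtype (see type_inference below).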
+    """
+
+    input_spec = InputSpec(values=TupleInputType(),)
+
+    def __init__(self, **kwargs):
+        super(addn, self).__init__(**kwargs)
+
+    def type_inference(self):
+        num_tensors = len(self.values)
+        if num_tensors == 0:
+            raise ValueError("Cannot addn 0 tensors.")
+
+        t_shape = self.values[0].shape
+        t_type = self.values[0].dtype
+
+        for t in self.values[1:]:
+            if t.shape != t_shape:
+                msg = "Component tensor {} has shape {}, others have {}"
+                raise ValueError(msg.format(t.name, t.shape, t_shape))
+            if t.dtype != t_type:
+                msg = "Component tensor {} has dtype {}, others have {}"
+                raise ValueError(msg.format(t.name, t.dtype, t_type))
+
+        return types.tensor(t_type, list(t_shape))
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        inputs = np.array([v.val for v in self.values])
+        return np.sum(inputs, axis=0)
+
+
+@register_op(doc_str="")
+class isfinite(Operation):
+    """
+    Should deprecate this op.
+
+    Returns a bool tensor marking which elements of ``x`` are finite.
+    """
+
+    input_spec = InputSpec(x=ScalarOrTensorInputType(),)
+
+    def __init__(self, **kwargs):
+        super(isfinite, self).__init__(**kwargs)
+
+    def type_inference(self):
+        return types.tensor(types.bool, list(self.x.shape))
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        return np.isfinite(self.x.val)
diff --git a/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py b/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py
new file mode 100644
index 000000000..80a80f347
--- /dev/null
+++ b/coremltools/converters/mil/mil/ops/defs/tensor_transformation.py
@@ -0,0 +1,679 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import functools
+import sympy as sm
+from coremltools.converters.mil.mil.types.symbolic import (
+    is_symbolic,
+    isscalar,
+    any_symbolic,
+    any_variadic,
+)
+from coremltools.converters.mil.mil import (
+    get_new_symbol,
+    get_new_variadic_symbol,
+    VALUE,
+    SYMBOL,
+    types,
+)
+from ._op_reqs import *
+
+
+@register_op(doc_str="")
+class depth_to_space(Operation):
+    """
+    Rearranges elements in a tensor from depth (channel) into spatial dimensions.
+
+    Parameters
+    ----------
+    x: tensor<[n, C, H, W], T> (Required)
+        * Input tensor of rank ``4``.
+    block_size: const i32 (Required)
+        * The size of the spatial block. Must be greater than ``1``; the
+          channel dimension ``C`` must be divisible by ``block_size^2``.
+
+    Returns
+    -------
+    tensor<[n, C / block_size^2, H x block_size, W x block_size], T>
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=TensorInputType(), block_size=IntInputType(const=True),)
+
+    def __init__(self, **kwargs):
+        super(depth_to_space, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        n, c, h, w = self.x.shape
+        bs = self.block_size.val
+        ret_shape = (n, c // (bs * bs), h * bs, w * bs)
+        return types.tensor(x_type, ret_shape)
+
+
+@register_op(doc_str="")
+class expand_dims(Operation):
+    """
+    Insert a dimension of size ``1`` into a 1-D or higher tensor at each
+    axis in ``axes``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Scalar or tensor.
+    axes: const tensor<[K], i32> (Required)
+        * ``K`` is the number of dimensions expanded.
+        * Insert a single dimension at each dimension index in ``axes``.
+        * Negative values index from the end: ``-d-1 <= axis <= d``,
+          where ``d`` is the rank of ``x``.
+
+    Returns
+    -------
+    tensor<*(rank(x)+K), T>
+        * Same type as the input ``x`` with rank ``rank(x)+K``.
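+
+    Examples
+    --------
+    1. ``x.shape = (2, 3), axes = [0], output shape = (1, 2, 3)``
+    2. ``x.shape = (2, 3), axes = [0, 2], output shape = (1, 2, 1, 3)``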
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=ScalarOrTensorInputType(), axes=IntTensorInputType(const=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(expand_dims, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_rank = self.x.rank
+        x_type = self.x.dtype
+        x_shape = list(self.x.shape)
+        axes = self.axes.val
+        out_rank = x_rank + len(axes)
+
+        for axis in axes:
+            if axis <= -out_rank - 1 or axis >= out_rank:
+                msg = 'Axis value {} is out of bounds for {} node "{}" of shape {}'
+                raise IndexError(
+                    msg.format(axis, self.op_type, self.name, self.x.shape)
+                )
+
+        ret_shape = x_shape
+        axes = sorted([out_rank + axis if axis < 0 else axis for axis in axes])
+        for axis in axes:
+            ret_shape.insert(axis, 1)
+
+        return types.tensor(x_type, tuple(ret_shape))
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        axes = self.axes.val
+        out_rank = self.x.rank + len(axes)
+
+        for axis in axes:
+            if axis <= -out_rank - 1 or axis >= out_rank:
+                msg = 'Axis value {} is out of bounds for {} node "{}" of shape {}'
+                raise IndexError(
+                    msg.format(axis, self.op_type, self.name, self.x.shape)
+                )
+
+        axes = sorted([out_rank + axis if axis < 0 else axis for axis in axes])
+        ret_shape = list(self.x.shape)
+        for axis in axes:
+            ret_shape.insert(axis, 1)
+        return np.reshape(self.x.val, ret_shape)
+
+
+def reshape_with_symbol(v, shape):
+    """
+    Perform basic reshape if v is symbolic (not an array of symbols).
+    """
+    if is_symbolic(v):
+        return np.array(v).reshape(shape)
+    shape = [int(s) for s in shape]
+    return v.reshape(shape)
+
+
+@register_op(doc_str="")
+class reshape(Operation):
+    """
+    Returns a tensor that has the same values as ``x`` with shape ``shape``.
+    ``shape`` must have the same volume (number of elements) as ``x``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * An n-D tensor or a scalar.
+        * If ``x`` has fixed rank (and possibly contains symbolic dimensions),
+          ``shape`` may contain elements that are not positive integers (see below).
+        * If ``x`` has variadic rank, ``shape`` can only contain positive integers.
+    shape: tensor<[K], i32> (Required)
+        * A 1-D tensor, with elements from the following:
+        * Positive integers.
+        * Symbols: All but one symbol in ``shape`` must be present in ``x.shape``.
+          The new symbol that is not present in ``x.shape`` represents a dimension
+          sized such that the total volume remains constant. Symbols are illegal
+          if ``x`` has variadic rank.
+        * ``-1``: ``-1`` introduces a new symbol (see Symbols). Therefore, ``-1`` is
+          allowed if all symbols in ``shape`` appear in ``x.shape``. ``-1`` is illegal
+          if ``x`` has variadic rank.
+        * ``0``: If ``K == rank(x)``, then ``0`` means inheriting from the corresponding
+          dimension in ``x.shape``. ``0`` is illegal if ``x`` has variadic rank.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Tensor with shape determined by the input shape.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=ScalarOrTensorInputType(), shape=IntTensorInputType(),)
+
+    def __init__(self, **kwargs):
+        super(reshape, self).__init__(**kwargs)
+
+    def type_inference(self):
+        if any_symbolic(self.shape.shape):
+            # We can't infer any shape if shape has variable length.
+            return types.tensor(self.x.dtype, (get_new_variadic_symbol(),))
+
+        # shape has fixed length here.
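+        # The length K of the shape tensor is known, so the output rank is K
+        # even when the individual dimension values are not yet known.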
+        if self.shape.sym_val is None:
+            shape = tuple([get_new_symbol() for _ in range(self.shape.shape[0])])
+            return types.tensor(self.x.dtype, shape)
+        t, _ = self._get_type_val()
+        return t
+
+    @precondition(allow=VALUE | SYMBOL)
+    def value_inference(self):
+        _, val = self._get_type_val()
+        return val
+
+    def _get_type_val(self):
+        x_type = self.x.dtype
+        x_shape = self.x.shape
+        x_vol = np.prod(x_shape)
+        # shape is const, and thus sym_val is not None
+        sym_shape = self.shape.sym_val
+        sym_shape = [get_new_symbol() if d == -1 else d for d in sym_shape]
+        ret_shape = reshape.enforce_volumetric_constraint(x_vol, sym_shape)
+        ret_val = None
+        if self.x.val is not None and all(isscalar(a) for a in ret_shape):
+            ret_val = reshape_with_symbol(self.x.val, ret_shape)
+        return types.tensor(x_type, tuple(ret_shape)), ret_val
+
+    @staticmethod
+    def enforce_volumetric_constraint(left_volume, inshape):
+        left_symbols = set()
+        if is_symbolic(left_volume):
+            left_symbols = left_volume.free_symbols
+        # Generally, we want to solve for the right-hand side in terms of the
+        # left, which is nontrivial in the general case.
+        shape = list(inshape)
+
+        # Handle the case when reshape is given 0 instead of the actual input
+        # shape, e.g. input tensor shape: [4, 3, 2], reshape: [0, -1],
+        # output tensor shape: [4, 6]
+        if shape.count(-1) > 1:
+            raise ValueError(
+                "Reshape op supports only one dimension to be -1. Given {}".format(
+                    shape.count(-1)
+                )
+            )
+
+        infer_dim_index = shape.index(-1) if -1 in shape else None
+        right_volume = 1
+        for i in shape:
+            if i != -1:
+                right_volume = right_volume * i
+
+        # Check against None explicitly: the inferred dimension may be index 0.
+        if infer_dim_index is not None:
+            shape[infer_dim_index] = left_volume // right_volume
+
+        if not is_symbolic(right_volume):
+            return shape
+
+        constraints = [left_volume - right_volume]
+        solve_for = [s for s in shape if is_symbolic(s)]
+
+        for rightsym in solve_for:
+            sol = sm.solve(constraints, [rightsym], dict=True)
+            if not isinstance(sol, list):
+                sol = [sol]
+            # look for an acceptable solution
+            for s in sol:
+                if 0 in s.values():
+                    continue
+                for i in range(len(shape)):
+                    if shape[i] in s:
+                        v = s[shape[i]]
+                        if len(v.free_symbols - left_symbols) > 0:
+                            continue
+                        try:
+                            shape[i] = int(v)
+                        except:
+                            shape[i] = v
+        return shape
+
+
+@register_op(doc_str="")
+class reverse(Operation):
+    """
+    Reverses the order of the input tensor ``x`` along the specified ``axes`` (dimensions).
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Input tensor.
+    axes: const (Optional)
+        * Dimension(s) to reverse. Each axis must be in the range ``[-rank(x), rank(x))``.
+        * Defaults to ``None`` (reverse all dimensions).
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Same type and shape as the input tensor.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(), axes=IntTensorInputType(const=True, optional=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(reverse, self).__init__(**kwargs)
+
+    def type_inference(self):
+        return self.x.sym_type
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        res = self.x.val
+        axes = self.axes.val if self.axes is not None else range(self.x.rank)
+        for axis in axes:
+            res = np.flip(res, axis=axis)
+        return res
+
+
+@register_op(doc_str="")
+class reverse_sequence(Operation):
+    """
+    Reverses variable length slices for specified axes / dimensions of the input
+    tensor. This op first slices the input tensor along the ``batch_axis`` dimension,
+    then partially reverses the elements along the ``seq_axis`` for the first
+    ``lengths[i]`` elements of each slice.
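+
+    For example (illustrative values): with ``seq_axis=1``, ``batch_axis=0``,
+    ``x = [[1, 2, 3, 4]]`` and ``lengths = [3]``, the result is
+    ``[[3, 2, 1, 4]]``.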
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Input tensor.
+    lengths: (Required)
+        * 1-dimensional tensor of length ``x.shape[batch_axis]`` specifying the length
+          of the sequence to reverse.
+        * Values must be in range ``[0, x.shape[seq_axis]]``.
+    seq_axis: const (Optional)
+        * The dimension to reverse.
+        * Defaults to ``0``.
+    batch_axis: const (Optional)
+        * Dimension for slicing.
+        * Defaults to ``0``.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Same type and shape as the input tensor.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        lengths=IntTensorInputType(),
+        seq_axis=IntInputType(const=True, default=0),
+        batch_axis=IntInputType(const=True, default=0),
+    )
+
+    def __init__(self, **kwargs):
+        super(reverse_sequence, self).__init__(**kwargs)
+
+    def type_inference(self):
+        return self.x.sym_type
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        raise NotImplementedError("TODO")
+
+
+@register_op(doc_str="")
+class slice_by_size(Operation):
+    """
+    Slice the input tensor with the given ``begin`` index and ``size`` along each dimension.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Input tensor.
+    begin: tensor<[rank(x)], i32> (Required)
+        * The begin index for the slice.
+    size: tensor<[rank(x)], i32> (Required)
+        * The size of the slice along each dimension. If ``size[i]`` is ``-1``,
+          all the remaining elements along that dimension are included.
+
+    Returns
+    -------
+    tensor<*?, T>
+        * Scalar or tensor. Same type as the input tensor.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(), begin=IntTensorInputType(), size=IntTensorInputType(),
+    )
+
+    def __init__(self, **kwargs):
+        super(slice_by_size, self).__init__(**kwargs)
+
+    def type_inference(self):
+        if self.begin.rank != 1:
+            raise ValueError(
+                "begin should be 1-D tensor, got {}-D tensor instead".format(
+                    self.begin.rank
+                )
+            )
+        if self.size.rank != 1:
+            raise ValueError(
+                "size should be 1-D tensor, got {}-D tensor instead".format(
+                    self.size.rank
+                )
+            )
+        if self.x.rank != self.begin.shape[0]:
+            raise ValueError(
+                "Length of begin {} doesn't equal input rank {}.".format(
+                    self.begin.shape[0], self.x.rank
+                )
+            )
+        if self.x.rank != self.size.shape[0]:
+            raise ValueError(
+                "Length of size {} doesn't equal input rank {}.".format(
+                    self.size.shape[0], self.x.rank
+                )
+            )
+
+        x_shape = self.x.shape
+        ret_shape = []
+        if self.size.sym_val is None:
+            ret_shape = [get_new_symbol() for _ in range(self.x.rank)]
+            return types.tensor(self.x.dtype, tuple(ret_shape))
+
+        for idx, s in enumerate(self.size.sym_val):
+            if is_symbolic(s):
+                ret_shape.append(s)
+            elif s != -1:
+                ret_shape.append(s)
+            elif self.begin.sym_val is not None:
+                ret_shape.append(x_shape[idx] - self.begin.sym_val[idx])
+            else:
+                ret_shape.append(get_new_symbol())
+
+        return types.tensor(self.x.dtype, tuple(ret_shape))
+
+    @precondition(allow=VALUE | SYMBOL)
+    def value_inference(self):
+        if any_symbolic(self.begin.sym_val):
+            return None
+        if any_symbolic(self.size.sym_val):
+            return None
+        if self.x.val is None:
+            return None
+        slices = []
+        for i in range(self.x.rank):
+            begin_val = self.begin.val[i]
+            if begin_val < 0:
+                if is_symbolic(self.x.shape[i]):
+                    return None
+                begin_val += self.x.shape[i]
+            if self.size.val[i] > 0:
+                slices.append(slice(begin_val, begin_val + self.size.val[i]))
+            else:
+                slices.append(slice(begin_val, None, None))
+        return self.x.val[tuple(slices)]
+
+
+@register_op(doc_str="")
+class space_to_depth(Operation):
+    """
+    Rearranges elements in
a tensor from the spatial dimensions into the depth (channel) dimension.
+
+    Parameters
+    ----------
+    x: tensor<[n, C, H, W], T> (Required)
+        * Input tensor of rank ``4``.
+    block_size: const (Required)
+        * The size of the spatial block. Must be greater than ``1``; the
+          spatial dimensions ``H, W`` must each be divisible by ``block_size``.
+
+    Returns
+    -------
+    tensor<[n, C x block_size^2, H / block_size, W / block_size], T>
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=TensorInputType(), block_size=IntInputType(const=True),)
+
+    def __init__(self, **kwargs):
+        super(space_to_depth, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        n, c, h, w = self.x.shape
+        bs = self.block_size.val
+        ret_shape = (n, c * (bs * bs), h // bs, w // bs)
+        return types.tensor(x_type, ret_shape)
+
+
+@register_op(doc_str="")
+class squeeze(Operation):
+    """
+    Removes dimensions of size ``1`` from a 1-D or higher tensor.
+
+    Parameters
+    ----------
+    x: tensor<*?,T> (Required)
+        * Must be at least 1-D.
+    axes: const (Optional)
+        * Axes to squeeze out.
+        * Defaults to removing all dimensions of size ``1``.
+
+    Returns
+    -------
+    tensor<*(rank(x)-K),T>
+        * Tensor with same type as input ``x`` and rank ``rank(x)-K``.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(), axes=IntTensorInputType(const=True, optional=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(squeeze, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        x_shape = self.x.shape
+        squeezed_shape = list(x_shape)
+        if self.axes is None:
+            # Squeeze all single-dim, assuming symbolic dims != 1
+            squeezed_shape = [s for s in squeezed_shape if s != 1]
+        else:
+            axes = self.axes.val
+            axes = [axis if axis >= 0 else axis + self.x.rank for axis in axes]
+            for i in sorted(axes)[::-1]:  # descending order
+                if len(squeezed_shape) <= i:
+                    raise ValueError(
+                        "Cannot squeeze dim {} for shape {}".format(i, squeezed_shape)
+                    )
+                squeezed_shape.pop(i)
+
+        return types.tensor(x_type, tuple(squeezed_shape))
+
+    @precondition(allow=VALUE)
+    def value_inference(self):
+        if self.axes is None:
+            return np.squeeze(self.x.val)
+        else:
+            return np.squeeze(self.x.val, axis=tuple(self.axes.val))
+
+
+@register_op(doc_str="")
+class transpose(Operation):
+    """
+    Permutes tensor ``x`` dimensions according to ``perm``.
+
+    Parameters
+    ----------
+    x: tensor<*?, T> (Required)
+        * Must be at least 1-D. ``x`` may have symbolic shape.
+    perm: const<[rank(x)], i32> (Required)
+        * Permutation order. Must be non-negative integers.
+
+    Returns
+    -------
+    tensor<*?,T>
+        * Tensor with same rank and type as ``x``.
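+
+    Examples
+    --------
+    1. ``x.shape = (1, 2, 3), perm = (2, 0, 1), output shape = (3, 1, 2)``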
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(x=TensorInputType(), perm=IntTensorInputType(const=True),)
+
+    def __init__(self, **kwargs):
+        super(transpose, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        perm = self.perm.val
+        x_shape = np.array(self.x.shape)
+        if len(perm) != self.x.rank:
+            msg = "perm should have the same length as rank(x): {} != {}"
+            raise ValueError(msg.format(len(perm), self.x.rank))
+        if self.x.rank == 0:
+            return self.x.sym_type  # scalar cannot be transposed
+        if any_variadic(self.x.shape):
+            ret_shape = get_new_variadic_symbol()
+        else:
+            ret_shape = x_shape[perm]
+        return types.tensor(x_type, tuple(ret_shape))
+
+    @precondition(allow=VALUE | SYMBOL)
+    def value_inference(self):
+        return np.transpose(self.x.val, axes=self.perm.val)
+
+
+@register_op(doc_str="")
+class pixel_shuffle(Operation):
+    """
+    Rearranges elements in a tensor from depth (channel) into spatial dimensions.
+    Equivalent to PyTorch's ``pixel_shuffle``.
+
+    Parameters
+    ----------
+    x: tensor<[n, C x f^2, H, W], T> (Required)
+        * Input tensor of rank ``4``.
+    upscale_factor: const
+        * Factor to increase spatial resolution by.
+
+    Returns
+    -------
+    tensor<[n, C, H x f, W x f], T>
+        * where ``f`` is the upscale factor.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(), upscale_factor=IntInputType(const=True),
+    )
+
+    def __init__(self, **kwargs):
+        super(pixel_shuffle, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_type = self.x.dtype
+        n, c, h, w = self.x.shape
+        f = self.upscale_factor.val
+        ret_shape = (n, c // (f * f), h * f, w * f)
+        return types.tensor(x_type, ret_shape)
+
+
+@register_op(doc_str="")
+class sliding_windows(Operation):
+    """
+    Returns a tensor containing all windows of ``size``, separated by ``stride``,
+    along the given ``axis``.
+
+    Parameters
+    ----------
+    x: tensor<[*d0, d_axis, *dn], T> (Required)
+        * Input tensor.
+    axis: const (Required)
+        * Axis to perform the operation.
+    size: const (Required)
+        * Number of elements in the sliding window.
+    stride: const (Optional)
+        * Defaults to ``1``.
+        * The stride of the input elements in the sliding window.
+
+    Returns
+    -------
+    tensor<[*d0, (d_axis - size) // stride + 1, size, *dn], T>
+        * The output will be a tensor of rank ``N+1`` where ``N`` is the input tensor rank.
+
+    Attributes
+    ----------
+    T: fp32
+    """
+
+    input_spec = InputSpec(
+        x=TensorInputType(),
+        axis=IntInputType(const=True),
+        size=IntInputType(const=True),
+        stride=IntInputType(const=True, default=1),
+    )
+
+    def __init__(self, **kwargs):
+        super(sliding_windows, self).__init__(**kwargs)
+
+    def type_inference(self):
+        x_shape = self.x.shape
+        axis = self.axis.val
+        size = self.size.val
+        stride = self.stride.val
+        ret_shape = list(x_shape)
+        ret_shape[axis] = (x_shape[axis] - size) // stride + 1
+        pos_axis = axis if axis >= 0 else axis + self.x.rank
+        ret_shape.insert(pos_axis + 1, size)
+        return types.tensor(self.x.dtype, tuple(ret_shape))
diff --git a/coremltools/converters/mil/mil/ops/registry.py b/coremltools/converters/mil/mil/ops/registry.py
new file mode 100644
index 000000000..e260f6a04
--- /dev/null
+++ b/coremltools/converters/mil/mil/ops/registry.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import logging
+from ..builder import Builder
+from collections import defaultdict
+
+
+class SSAOpRegistry:
+    # ops is 3 nested dicts:
+    # namespace (str) -> {op_type (str) -> {op_class, doc_str}}
+    ops = defaultdict(dict)
+    custom_ops = {}
+
+    @staticmethod
+    def register_op(doc_str="", is_custom_op=False, namespace="core"):
+        """
+        Registration routine for MIL Program operators.
+        is_custom_op: (Boolean) [Default=False]
+            If True, maps the current operator to `custom_op`.
+            `custom_op` requires additional `bindings`, which should be
+            specified in the operator.
+            The current operator is then registered in `SSAOpRegistry.custom_ops`.
+            Otherwise, the current operator is registered as a usual operator,
+            i.e. registered in `SSAOpRegistry.ops`.
+        """
+
+        def class_wrapper(op_cls):
+            op_type = op_cls.__name__
+            # op_cls.__doc__ = doc_str  # TODO: rdar://58622145
+
+            # Operation specific to custom op
+            op_msg = "Custom op" if is_custom_op else "op"
+            op_reg = (
+                SSAOpRegistry.custom_ops
+                if is_custom_op
+                else SSAOpRegistry.ops[namespace]
+            )
+
+            logging.debug("Registering {} {}".format(op_msg, op_type))
+
+            if op_type in op_reg:
+                raise ValueError(
+                    "SSA {} {} already registered.".format(op_msg, op_type)
+                )
+
+            if namespace != "core":
+                # Check that op_type is prefixed with namespace
+                if op_type[: len(namespace)] != namespace:
+                    msg = (
+                        "Op type {} registered under {} namespace must "
+                        + "be prefixed with {}"
+                    )
+                    raise ValueError(msg.format(op_type, namespace, namespace))
+
+            op_reg[op_type] = {"class": op_cls, "doc_str": doc_str}
+
+            @classmethod
+            def add_op(cls, **kwargs):
+                return cls._add_op(op_cls, **kwargs)
+
+            setattr(Builder, op_type, add_op)
+            return op_cls
+
+        return class_wrapper
diff --git a/coremltools/converters/mil/mil/ops/tests/__init__.py b/coremltools/converters/mil/mil/ops/tests/__init__.py
new file mode 100644
index 000000000..61aafff42
--- /dev/null
+++ b/coremltools/converters/mil/mil/ops/tests/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
diff --git a/coremltools/converters/mil/mil/ops/tests/test_activation.py b/coremltools/converters/mil/mil/ops/tests/test_activation.py
new file mode 100644
index 000000000..e820bc755
--- /dev/null
+++ b/coremltools/converters/mil/mil/ops/tests/test_activation.py
@@ -0,0 +1,960 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import scipy
+from coremltools.converters.mil import testing_reqs
+from coremltools.converters.mil.testing_reqs import *
+
+from .testing_utils import run_compare_builder
+
+backends = testing_reqs.backends
+
+
+class TestClampedReLU:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        input_placeholders = {
+            "x": mb.placeholder(shape=t.shape),
+        }
+        input_values = {"x": t}
+
+        def build(x):
+            return mb.clamped_relu(x=x, alpha=2.0, beta=1.0)
+
+        expected_output_types = (2, 3, types.fp32)
+        expected_outputs = np.array([[-2, 1, -6], [1, -10, 1]], dtype=np.float32)
+
+        run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+    @ssa_fn
+    def test_builder_eval(self):
+        x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        v = mb.clamped_relu(x=x_val, alpha=2.0, beta=1.0)
+
+        x = np.minimum(np.maximum(x_val, 0), 1.0)
+        y = np.minimum(np.minimum(x_val, 0) * 2.0, 1.0)
+        assert is_close(x + y, v.val)
+
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, dim, alpha, beta",
+        itertools.product([True], backends, [2, 4, 8], [2.0, 3.0], [4.0, 5.0]),
+    )
+    def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, alpha, beta):
+        shape_x = np.array([dim, dim])
+        x_val = np.random.rand(*shape_x)
+        input_placeholders = {"x": mb.placeholder(shape=x_val.shape)}
+        input_values = {"x": x_val}
+
+        def build(x):
+            return [mb.clamped_relu(x=x, alpha=alpha, beta=beta)]
+
+        # Reference computed with the parametrized alpha/beta, not constants.
+        x = np.minimum(np.maximum(x_val, 0), beta)
+        y = np.minimum(np.minimum(x_val, 0) * alpha, beta)
+
+        expected_outputs = [x + y]
+        expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs]
+
+        run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs=expected_outputs,
+            use_cpu_only=use_cpu_only,
+            backend=backend,
+        )
+
+
+class TestELU:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        input_placeholders = {
+            "x": mb.placeholder(shape=t.shape),
+        }
+        input_values = {"x": t}
+
+        def build(x):
+            return mb.elu(x=x, alpha=2.0)
+
+        expected_output_types = (2, 3, types.fp32)
+        expected_outputs = np.array(
+            [[-1.2642411, 2.0, -1.9004259], [4.0, -1.9865241, 6.0]], dtype=np.float32
+        )
+
+        run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+    @ssa_fn
+    def test_builder_eval(self):
+        x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        v = mb.elu(x=x_val, alpha=2.0)
+
+        b = np.copy(x_val)
+        b[b < 0] = 2.0 * (np.exp(b[b < 0]) - 1)
+
+        assert is_close(b, v.val)
+
+
+class TestGeLU:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        input_placeholders = {
+            "x": mb.placeholder(shape=t.shape),
} + input_values = {"x": t} + + def build(x): + return mb.gelu(x=x) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array( + [ + [-1.58691406e-01, 1.95410156e00, -4.04968858e-03], + [3.99987316e00, -1.49011612e-06, 6.00000000e00], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + atol=1e-3, + rtol=1e-3, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + + mode = "TANH_APPROXIMATION" + v = mb.gelu(x=x_val, mode=mode) + a = np.sqrt(2 / np.pi) * (x_val + 0.044715 * np.power(x_val, 3)) + out = 0.5 * x_val * (1 + np.tanh(a)) + assert is_close(out, v.val) + + mode = "SIGMOID_APPROXIMATION" + v = mb.gelu(x=x_val, mode=mode) + out = x_val * (1 / (1 + np.exp(-(1.702 * x_val)))) + assert is_close(out, v.val) + + v = mb.gelu(x=x_val) + out = 0.5 * x_val * (1 + scipy.special.erf(x_val / np.sqrt(2))) + assert is_close(out, v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, dim, mode", + itertools.product( + [True, False], + backends, + [2, 6], + ["EXACT", "TANH_APPROXIMATION", "SIGMOID_APPROXIMATION"], + ), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, mode): + shape = np.array([dim, dim]) + x_val = np.random.rand(*shape) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.gelu(x=x, mode=mode)] + + if mode == "TANH_APPROXIMATION": + a = np.sqrt(2 / np.pi) * (x_val + 0.044715 * np.power(x_val, 3)) + out = 0.5 * x_val * (1 + np.tanh(a)) + elif mode == "SIGMOID_APPROXIMATION": + out = x_val * (1 / (1 + np.exp(-(1.702 * x_val)))) + else: + out = 0.5 * x_val * (1 + scipy.special.erf(x_val / np.sqrt(2))) + + expected_outputs = [out] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-3, + rtol=1e-3, + ) + + +class TestLeakyReLU: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=t.shape), + } + input_values = {"x": t} + + def build(x): + return mb.leaky_relu(x=x, alpha=2.0) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array([[-2, 2, -6], [4, -10, 6]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.leaky_relu(x=x_val, alpha=2.0) + + b = np.copy(x_val) + b[b < 0] *= 2.0 + assert is_close(b, v.val) + + +class TestLinearActivation: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.linear_activation(x=x, alpha=2.0, 
beta=3.0) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array([[1, 7, -3], [11, -7, 15]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.linear_activation(x=x_val, alpha=2.0, beta=3.0) + assert is_close(x_val * 2.0 + 3.0, v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, dim", + itertools.product([True, False], backends, [2, 4, 8]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim): + shape = np.array([dim, dim]) + x_val = np.random.rand(*shape) + alpha = np.random.uniform() + beta = np.random.uniform() + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + } + input_values = {"x": x_val} + + def build(x): + return [mb.linear_activation(x=x, alpha=alpha, beta=beta)] + + expected_outputs = [x_val * alpha + beta] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +# TODO (rdar://59954690): Broken when there is 1 channel +class TestPReLU: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.prelu(x=x, alpha=np.array([1, 2, 3], dtype=np.float32)) + + expected_output_types = (3, 1, 3, types.fp32) + expected_outputs = np.array( + [[[-1, 3, 6]], [[-2, 2, -6]], [[4, -15, 6]]], dtype=np.float32 + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + alpha = np.array([1, 2, 3], dtype=np.float32) + v = mb.prelu(x=x_val, alpha=alpha) + + alpha_br = alpha + + for i in range(1, len(x_val.shape)): + alpha_br = np.expand_dims(alpha_br, i) + + x_pos = np.maximum(x_val, 0) + b = np.minimum(x_val, 0) + + assert is_close(x_pos + b * alpha_br, v.val) + + @ssa_fn + def test_builder_eval1(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + with pytest.raises(ValueError, match=r".* dimension -3 .*"): + v = mb.prelu(x=x_val, alpha=np.array([1, 2], dtype=np.float32)) + + @ssa_fn + def test_builder_eval2(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + with pytest.raises(ValueError, match=r"alpha .* rank 1"): + v = mb.prelu(x=x_val, alpha=np.array([[1, 2, 3]], dtype=np.float32)) + + @ssa_fn + def test_builder_eval3(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + with pytest.raises(ValueError, match=r"x .* rank 3"): + v = mb.prelu(x=[1], alpha=np.array([[1, 2, 3]], dtype=np.float32)) + + # TODO (rdar://59672999): NN does not support PReLU with 1 input channel + @pytest.mark.parametrize( + "use_cpu_only, backend, dim, chan", + itertools.product([True, False], 
backends, [1, 2, 4, 8], [2, 3, 4]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, chan): + shape = np.array([chan, dim, dim]) + x_val = np.random.rand(*shape) + alpha_val = np.random.rand(chan).astype(np.float32) + + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.prelu(x=x, alpha=alpha_val)] + + alpha_br = np.copy(alpha_val) + for i in range(1, len(x_val.shape)): + alpha_br = np.expand_dims(alpha_br, i) + x_pos = np.maximum(x_val, 0) + b = np.minimum(x_val, 0) + + expected_outputs = [x_pos + b * alpha_br] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestReLU: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.relu(x=x) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array([[0, 2, 0], [4, 0, 6]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.relu(x=x_val) + assert is_close(np.maximum(x_val, 0), v.val) + + +class TestReLU6: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 7, -3], [4, -5, 8]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.relu6(x=x) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array([[0, 6, 0], [4, 0, 6]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 7, -3], [4, -5, 8]], dtype=np.float32) + v = mb.relu6(x=x_val) + assert is_close(np.minimum(np.maximum(x_val, 0), 6), v.val) + + +class TestScaledTanh: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.scaled_tanh(x=x, alpha=2.0, beta=1.0) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array( + [[-1.5231884, 1.9280552, -1.9901096], [1.9986587, -1.9998184, 1.9999754]], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.scaled_tanh(x=x_val, 
alpha=2.0, beta=1.0) + assert is_close(2.0 * np.tanh(x_val * 1.0), v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, dim, alpha, beta", + itertools.product([True], backends, [2, 4, 8], [2.0, 3.0], [4.0, 5.0]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, alpha, beta): + shape_x = np.array([dim, dim]) + x_val = np.random.rand(*shape_x) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.scaled_tanh(x=x, alpha=alpha, beta=beta)] + + expected_outputs = [alpha * np.tanh(x_val * beta)] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSigmoid: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.sigmoid(x=x) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array( + [ + [0.2689414213699951, 0.8807970779778823, 0.04742587], + [0.98201376, 0.00669285, 0.9975274], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.sigmoid(x=x_val) + assert is_close(1 / (1 + np.exp(-x_val)), v.val) + + +class TestSigmoidHard: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.sigmoid_hard(x=x, alpha=1.0, beta=2.0) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array( + [[1.0, 1.0, 0.0], [1.0, 0.0, 1.0]], dtype=np.float32 + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + alpha = 1.0 + beta = 2.0 + v = mb.sigmoid_hard(x=x_val, alpha=alpha, beta=beta) + assert is_close(np.minimum(np.maximum((alpha * x_val) + beta, 0), 1), v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, dim, alpha, beta", + itertools.product([True], backends, [2, 4, 8], [2.0, 3.0], [4.0, 5.0]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, alpha, beta): + shape_x = np.array([dim, dim]) + x_val = np.random.rand(*shape_x) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.sigmoid_hard(x=x, alpha=alpha, beta=beta)] + + expected_outputs = [np.minimum(np.maximum((alpha * x_val) + beta, 0), 1)] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + 
expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSoftplus: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.softplus(x=x) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array( + [[0.31326166, 2.126928, 0.04858733], [4.01815, 0.00671535, 6.0024757]], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.softplus(x=x_val) + assert is_close( + np.log(1 + np.exp(-np.abs(x_val))) + np.maximum(x_val, 0), v.val + ) + + +# TODO (rdar://59954690): NN Segfaults when converting from MIL ParametricSoftplus layer +# No torch test because there is no direct torch translation to this layer +class TestSoftplusParametric: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.softplus_parametric( + x=x, + alpha=np.array([1, 2, 3], dtype=np.float32), + beta=np.array([4, 5, 6], dtype=np.float32), + ) + + expected_output_types = (3, 1, 3, types.fp32) + expected_outputs = np.array( + [ + [[1.8142700e-02, 1.2000000e01, 2.4000000e01]], + [[1.3427734e-02, 2.0000000e01, 7.1525574e-07]], + [[7.2000000e01, 0.0000000e00, 1.0800000e02]], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + v = mb.softplus_parametric( + x=x_val, + alpha=np.array([1, 2, 3], dtype=np.float32), + beta=np.array([4, 5, 6], dtype=np.float32), + ) + + alpha_br = np.array([1, 2, 3], dtype=np.float32) + beta_br = np.array([4, 5, 6], dtype=np.float32) + for i in range(1, len(x_val.shape)): + alpha_br = np.expand_dims(alpha_br, i) + beta_br = np.expand_dims(beta_br, i) + out = alpha_br * np.log(np.exp(x_val * beta_br) + 1) + + assert is_close(out, v.val) + + @ssa_fn + def test_builder_eval2(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + with pytest.raises(ValueError, match=r".* dimension -3 .*"): + v = mb.softplus_parametric( + x=x_val, + alpha=np.array([1, 2], dtype=np.float32), + beta=np.array([4, 5, 6], dtype=np.float32), + ) + + @ssa_fn + def test_builder_eval3(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32) + with pytest.raises(ValueError, match=r"alpha .* rank 1"): + v = mb.softplus_parametric( + x=x_val, + alpha=np.array([[1, 2, 3]], dtype=np.float32), + beta=np.array([4, 5, 6], dtype=np.float32), + ) + + @ssa_fn + def test_builder_eval4(self): + x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 
6]]], dtype=np.float32)
+        with pytest.raises(ValueError, match=r"x .* rank 3"):
+            v = mb.softplus_parametric(
+                x=[1],
+                alpha=np.array([[1, 2, 3]], dtype=np.float32),
+                beta=np.array([4, 5, 6], dtype=np.float32),
+            )
+
+    @ssa_fn
+    def test_builder_eval5(self):
+        x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32)
+        with pytest.raises(ValueError, match=r".* dimension -3 .*"):
+            v = mb.softplus_parametric(
+                x=x_val,
+                alpha=np.array([1, 2, 3], dtype=np.float32),
+                beta=np.array([5, 6], dtype=np.float32),
+            )
+
+    @ssa_fn
+    def test_builder_eval6(self):
+        x_val = np.array([[[-1, 3, 6]], [[-1, 2, -3]], [[4, -5, 6]]], dtype=np.float32)
+        with pytest.raises(ValueError, match=r"beta .* rank 1"):
+            v = mb.softplus_parametric(
+                x=x_val,
+                alpha=np.array([1, 2, 3], dtype=np.float32),
+                beta=np.array([[4, 5, 6]], dtype=np.float32),
+            )
+
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, dim, chan",
+        itertools.product([True, False], backends, [1, 2, 4, 8], [1, 2, 3]),
+    )
+    def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, chan):
+        shape = np.array([chan, dim, dim])
+        x_val = np.random.rand(*shape)
+        alpha_val = np.random.rand(chan).astype(np.float32)
+        beta_val = np.random.rand(chan).astype(np.float32)
+
+        input_placeholders = {"x": mb.placeholder(shape=x_val.shape)}
+        input_values = {"x": x_val}
+
+        def build(x):
+            return [mb.softplus_parametric(x=x, alpha=alpha_val, beta=beta_val)]
+
+        alpha_br = np.copy(alpha_val)
+        beta_br = np.copy(beta_val)
+        for i in range(1, len(x_val.shape)):
+            alpha_br = np.expand_dims(alpha_br, i)
+            beta_br = np.expand_dims(beta_br, i)
+        expected_outputs = [alpha_br * np.log(np.exp(x_val * beta_br) + 1)]
+        expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs]
+
+        run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs=expected_outputs,
+            use_cpu_only=use_cpu_only,
+            backend=backend,
+        )
+
+
+class TestSoftmax:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        input_placeholders = {"x": mb.placeholder(shape=t.shape)}
+        input_values = {"x": t}
+
+        def build(x):
+            return mb.softmax(logit=x, axis=0)
+
+        expected_output_types = (2, 3, types.fp32)
+        expected_outputs = np.array(
+            [
+                [6.69285092e-03, 9.99088949e-01, 1.23394576e-04],
+                [9.93307149e-01, 9.11051194e-04, 9.99876605e-01],
+            ],
+            dtype=np.float32,
+        )
+        run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+    @ssa_fn
+    def test_builder_eval(self):
+        x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        v = mb.softmax(logit=x_val, axis=0)
+        assert is_close(scipy.special.softmax(x_val, axis=0), v.val)
+
+
+class TestSoftsign:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        input_placeholders = {"x": mb.placeholder(shape=t.shape)}
+        input_values = {"x": t}
+
+        def build(x):
+            return mb.softsign(x=x)
+
+        expected_output_types = (2, 3, types.fp32)
+        expected_outputs = np.array(
+            [[-0.5, 0.66666667, -0.75], [0.8, -0.83333333, 0.85714286]],
+            dtype=np.float32,
+        )
+
+        run_compare_builder(
build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.softsign(x=x_val) + assert is_close(x_val / (1 + np.abs(x_val)), v.val) + + +class TestThresholdedReLU: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.thresholded_relu(x=x, alpha=2.0) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array([[0, 2, 0], [4, 0, 6]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[0, 2, 0], [4, 0, 6]], dtype=np.float32) + v = mb.thresholded_relu(x=x_val, alpha=2.0) + assert is_close(np.maximum(x_val - 2.0, 0), v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, dim, alpha", + itertools.product([True], backends, [2, 4, 8], [2.0, 3.0]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim, alpha): + shape_x = np.array([dim, dim]) + x_val = np.random.rand(*shape_x) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.thresholded_relu(x=x, alpha=alpha)] + + expected_outputs = [np.maximum(x_val - alpha, 0)] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_control_flow.py b/coremltools/converters/mil/mil/ops/tests/test_control_flow.py new file mode 100644 index 000000000..603e64957 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_control_flow.py @@ -0,0 +1,302 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * +from .testing_utils import run_compare_builder, UNK_SYM + +backends = testing_reqs.backends + + +class TestSelect: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + cond_val = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]], dtype=np.float32) + a_val = np.array([[3, 1, 1], [1, 4, 1], [5, 6, 1]], dtype=np.float32) + b_val = np.array([[3, 2, 2], [2, 4, 2], [5, 6, 2]], dtype=np.float32) + input_placeholders = { + "cond": mb.placeholder(shape=cond_val.shape), + "a": mb.placeholder(shape=a_val.shape), + "b": mb.placeholder(shape=b_val.shape), + } + input_values = {"cond": cond_val, "a": a_val, "b": b_val} + + def build(cond, a, b): + return [mb.select(cond=cond, a=a, b=b)] + + expected_output_types = [(3, 3, types.fp32)] + expected_outputs = [ + np.array( + [[3.0, 2.0, 2.0], [2.0, 4.0, 2.0], [5.0, 6.0, 2.0]], dtype=np.float32 + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke_broadcast(self, use_cpu_only, backend): + cond_val = np.array([[1], [0], [2]], dtype=np.float32) + a_val = np.array([[3, 1, 1], [1, 4, 1], [5, 6, 1]], dtype=np.float32) + b_val = np.array([[3, 2, 2], [2, 4, 2], [5, 6, 2]], dtype=np.float32) + input_placeholders = { + "cond": mb.placeholder(shape=cond_val.shape), + "a": mb.placeholder(shape=a_val.shape), + "b": mb.placeholder(shape=b_val.shape), + } + input_values = {"cond": cond_val, "a": a_val, "b": b_val} + + def build(cond, a, b): + return [mb.select(cond=cond, a=a, b=b)] + + expected_output_types = [(3, 3, types.fp32)] + expected_outputs = [ + np.array( + [[3.0, 1.0, 1.0], [2.0, 4.0, 2.0], [5.0, 6.0, 1.0]], dtype=np.float32 + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + cond = np.random.randint(low=0, high=2, size=(6, 1, 7)) + a = random_gen(shape=(6, 1, 7), rand_min=-1962.0, rand_max=0.0) + b = random_gen(shape=(6, 1, 7), rand_min=0.0, rand_max=1964.0) + res = mb.select(cond=cond, a=a, b=b) + assert is_close(np.where(cond, a, b), res.val) + + @ssa_fn + def test_builder_eval_broadcast(self): + cond = np.array([[1], [0], [1]]) + a = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32) + b = np.array([[7, 8], [9, 10], [11, 12]], dtype=np.float32) + res = mb.select(cond=cond, a=a, b=b) + assert is_close(np.array([[1, 2], [9, 10], [5, 6]], dtype=np.float32), res.val) + + +class TestCond: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + input_placeholders = { + "a": mb.placeholder(shape=(1,), dtype=types.bool), + "b": mb.placeholder(shape=(1,)), + } + + def build(a, b): + def true_fn(): + return mb.add(x=b, y=1), mb.mul(x=b, y=2) + + def false_fn(): + return mb.add(x=b, y=-1), mb.mul(x=b, y=-2) + + pred = mb.squeeze(x=a) + return 
mb.cond(pred=pred, _true_fn=true_fn, _false_fn=false_fn) + + input_values = { + "a": np.array([0], dtype=np.float32), + "b": np.array([2], dtype=np.float32), + } + + expected_output_types = [ + (1, types.fp32), + (1, types.fp32), + ] + + expected_outputs = [ + np.array([1], dtype=np.float32), + np.array([-4], dtype=np.float32), + ] + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestWhileLoop: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + def body(a, b): + return mb.add(x=a, y=np.float32(1)), b + + def cond(a, b): + return mb.less(x=a, y=b) + + input_placeholders = { + "a": mb.placeholder(shape=(1,)), + "b": mb.placeholder(shape=(1,)), + } + + def build(a, b): + return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) + + input_values = { + "a": np.array([1], dtype=np.float32), + "b": np.array([2], dtype=np.float32), + } + + expected_output_types = [ + (1, types.fp32), + (1, types.fp32), + ] + + expected_outputs = [ + np.array([2], dtype=np.float32), + np.array([2], dtype=np.float32), + ] + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestList: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + elem_shape = (2,) + input_placeholders = { + "a": mb.placeholder(shape=elem_shape), + "b": mb.placeholder(shape=elem_shape), + } + + def build(a, b): + ls = mb.make_list(init_length=2, elem_shape=elem_shape) + # list is initially all 0 + init_t = mb.list_read(ls=ls, index=0) + ls = mb.list_write(ls=ls, index=0, value=a) + # this write is out of bound + ls = mb.list_write(ls=ls, index=4, value=b) + ls = mb.list_scatter( + ls=ls, + indices=[2, 1], + value=np.array([[-1, -2], [-4, -5]], dtype=np.float32), + ) + return ( + init_t, + mb.list_read(ls=ls, index=0), + mb.list_gather(ls=ls, indices=[4, 2, 3]), + ) + + input_values = { + "a": np.array([1, 3], dtype=np.float32), + "b": np.array([2, 4], dtype=np.float32), + } + + expected_output_types = [ + (2, types.fp32), + (2, types.fp32), + (3, 2, types.fp32), + ] + + expected_outputs = [ + np.array([0, 0], dtype=np.float32), + np.array([1, 3], dtype=np.float32), + np.array([[2, 4], [-1, -2], [0, 0]], dtype=np.float32), + ] + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_while(self, use_cpu_only, backend): + # The while_loop appends [1, 2]*i to `ls` for each iteration + # i = 0, ... num_iters-1. 
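+        # For reference, with the inputs used below (update=[1, 2],
+        # num_iters=3) the gathered result can be reproduced in plain
+        # NumPy (a sketch for readers; not exercised by the test):
+        #   np.stack([np.array([1, 2], np.float32) * i for i in range(3)])
+        #   # -> [[0, 0], [1, 2], [2, 4]]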
+ def body(i, num_iters, ls, update): + new_elem = mb.mul(x=update, y=i) + return ( + mb.add(x=i, y=1), + num_iters, + mb.list_write(ls=ls, index=i, value=new_elem), + update, + ) + + def cond(i, num_iters, ls, update): + return mb.less(x=i, y=num_iters) + + elem_shape = (2,) + input_placeholders = { + "num_iters": mb.placeholder(shape=(1,)), + "update": mb.placeholder(shape=elem_shape), + } + + def build(num_iters, update): + i = 0 + ls = mb.make_list(init_length=1, elem_shape=elem_shape) + _, _, final_tensor_list, _ = mb.while_loop( + _cond=cond, _body=body, loop_vars=(i, num_iters, ls, update) + ) + list_len = mb.list_length(ls=final_tensor_list) + indices = mb.range_1d(start=0, end=list_len, step=1) + return mb.list_gather(ls=final_tensor_list, indices=indices) + + input_values = { + "num_iters": np.array([3], dtype=np.float32), + "update": np.array([1, 2], dtype=np.float32), + } + + expected_output_types = [ + # Type inference does not unroll loop + (UNK_SYM, 2, types.fp32), + ] + + expected_outputs = [ + np.array([[0, 0], [1, 2], [2, 4]], dtype=np.float32), + ] + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_conv.py b/coremltools/converters/mil/mil/ops/tests/test_conv.py new file mode 100644 index 000000000..80c5cf956 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_conv.py @@ -0,0 +1,348 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil import get_new_symbol +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestConvTranspose: + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "conv_dim", + "padding", + "DHWKdKhKw", + "stride", + "dilation", + "has_bias", + "groups", + "test_symbolic", + "test_output_shape", + ] + ), + itertools.product( + [True, False], + backends, + ["conv1d", "conv2d", "conv3d"], + [(1, 2, 3), (2, 2, 2)], + [(7, 7, 7, 2, 2, 2), (10, 12, 14, 3, 2, 4)], + [(1, 1, 1), (2, 1, 2)], + [(1, 1, 1), (1, 2, 1)], + [True, False], + # TODO: rdar://64018048 (Deconv3d: Incorrect output for group > 1) + [1], + [True, False], + # TODO: rdar://63968613 ([deconv3d] Deconv_3d top_shapes_for_bottom_shapes does not sets output channel if output shape is provided) + [False], + ), + ) + def test_builder_to_backend_stress( + self, + use_cpu_only, + backend, + conv_dim, + padding, + DHWKdKhKw, + stride, + dilation, + has_bias, + groups, + test_symbolic, + test_output_shape, + ): + D, H, W, Kd, Kh, Kw = DHWKdKhKw + N, C_in, C_out = 1, 1 * groups, 2 * groups + + import torch + import torch.nn as nn + + isDeconv1d = conv_dim == "conv1d" + isDeconv2d = conv_dim == "conv2d" + + if isDeconv1d: + strides = [stride[0]] + dilations = [dilation[0]] + kernels = [Kh] + m = nn.ConvTranspose1d( + C_in, + C_out, + kernels, + stride=strides, + dilation=dilations, + bias=has_bias, + groups=groups, + padding=padding[0], + ) + input_shape = [N, C_in, H] + paddings = [padding[0], padding[0]] + + elif isDeconv2d: + strides = 
[stride[0], stride[1]] + dilations = [dilation[0], dilation[1]] + kernels = [Kh, Kw] + m = nn.ConvTranspose2d( + C_in, + C_out, + kernels, + stride=strides, + dilation=dilations, + bias=has_bias, + groups=groups, + padding=(padding[0], padding[1]), + ) + input_shape = [N, C_in, H, W] + paddings = [padding[0], padding[0], padding[1], padding[1]] + else: + strides = [stride[0], stride[1], stride[2]] + dilations = [dilation[0], dilation[1], dilation[2]] + kernels = [Kd, Kh, Kw] + m = nn.ConvTranspose3d( + C_in, + C_out, + kernels, + stride=strides, + dilation=dilations, + bias=has_bias, + groups=groups, + padding=padding, + ) + input_shape = [N, C_in, D, H, W] + paddings = [ + padding[0], + padding[0], + padding[1], + padding[1], + padding[2], + padding[2], + ] + + wts = m.state_dict() + weight = wts["weight"].detach().numpy() + bias = wts["bias"].detach().numpy() if has_bias else None + + # Reshape to CoreML format + # PyTorch weight format: C_in, C_out, H, W + # MIL weight format: C_out, C_in, H, W + if isDeconv1d: + weight = np.transpose(weight, [1, 0, 2]) + elif isDeconv2d: + weight = np.transpose(weight, [1, 0, 2, 3]) + else: + weight = np.transpose(weight, [1, 0, 2, 3, 4]) + + input = torch.randn(*input_shape) + output = m(input) + output = output.detach().numpy() + input = input.detach().numpy() + + output_shape = list(output.shape) + if test_symbolic: + # For symbolic input test + # Make Batch Size and input channel as symbolic + symbolic_batch_size = get_new_symbol() + input_shape[0] = symbolic_batch_size + output_shape[0] = symbolic_batch_size + + expected_output_types = tuple(output_shape[:]) + (types.fp32,) + expected_outputs = [output] + + input_placeholders = {"x": mb.placeholder(shape=input_shape)} + input_values = {"x": input} + + def build(x): + arguments = { + "x": x, + "weight": weight, + "pad": paddings, + "pad_type": "custom", + "strides": strides, + "dilations": dilations, + "groups": groups, + } + if has_bias: + arguments["bias"] = bias + if test_output_shape: + arguments["output_shape"] = output.shape[2:] + return mb.conv_transpose(**arguments) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestConv: + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "conv_dim", + "padding", + "DHWKdKhKw", + "stride", + "dilation", + "has_bias", + "groups", + "symbolic", + ] + ), + itertools.product( + [True, False], + backends, + ["conv1d", "conv2d", "conv3d"], + [(1, 1, 1), (2, 2, 2)], + [(5, 5, 5, 4, 4, 4), (10, 12, 14, 3, 2, 4)], + [(1, 1, 1), (1, 2, 1)], + [(1, 1, 1), (1, 2, 1)], + [True, False], + [1, 2], + [True, False], + ), + ) + def test_builder_to_backend_stress( + self, + use_cpu_only, + backend, + conv_dim, + padding, + DHWKdKhKw, + stride, + dilation, + has_bias, + groups, + symbolic, + ): + D, H, W, Kd, Kh, Kw = DHWKdKhKw + N, C_in, C_out = 1, 1 * groups, 2 * groups + + import torch + import torch.nn as nn + + isConv1d = conv_dim == "conv1d" + isConv2d = conv_dim == "conv2d" + + if isConv1d: + strides = [stride[0]] + dilations = [dilation[0]] + kernels = [Kh] + m = nn.Conv1d( + C_in, + C_out, + kernels, + stride=strides, + dilation=dilations, + bias=has_bias, + groups=groups, + padding=padding[0], + ) + input_shape = [N, C_in, H] + paddings = [padding[0], padding[0]] + elif isConv2d: + strides = [stride[0], 
stride[1]] + dilations = [dilation[0], dilation[1]] + kernels = [Kh, Kw] + m = nn.Conv2d( + C_in, + C_out, + kernels, + stride=strides, + dilation=dilations, + bias=has_bias, + groups=groups, + padding=(padding[0], padding[1]), + ) + input_shape = [N, C_in, H, W] + paddings = [padding[0], padding[0], padding[1], padding[1]] + else: + strides = [stride[0], stride[1], stride[2]] + dilations = [dilation[0], dilation[1], dilation[2]] + kernels = [Kd, Kh, Kw] + m = nn.Conv3d( + C_in, + C_out, + kernels, + stride=strides, + dilation=dilations, + bias=has_bias, + groups=groups, + padding=padding, + ) + input_shape = [N, C_in, D, H, W] + paddings = [ + padding[0], + padding[0], + padding[1], + padding[1], + padding[2], + padding[2], + ] + + wts = m.state_dict() + weight = wts["weight"].detach().numpy() + bias = wts["bias"].detach().numpy() if has_bias else None + + # PyTorch and CoreML weight format is same + # PyTorch weight format: C_out, C_in, H, W + # MIL weight format: C_out, C_in, H, W + + input = torch.randn(*input_shape) + output = m(input) + output = output.detach().numpy() + input = input.detach().numpy() + + output_shape = list(output.shape) + if symbolic: + # For symbolic input test + # Make Batch Size and input channel as symbolic + symbolic_batch_size = get_new_symbol() + input_shape[0] = symbolic_batch_size + output_shape[0] = symbolic_batch_size + + expected_output_types = tuple(output_shape[:]) + (types.fp32,) + expected_outputs = [output] + + input_placeholders = {"x": mb.placeholder(shape=input_shape)} + input_values = {"x": input} + + def build(x): + arguments = { + "x": x, + "weight": weight, + "pad": paddings, + "pad_type": "custom", + "strides": strides, + "dilations": dilations, + "groups": groups, + } + if has_bias: + arguments["bias"] = bias + return mb.conv(**arguments) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_elementwise_binary.py b/coremltools/converters/mil/mil/ops/tests/test_elementwise_binary.py new file mode 100644 index 000000000..9d9823a7d --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_elementwise_binary.py @@ -0,0 +1,506 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestElementwiseBinary: + # All in this test share the same backends + @pytest.mark.parametrize( + "use_cpu_only, backend, mode", + itertools.product( + [True, False], + backends, + [ + "add", + "floor_div", + "maximum", + "minimum", + "mod", + "mul", + "pow", + "real_div", + "sub", + ], + ), + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend, mode): + if mode == "add": + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[0, 4, 0], [8, 0, 12]], dtype=np.float32) + + build = lambda x, y: mb.add(x=x, y=y) + elif mode == "floor_div": + x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32) + y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32) + expected_outputs = np.array([[0, 1, 2], [2, 3, 3]], dtype=np.float32) + + build = lambda x, y: mb.floor_div(x=x, y=y) + elif mode == "maximum": + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + + build = lambda x, y: mb.maximum(x=x, y=y) + elif mode == "minimum": + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + + build = lambda x, y: mb.minimum(x=x, y=y) + elif mode == "mod": + x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32) + y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32) + expected_outputs = np.array([[10, 8, 4], [12, 5, 12]], dtype=np.float32) + + build = lambda x, y: mb.mod(x=x, y=y) + elif mode == "mul": + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[-1, 4, -9], [16, -25, 36]], dtype=np.float32) + + build = lambda x, y: mb.mul(x=x, y=y) + elif mode == "pow": + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[1, 4, 0.037], [256, 0.00032, 46656]], dtype=np.float32 + ) + + build = lambda x, y: mb.pow(x=x, y=y) + elif mode == "real_div": + x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32) + y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32) + expected_outputs = np.array( + [[0.90909091, 1.66666667, 2.30769231], [2.85714286, 3.33333333, 3.75]], + dtype=np.float32, + ) + + build = lambda x, y: mb.real_div(x=x, y=y) + elif mode == "sub": + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[2, 0, 6], [0, 10, 0]], dtype=np.float32) + + build = lambda x, y: mb.sub(x=x, y=y) + + expected_output_types = (2, 3, types.fp32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=y.shape), + } + input_values = {"x": x, "y": y} + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + 
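+    # NumPy reference for the floor_div and mod expectations above
+    # (a sketch for readers; the test itself relies only on the
+    # hard-coded arrays):
+    #   x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32)
+    #   y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32)
+    #   np.floor_divide(x, y)  # -> [[0, 1, 2], [2, 3, 3]]
+    #   np.mod(x, y)           # -> [[10, 8, 4], [12, 5, 12]]
+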
@ssa_fn + def test_builder_add(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[0, 4, 0], [8, 0, 12]], dtype=np.float32) + v = mb.add(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_floor_div(self): + x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32) + y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32) + expected_outputs = np.array([[0, 1, 2], [2, 3, 3]], dtype=np.float32) + v = mb.floor_div(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_maximum(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.maximum(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_minimum(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.minimum(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_mod(self): + x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32) + y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32) + expected_outputs = np.array([[10, 8, 4], [12, 5, 12]], dtype=np.float32) + v = mb.mod(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_mul(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[-1, 4, -9], [16, -25, 36]], dtype=np.float32) + v = mb.mul(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_pow(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[1, 4, 0.037], [256, 0.00032, 46656]], dtype=np.float32 + ) + v = mb.pow(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_real_div(self): + x = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32) + y = np.array([[11, 12, 13], [14, 15, 16]], dtype=np.float32) + expected_outputs = np.array( + [[0.90909091, 1.66666667, 2.30769231], [2.85714286, 3.33333333, 3.75]], + dtype=np.float32, + ) + v = mb.real_div(x=x, y=y) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_real_div_both_ints(self): + x = np.array([5], dtype=np.int32) + y = np.array([2], dtype=np.int32) + expected_outputs = np.array([2.5], dtype=np.float32) + v = mb.real_div(x=x, y=y) + assert is_close(expected_outputs, v.val) + # real_div should produce float values regardless of input type + assert isinstance(v.val[0], (float, np.float32)) + # make sure the dtype is float + assert types.is_float(v.dtype) + # make sure the symbolic type matches the value type + assert v._sym_type.get_primitive() == v._sym_val.get_primitive() + + @ssa_fn + def test_builder_sub(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[2, 0, 6], [0, 10, 0]], dtype=np.float32) + v = mb.sub(x=x, y=y) + assert is_close(expected_outputs, v.val) + + +class TestEqual: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, 
use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=y.shape), + } + input_values = {"x": x, "y": y} + + def build(x, y): + return mb.equal(x=x, y=y), mb.equal(x=-3, y=y) + + expected_output_types = [ + (2, 3, types.bool), + (2, 3, types.bool), + ] + expected_outputs = [ + np.array([[0, 1, 0], [1, 0, 1]], dtype=np.bool), + np.array([[0, 0, 1], [0, 0, 0]], dtype=np.bool), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[0, 1, 0], [1, 0, 1]], dtype=np.bool) + v = mb.equal(x=x_val, y=y_val) + assert is_close(expected_outputs, v.val) + + +class TestGreater: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=y.shape), + } + input_values = {"x": x, "y": y} + + def build(x, y): + return mb.greater(x=x, y=y), mb.greater(x=x, y=3.5) + + expected_output_types = [ + (2, 3, types.bool), + (2, 3, types.bool), + ] + expected_outputs = [ + np.array([[1, 0, 1], [0, 1, 0]], dtype=np.bool), + np.array([[0, 0, 0], [1, 1, 1]], dtype=np.bool), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[1, 0, 1], [0, 1, 0]], dtype=np.bool) + v = mb.greater(x=x_val, y=y_val) + assert is_close(expected_outputs, v.val) + + +class TestGreaterEqual: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=y.shape), + } + input_values = {"x": x, "y": y} + + def build(x, y): + return mb.greater_equal(x=x, y=y), mb.greater_equal(x=x, y=3.5) + + expected_output_types = [ + (2, 3, types.bool), + (2, 3, types.bool), + ] + expected_outputs = [ + np.array([[1, 1, 1], [1, 1, 1]], dtype=np.bool), + np.array([[0, 0, 0], [1, 1, 1]], dtype=np.bool), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[1, 1, 1], [1, 1, 1]], dtype=np.bool) + v = mb.greater_equal(x=x_val, y=y_val) + assert is_close(expected_outputs, 
v.val) + + +class TestLess: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=y.shape), + } + input_values = {"x": x, "y": y} + + def build(x, y): + return mb.less(x=x, y=y) + + expected_output_types = (2, 3, types.bool) + expected_outputs = np.array([[0, 0, 0], [0, 0, 0]], dtype=np.bool) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke2(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x.shape)} + input_values = {"x": x} + + def build(x): + # y is const + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + return mb.less(x=x, y=y) + + expected_output_types = (2, 3, types.bool) + expected_outputs = np.array([[0, 0, 0], [0, 0, 0]], dtype=np.bool) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_broadcast(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x.shape)} + input_values = {"x": x} + + def build(x): + # y is const + return mb.less(x=x, y=3.5) + + expected_output_types = (2, 3, types.bool) + expected_outputs = np.array([[1, 1, 1], [0, 0, 0]], dtype=np.bool) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array([[0, 0, 0], [0, 0, 0]], dtype=np.bool) + v = mb.less(x=x_val, y=y_val) + assert is_close(expected_outputs, v.val) + + +class TestLessEqual: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=y.shape), + } + input_values = {"x": x, "y": y} + + def build(x, y): + return mb.less_equal(x=x, y=y) + + expected_output_types = (2, 3, types.bool) + expected_outputs = np.array([[0, 1, 0], [1, 0, 1]], dtype=np.bool) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + y_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = 
np.array([[0, 1, 0], [1, 0, 1]], dtype=np.bool)
+        v = mb.less_equal(x=x_val, y=y_val)
+        assert is_close(expected_outputs, v.val)
+
+
+class TestNotEqual:
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend", itertools.product([True, False], backends,)
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend):
+        x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
+        y = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        input_placeholders = {
+            "x": mb.placeholder(shape=x.shape),
+            "y": mb.placeholder(shape=y.shape),
+        }
+        input_values = {"x": x, "y": y}
+
+        def build(x, y):
+            return mb.not_equal(x=x, y=y)
+
+        expected_output_types = (2, 3, types.bool)
+        expected_outputs = np.array([[1, 0, 1], [0, 1, 0]], dtype=np.bool)
+
+        run_compare_builder(
+            build,
+            input_placeholders,
+            input_values,
+            expected_output_types,
+            expected_outputs,
+            use_cpu_only=use_cpu_only,
+            frontend_only=False,
+            backend=backend,
+        )
+
+    @ssa_fn
+    def test_builder_eval(self):
+        x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
+        y_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+        expected_outputs = np.array([[1, 0, 1], [0, 1, 0]], dtype=np.bool)
+        v = mb.not_equal(x=x_val, y=y_val)
+        assert is_close(expected_outputs, v.val)
diff --git a/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py b/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py
new file mode 100644
index 000000000..be714d0be
--- /dev/null
+++ b/coremltools/converters/mil/mil/ops/tests/test_elementwise_unary.py
@@ -0,0 +1,519 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import scipy.special
+from coremltools.converters.mil import testing_reqs
+from coremltools.converters.mil.testing_reqs import *
+
+from .testing_utils import run_compare_builder
+
+backends = testing_reqs.backends
+
+
+class TestElementwiseUnary:
+    # All ops in this test share the same backends
+    @pytest.mark.parametrize(
+        "use_cpu_only, backend, mode",
+        itertools.product(
+            [True, False],
+            backends,
+            [
+                "abs",
+                "acos",
+                "asin",
+                "atan",
+                "atanh",
+                "exp2",
+                "clip",
+                "cos",
+                "cosh",
+                "erf",
+                "exp",
+                "ceil",
+                "floor",
+                "inverse",
+                "log",
+                "round",
+                "rsqrt",
+                "sign",
+                "sin",
+                "sinh",
+                "sqrt",
+                "tan",
+                "tanh",
+                "threshold",
+                "cast",
+            ],
+        ),
+    )
+    def test_builder_to_backend_smoke(self, use_cpu_only, backend, mode):
+        if mode == "abs":
+            val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+            expected_outputs = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
+
+            build = lambda x: mb.abs(x=x)
+        elif mode == "acos":
+            val = np.array([[-1, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32)
+            expected_outputs = np.array(
+                [
+                    [3.14159265, 2.0943951, 1.57079633],
+                    [1.15927948, 1.04719755, 0.64350111],
+                ],
+                dtype=np.float32,
+            )
+
+            build = lambda x: mb.acos(x=x)
+        elif mode == "asin":
+            val = np.array([[-1, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32)
+            expected_outputs = np.array(
+                [[-1.57079633, -0.52359878, 0.0], [0.41151685, 0.52359878, 0.92729522]],
+                dtype=np.float32,
+            )
+
+            build = lambda x: mb.asin(x=x)
+        elif mode == "atan":
+            val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32)
+            expected_outputs = np.array(
+                [
+                    [-0.78539816, 1.10714872, -1.24904577],
+                    [1.32581766, -1.37340077, 1.40564765],
+                ],
+                dtype=np.float32,
+            )
+            build = lambda x: mb.atan(x=x)
+        elif mode == "atanh":
+            if backend ==
"mil_proto": + # TODO + return + val = np.array([[-0.8, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32) + expected_outputs = np.array( + [[-1.09861229, -0.54930614, 0.0], [0.42364893, 0.54930614, 1.09861229]], + dtype=np.float32, + ) + + build = lambda x: mb.atanh(x=x) + elif mode == "cast": + if backend == "mil_proto": + # TODO [MIL] Add cast operation in MIL backend and enable tests + return + val = np.array([[-1.2, 2, -3.6], [4.5, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.int32) + + build = lambda x: mb.cast(x=x, dtype="int32") + elif mode == "ceil": + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array([[-1, 2, -3], [5, -5, 7]], dtype=np.float32) + + build = lambda x: mb.ceil(x=x) + elif mode == "clip": + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array([[0, 2, 0], [4.5, 0, 5]], dtype=np.float32) + + build = lambda x: mb.clip(x=x, alpha=0.0, beta=5.0) + elif mode == "cos": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [ + [0.54030231, -0.41614684, -0.9899925], + [-0.65364362, 0.28366219, 0.96017029], + ], + dtype=np.float32, + ) + + build = lambda x: mb.cos(x=x) + elif mode == "cosh": + val = np.array([[-1, -2, -3], [1, 2, 3]], dtype=np.float32) + expected_outputs = np.array( + [ + [1.54308063, 3.76219569, 10.067662], + [1.54308063, 3.76219569, 10.067662], + ], + dtype=np.float32, + ) + + build = lambda x: mb.cosh(x=x) + elif mode == "erf": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [ + [-0.8427007929497148, 0.9953222650189527, -0.9999779095030014], + [0.9999999845827421, -0.9999999999984626, 1.0], + ], + dtype=np.float32, + ) + + build = lambda x: mb.erf(x=x) + elif mode == "exp": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [ + [0.36787944, 7.3890561, 0.04978707], + [54.5981500, 0.0067379, 403.428793], + ], + dtype=np.float32, + ) + + build = lambda x: mb.exp(x=x) + elif mode == "exp2": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[0.5, 4.0, 0.125], [16, 0.03125, 64]], dtype=np.float32 + ) + + build = lambda x: mb.exp2(x=x) + elif mode == "floor": + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array([[-2, 2, -4], [4, -5, 6]], dtype=np.float32) + + build = lambda x: mb.floor(x=x) + elif mode == "inverse": + if backend == "mil_proto": # TODO + return + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[-1.0, 0.5, -0.33333334], [0.25, -0.2, 0.16666667]], dtype=np.float32 + ) + build = lambda x: mb.inverse(x=x) + elif mode == "log": + val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[0.0, 0.69314718, 1.09861229], [1.38629436, 1.60943791, 1.79175947]], + dtype=np.float32, + ) + + build = lambda x: mb.log(x=x) + elif mode == "round": + val = np.array([[-1.2, 2, -3.4], [4.6, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array([[-1, 2, -3], [5, -5, 7]], dtype=np.float32) + + build = lambda x: mb.round(x=x) + elif mode == "rsqrt": + val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[1.0, 0.70710678, 0.57735027], [0.5, 0.4472136, 0.40824829]], + dtype=np.float32, + ) + + build = lambda x: mb.rsqrt(x=x) + elif mode == "sign": + val = np.array([[-1, 2, 0], [0, -5, 6]], 
dtype=np.float32) + expected_outputs = np.array([[-1, 1, 0], [0, -1, 1]], dtype=np.float32) + + build = lambda x: mb.sign(x=x) + elif mode == "sin": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [ + [-0.84147098, 0.90929743, -0.14112001], + [-0.7568025, 0.95892427, -0.2794155], + ], + dtype=np.float32, + ) + + build = lambda x: mb.sin(x=x) + elif mode == "sinh": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[-1.1752, 3.62686, -10.017874], [27.289917, -74.20321, 201.71315]], + dtype=np.float32, + ) + + build = lambda x: mb.sinh(x=x) + elif mode == "sqrt": + val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[1.0, 1.41421356, 1.73205081], [2.0, 2.23606798, 2.44948974]], + dtype=np.float32, + ) + + build = lambda x: mb.sqrt(x=x) + elif mode == "tan": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [[-1.5574, -2.185, 0.1425], [1.15782, 3.3805, -0.291]], dtype=np.float32 + ) + + build = lambda x: mb.tan(x=x) + elif mode == "tanh": + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + expected_outputs = np.array( + [ + [-0.7615942, 0.9640276, -0.9950548], + [0.9993293, -0.9999092, 0.9999877], + ], + dtype=np.float32, + ) + + build = lambda x: mb.tanh(x=x) + elif mode == "threshold": + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array( + [[1.0, 2, 1.0], [4.5, 1.0, 6.7]], dtype=np.float32 + ) + + build = lambda x: mb.threshold(x=x, alpha=1.0) + + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + expected_output_types = ( + (2, 3, types.int32) if mode == "cast" else (2, 3, types.fp32) + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_abs_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.abs(x=val) + expected_outputs = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_acos_eval(self): + val = np.array([[-1, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32) + v = mb.acos(x=val) + expected_outputs = np.array( + [[3.14159265, 2.0943951, 1.57079633], [1.15927948, 1.04719755, 0.64350111]], + dtype=np.float32, + ) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_asin_eval(self): + val = np.array([[-1, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32) + v = mb.asin(x=val) + expected_outputs = np.array( + [[-1.57079633, -0.52359878, 0.0], [0.41151685, 0.52359878, 0.92729522]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_atan_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.atan(x=val) + expected_outputs = np.array( + [ + [-0.78539816, 1.10714872, -1.24904577], + [1.32581766, -1.37340077, 1.40564765], + ], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_atanh_eval(self): + val = np.array([[-0.8, -0.5, 0], [0.4, 0.5, 0.8]], dtype=np.float32) + v = mb.atanh(x=val) + expected_outputs = np.array( + [[-1.09861229, -0.54930614, 0.0], [0.42364893, 0.54930614, 1.09861229]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_cast_eval(self): + val = 
np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + expected_outputs = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.int32) + + v = mb.cast(x=val, dtype="int32") + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_ceil_eval(self): + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + v = mb.ceil(x=val) + expected_outputs = np.array([[-1, 2, -3], [5, -5, 7]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_clip_eval(self): + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + v = mb.clip(x=val, alpha=0.0, beta=5.0) + expected_outputs = np.array([[0, 2, 0], [4.5, 0, 5]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_cos_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.cos(x=val) + expected_outputs = np.array( + [ + [0.54030231, -0.41614684, -0.9899925], + [-0.65364362, 0.28366219, 0.96017029], + ], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_cosh_eval(self): + val = np.array([[-1, -2, -3], [1, 2, 3]], dtype=np.float32) + v = mb.cosh(x=val) + expected_outputs = np.array( + [[1.54308063, 3.76219569, 10.067662], [1.54308063, 3.76219569, 10.067662]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_erf_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.erf(x=x_val) + assert is_close(scipy.special.erf(x_val), v.val) + + @ssa_fn + def test_builder_exp_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.exp(x=val) + expected_outputs = np.array( + [[0.36787944, 7.3890561, 0.04978707], [54.5981500, 0.0067379, 403.428793]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_exp2_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.exp2(x=val) + expected_outputs = np.array( + [[0.5, 4.0, 0.125], [16, 0.03125, 64]], dtype=np.float32 + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_floor_eval(self): + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + v = mb.floor(x=val) + expected_outputs = np.array([[-2, 2, -4], [4, -5, 6]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_inverse_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.inverse(x=val) + expected_outputs = np.array( + [[-1.0, 0.5, -0.33333334], [0.25, -0.2, 0.16666667]], dtype=np.float32 + ) + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_log_eval(self): + val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.log(x=val) + expected_outputs = np.array( + [[0.0, 0.69314718, 1.09861229], [1.38629436, 1.60943791, 1.79175947]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_round_eval(self): + val = np.array([[-1.2, 2, -3.4], [4.6, -5, 6.7]], dtype=np.float32) + v = mb.round(x=val) + expected_outputs = np.array([[-1, 2, -3], [5, -5, 7]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_rsqrt_eval(self): + val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.rsqrt(x=val) + expected_outputs = np.array( + [[1.0, 0.70710678, 0.57735027], [0.5, 0.4472136, 0.40824829]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + 
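+    # rsqrt is the reciprocal square root; the expected values above
+    # match the plain NumPy computation (reference sketch only, not
+    # exercised by the test):
+    #   1.0 / np.sqrt(np.array([1.0, 2.0, 3.0]))
+    #   # -> [1.0, 0.70710678, 0.57735027]
+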
@ssa_fn + def test_builder_sign_eval(self): + val = np.array([[-1, 2, 0], [0, -5, 6]], dtype=np.float32) + v = mb.sign(x=val) + expected_outputs = np.array([[-1, 1, 0], [0, -1, 1]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_sin_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.sin(x=val) + expected_outputs = np.array( + [ + [-0.84147098, 0.90929743, -0.14112001], + [-0.7568025, 0.95892427, -0.2794155], + ], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_sinh_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.sinh(x=val) + expected_outputs = np.array( + [[-1.1752, 3.62686, -10.017874], [27.289917, -74.20321, 201.71315]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_sqrt_eval(self): + val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.sqrt(x=val) + expected_outputs = np.array( + [[1.0, 1.41421356, 1.73205081], [2.0, 2.23606798, 2.44948974]], + dtype=np.float32, + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_tan_eval(self): + val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.tan(x=val) + expected_outputs = np.array( + [[-1.5574, -2.185, 0.1425], [1.15782, 3.3805, -0.291]], dtype=np.float32 + ) + + assert is_close(expected_outputs, v.val) + + @ssa_fn + def test_builder_tanh_eval(self): + x_val = np.array([[-1, 2, -3], [4, -5, 6]], dtype=np.float32) + v = mb.tanh(x=x_val) + assert is_close(np.tanh(x_val), v.val) + + @ssa_fn + def test_builder_threshold_eval(self): + val = np.array([[-1.2, 2, -3.4], [4.5, -5, 6.7]], dtype=np.float32) + v = mb.threshold(x=val, alpha=1.0) + expected_outputs = np.array([[1.0, 2, 1.0], [4.5, 1.0, 6.7]], dtype=np.float32) + + assert is_close(expected_outputs, v.val) diff --git a/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py b/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py new file mode 100644 index 000000000..c8f66c0f4 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_image_resizing.py @@ -0,0 +1,454 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil import get_new_symbol +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +@pytest.mark.skip("Broken for mil backend") +class TestResizeBilinear: + def test_builder_to_backend_smoke(self, use_cpu_only=True, backend="nn_proto"): + x = np.array([0, 1], dtype=np.float32).reshape(1, 1, 2) + input_placeholder_dict = {"x": mb.placeholder(shape=x.shape)} + input_value_dict = {"x": x} + + def build_mode_0(x): + return mb.resize_bilinear( + x=x, + target_size_height=1, + target_size_width=5, + sampling_mode="STRICT_ALIGN_CORNERS", + ) + + expected_output_type = (1, 1, 5, types.fp32) + expected_output = np.array([0, 0.25, 0.5, 0.75, 1], dtype=np.float32).reshape( + 1, 1, 5 + ) + + run_compare_builder( + build_mode_0, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def build_mode_2(x): + return mb.resize_bilinear( + x=x, target_size_height=1, target_size_width=5, sampling_mode="DEFAULT" + ) + + expected_output = np.array([0, 0.4, 0.8, 1, 1], dtype=np.float32).reshape( + 1, 1, 5 + ) + + run_compare_builder( + build_mode_2, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def build_mode_3(x): + return mb.resize_bilinear( + x=x, + target_size_height=1, + target_size_width=5, + sampling_mode="OFFSET_CORNERS", + ) + + expected_output = np.array([0.1, 0.3, 0.5, 0.7, 0.9], dtype=np.float32).reshape( + 1, 1, 5 + ) + + run_compare_builder( + build_mode_3, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +@pytest.mark.skip("Broken for nn backend") +class TestUpsampleBilinear: + def test_builder_to_backend_smoke(self, use_cpu_only=True, backend="nn_proto"): + x = np.array([0, 1], dtype=np.float32).reshape(1, 1, 2) + input_placeholder_dict = {"x": mb.placeholder(shape=x.shape)} + input_value_dict = {"x": x} + + def build_upsample_integer(x): + return mb.upsample_bilinear( + x=x, scale_factor_height=1, scale_factor_width=3 + ) + + expected_output_type = (1, 1, 6, types.fp32) + expected_output = np.array( + [0, 0.2, 0.4, 0.6, 0.8, 1], dtype=np.float32 + ).reshape(1, 1, 6) + + run_compare_builder( + build_upsample_integer, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def build_upsample_fractional(x): + return mb.upsample_bilinear( + x=x, scale_factor_height=1, scale_factor_width=2.6, align_corners=False + ) + + expected_output_type = (1, 1, 5, types.fp32) + expected_output = np.array([0, 0.1, 0.5, 0.9, 1], dtype=np.float32).reshape( + 1, 1, 5 + ) + + run_compare_builder( + build_upsample_fractional, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + # TODO: enable GPU test: rdar://problem/60309338 + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + 
@pytest.mark.parametrize( + "use_cpu_only, backend, input_shape, scale_factor, align_corners", + itertools.product( + [True], + backends, + [(2, 5, 10, 22)], + [(3, 4), (2.5, 2), (0.5, 0.75)], + [True, False], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, input_shape, scale_factor, align_corners + ): + def _get_torch_upsample_prediction(x, scale_factor=(2, 2), align_corners=False): + x = torch.from_numpy(x) + m = torch.nn.Upsample( + scale_factor=scale_factor, mode="bilinear", align_corners=align_corners + ) + out = m(x) + return out.numpy() + + x = random_gen(input_shape, rand_min=-100, rand_max=100) + torch_pred = _get_torch_upsample_prediction( + x, scale_factor=scale_factor, align_corners=align_corners + ) + + input_placeholder_dict = {"x": mb.placeholder(shape=x.shape)} + input_value_dict = {"x": x} + + def build_upsample(x): + return mb.upsample_bilinear( + x=x, + scale_factor_height=scale_factor[0], + scale_factor_width=scale_factor[1], + align_corners=align_corners, + ) + + expected_output_type = torch_pred.shape + (types.fp32,) + run_compare_builder( + build_upsample, + input_placeholder_dict, + input_value_dict, + expected_output_type, + torch_pred, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestUpsampleNearestNeighbor: + def test_builder_to_backend_smoke(self, use_cpu_only=True, backend="nn_proto"): + x = np.array([1.5, 2.5, 3.5], dtype=np.float32).reshape(1, 1, 1, 3) + input_placeholder_dict = {"x": mb.placeholder(shape=x.shape)} + input_value_dict = {"x": x} + + def build(x): + return mb.upsample_nearest_neighbor( + x=x, upscale_factor_height=1, upscale_factor_width=2 + ) + + expected_output_type = (1, 1, 1, 6, types.fp32) + expected_output = np.array( + [1.5, 1.5, 2.5, 2.5, 3.5, 3.5], dtype=np.float32 + ).reshape(1, 1, 1, 6) + + run_compare_builder( + build, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestCrop: + @pytest.mark.parametrize( + "use_cpu_only, backend, is_symbolic", + itertools.product([True, False], backends, [True, False]), + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend, is_symbolic): + x = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]], + dtype=np.float32, + ).reshape(1, 1, 4, 4) + + input_shape = list(x.shape) + placeholder_input_shape = input_shape + if is_symbolic: + # set batch and channel dimension symbolic + placeholder_input_shape[0] = get_new_symbol() + placeholder_input_shape[1] = get_new_symbol() + + input_placeholder_dict = {"x": mb.placeholder(shape=placeholder_input_shape)} + input_value_dict = {"x": x} + + def build(x): + return mb.crop(x=x, crop_height=[0, 1], crop_width=[1, 1]) + + expected_output_type = ( + placeholder_input_shape[0], + placeholder_input_shape[1], + 3, + 2, + types.fp32, + ) + expected_output = ( + np.array([2, 3, 6, 7, 10, 11], dtype=np.float32).reshape(1, 1, 3, 2), + ) + + run_compare_builder( + build, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, C, H, W", + itertools.product( + [True, False], + backends, + [x for x in range(1, 4)], + [x for x in range(5, 10)], + [x for x in range(5, 10)], + ), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, C, H, W): + input_shape = (1, C, H, W) + 
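+        # The two crop amounts per axis below are drawn so their sum
+        # stays within the axis length; the expected output is then the
+        # slice x[:, :, crop_h[0] : H - crop_h[1], crop_w[0] : W - crop_w[1]]
+        # (e.g., H=5 with crop_h=[1, 2] leaves 2 rows). Reference note only.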
x = np.random.random(input_shape) + + crop_h = [np.random.randint(H)] + crop_h.append(np.random.randint(H - crop_h[0])) + crop_w = [np.random.randint(W)] + crop_w.append(np.random.randint(W - crop_w[0])) + + input_placeholder_dict = {"x": mb.placeholder(shape=input_shape)} + input_value_dict = {"x": x} + + def build(x): + return mb.crop(x=x, crop_height=crop_h, crop_width=crop_w) + + expected_output_type = ( + 1, + C, + H - crop_h[0] - crop_h[1], + W - crop_w[0] - crop_w[1], + types.fp32, + ) + expected_output = x[:, :, crop_h[0] : H - crop_h[1], crop_w[0] : W - crop_w[1]] + + run_compare_builder( + build, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestCropResize: + @pytest.mark.parametrize( + "use_cpu_only, backend, is_symbolic", + itertools.product([True, False], backends, [True, False]), + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend, is_symbolic): + x = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]], + dtype=np.float32, + ).reshape(1, 1, 4, 4) + + input_shape = list(x.shape) + placeholder_input_shape = input_shape + if is_symbolic: + # set batch and channel dimension symbolic + placeholder_input_shape[0] = get_new_symbol() + placeholder_input_shape[1] = get_new_symbol() + + input_placeholder_dict = {"x": mb.placeholder(shape=placeholder_input_shape)} + input_value_dict = {"x": x} + N = 1 + roi = np.array([[1, 1, 2, 2]], dtype=np.float32).reshape(1, 1, 4, 1, 1) + roi_normalized = np.array( + [[0, 0.0, 0.0, 1.0 / 3, 1.0 / 3]], dtype=np.float32 + ).reshape(1, 1, 5, 1, 1) + roi_invert = np.array([[2, 2, 1, 1]], dtype=np.float32).reshape(1, 1, 4, 1, 1) + + def build(x): + return [ + mb.crop_resize( + x=x, + roi=roi, + target_width=2, + target_height=2, + normalized_coordinates=False, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode="ALIGN_CORNERS", + ), + mb.crop_resize( + x=x, + roi=roi, + target_width=4, + target_height=4, + normalized_coordinates=False, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode="ALIGN_CORNERS", + ), + mb.crop_resize( + x=x, + roi=roi, + target_width=1, + target_height=1, + normalized_coordinates=False, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode="ALIGN_CORNERS", + ), + mb.crop_resize( + x=x, + roi=roi_normalized, + target_width=2, + target_height=2, + normalized_coordinates=True, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode="ALIGN_CORNERS", + ), + mb.crop_resize( + x=x, + roi=roi_invert, + target_width=2, + target_height=2, + normalized_coordinates=False, + box_coordinate_mode="CORNERS_HEIGHT_FIRST", + sampling_mode="ALIGN_CORNERS", + ), + ] + + expected_output_type = [ + ( + N, + placeholder_input_shape[0], + placeholder_input_shape[1], + 2, + 2, + types.fp32, + ), + ( + N, + placeholder_input_shape[0], + placeholder_input_shape[1], + 4, + 4, + types.fp32, + ), + ( + N, + placeholder_input_shape[0], + placeholder_input_shape[1], + 1, + 1, + types.fp32, + ), + ( + N, + placeholder_input_shape[0], + placeholder_input_shape[1], + 2, + 2, + types.fp32, + ), + ( + N, + placeholder_input_shape[0], + placeholder_input_shape[1], + 2, + 2, + types.fp32, + ), + ] + expected_output = [ + np.array([6, 7, 10, 11], dtype=np.float32).reshape(1, 1, 1, 2, 2), + np.array( + [ + [6, 6.333333, 6.66666, 7], + [7.333333, 7.666666, 8, 8.333333], + [8.666666, 9, 9.3333333, 9.666666], + [10, 10.333333, 10.666666, 11], + ], + dtype=np.float32, + 
).reshape(1, 1, 1, 4, 4), + np.array([8.5], dtype=np.float32).reshape(1, 1, 1, 1, 1), + np.array([1, 2, 5, 6], dtype=np.float32).reshape(1, 1, 1, 2, 2), + np.array([11, 10, 7, 6], dtype=np.float32).reshape(1, 1, 1, 2, 2), + ] + + run_compare_builder( + build, + input_placeholder_dict, + input_value_dict, + expected_output_type, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_linear.py b/coremltools/converters/mil/mil/ops/tests/test_linear.py new file mode 100644 index 000000000..762edbb1f --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_linear.py @@ -0,0 +1,186 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestLinear: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([[-4.7182, 11.94], [-3.3939, 9.2166]], dtype=np.float32) + weight_val = np.array([[1.2313, -0.095], [-1.4075, -0.8816]], dtype=np.float32) + bias_val = np.array([1.0, 2.0], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.linear(x=x, weight=weight_val, bias=bias_val)] + + expected_output_types = [(2, 2, types.fp32)] + expected_outputs = [ + np.array( + [[-5.9438195, -1.8854373], [-4.054486, -1.3484411]], dtype=np.float32 + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = random_gen(shape=(2, 2), rand_min=-37, rand_max=64) + weight_val = random_gen(shape=(2, 2), rand_min=-91, rand_max=84) + bias_val = random_gen(shape=(2,), rand_min=0.0, rand_max=9.0) + v = mb.linear(x=x_val, weight=weight_val, bias=bias_val) + assert is_close(np.matmul(x_val, weight_val.T) + bias_val, v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, dim", + itertools.product([True, False], backends, [2, 4, 8]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, dim): + shape = np.array([dim, dim]) + x_val = np.random.rand(*shape) + weight_val = np.random.rand(*shape).astype(np.float32) + bias_val = np.random.rand(dim).astype(np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + } + input_values = {"x": x_val} + + def build(x): + return [mb.linear(x=x, weight=weight_val, bias=bias_val)] + + expected_outputs = [np.matmul(x_val, np.transpose(weight_val)) + bias_val] + + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestMatMul: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([[-4.0, 13.0], [-3.0, 9.0]], dtype=np.float32) + y_val = 
np.array([[1.0, -7.0], [-1.0, -8.0]], dtype=np.float32) + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "y": mb.placeholder(shape=y_val.shape), + } + input_values = {"x": x_val, "y": y_val} + + def build(x, y): + return [ + mb.matmul(x=x_val, y=y), + mb.matmul(x=x, y=y_val), + mb.matmul(x=x, y=y), + mb.matmul(x=x, y=y, transpose_x=True, transpose_y=True), + mb.matmul(x=x_val, y=y, transpose_x=True, transpose_y=True), + mb.matmul(x=x, y=y_val, transpose_x=True, transpose_y=True), + mb.matmul(x=x, y=y_val, transpose_x=True, transpose_y=False), + mb.matmul(x=x, y=y_val, transpose_x=False, transpose_y=True), + ] + + expected_output_types = [ + (2, 2, types.fp32), + (2, 2, types.fp32), + (2, 2, types.fp32), + (2, 2, types.fp32), + (2, 2, types.fp32), + (2, 2, types.fp32), + (2, 2, types.fp32), + (2, 2, types.fp32), + ] + expected_outputs = [ + np.array([[-17.0, -76.0], [-12.0, -51.0]], dtype=np.float32), + np.array([[-17.0, -76.0], [-12.0, -51.0]], dtype=np.float32), + np.array([[-17.0, -76.0], [-12.0, -51.0]], dtype=np.float32), + np.array([[17.0, 28.0], [-50.0, -85.0]], dtype=np.float32), + np.array([[17.0, 28.0], [-50.0, -85.0]], dtype=np.float32), + np.array([[17.0, 28.0], [-50.0, -85.0]], dtype=np.float32), + np.array([[-1.0, 52.0], [4.0, -163.0]], dtype=np.float32), + np.array([[-95.0, -100.0], [-66.0, -69.0]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = random_gen(shape=(2, 2, 4), rand_min=-37, rand_max=64) + y_val = random_gen(shape=(2, 4, 2), rand_min=-91, rand_max=84) + v = mb.matmul(x=x_val, y=y_val) + assert is_close(np.matmul(x_val, y_val), v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, shapes", + itertools.product( + [True, False], + backends, + [ + ((3, 2, 3, 4), (3, 2, 4, 5)), + ((1, 1, 1, 3, 4), (1, 3, 2, 4, 5)), + ((1, 3, 1, 2, 3), (1, 4, 3, 2)), + ((1, 3, 4), (3, 2, 4, 6)), + ((7, 4), (3, 9, 5, 4, 3)), + ], + ), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, shapes): + shape_x, shape_y = shapes + x_val = np.random.rand(*shape_x) + y_val = np.random.rand(*shape_y) + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "y": mb.placeholder(shape=y_val.shape), + } + input_values = {"x": x_val, "y": y_val} + + def build(x, y): + return [mb.matmul(x=x, y=y, transpose_x=False, transpose_y=False)] + + expected_outputs = [np.matmul(x_val, y_val)] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_normalization.py b/coremltools/converters/mil/mil/ops/tests/test_normalization.py new file mode 100644 index 000000000..d3f0d1af9 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_normalization.py @@ -0,0 +1,360 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestNormalizationBatchNorm: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [ + [ + [[-16.0, 13.0], [11.0, -16.0]], + [[13.0, -15.0], [13.0, 9.0]], + [[-9.0, -4.0], [-6.0, 3.0]], + ] + ], + dtype=np.float32, + ) + mean_val = np.array([9.0, 6.0, 3.0], dtype=np.float32) + variance_val = np.array([6.0, 1.0, 7.0], dtype=np.float32) + gamma_val = np.array([1.0, 1.0, 1.0], dtype=np.float32) + beta_val = np.array([1.0, 3.0, 0.0], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.batch_norm(x=x, mean=mean_val, variance=variance_val), + mb.batch_norm( + x=x, + mean=mean_val, + variance=variance_val, + gamma=gamma_val, + beta=beta_val, + epsilon=1e-4, + ), + ] + + expected_output_types = [ + (1, 3, 2, 2, types.fp32), + (1, 3, 2, 2, types.fp32), + ] + expected_outputs = [ + np.array( + [ + [ + [[-10.206199, 1.6329918], [0.8164959, -10.206199]], + [[6.999965, -20.999895], [6.999965, 2.9999852]], + [[-4.53557, -2.6457493], [-3.4016776, 0.0]], + ] + ], + dtype=np.float32, + ), + np.array( + [ + [ + [[-9.206122, 2.6329796], [1.8164899, -9.206122]], + [[9.99965, -17.998951], [9.99965, 5.9998503]], + [[-4.535541, -2.6457324], [-3.4016557, 0.0]], + ] + ], + dtype=np.float32, + ), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestNormalizationInstanceNorm: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [ + [ + [[-16.0, 13.0], [11.0, 16.0]], + [[13.0, 15.0], [13.0, 9.0]], + [[-9.0, 4.0], [-6.0, 3.0]], + ] + ], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return mb.instance_norm(x=x, epsilon=1e-2) + + expected_output_types = [(1, 3, 2, 2, types.fp32)] + expected_outputs = [ + np.array( + [ + [ + [[-1.71524656, 0.54576027], [0.38982874, 0.77965748]], + [[0.22917463, 1.14587319], [0.22917463, -1.60422242]], + [[-1.2470212, 1.06887531], [-0.71258354, 0.89072943]], + ] + ], + dtype=np.float32, + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not found.") + @pytest.mark.parametrize( + "use_cpu_only, backend, epsilon", + itertools.product([True, False], backends, [1e-5, 1e-10],), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, epsilon): + shape = np.random.randint(low=1, high=6, size=4) + x_val = random_gen(shape=shape, rand_min=-10.0, rand_max=10.0) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return mb.instance_norm(x=x, epsilon=epsilon) + + torch_op = 
torch.nn.InstanceNorm2d(num_features=shape[1]) + expected_outputs = [torch_op(torch.as_tensor(x_val)).numpy()] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) + + +class TestNormalizationL2Norm: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([[[1.0, -7.0], [5.0, -6.0], [-3.0, -5.0]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.l2_norm(x=x, axes=[-1], epsilon=1e-10)] + + expected_output_types = [(1, 3, 2, types.fp32)] + expected_outputs = [ + np.array( + [ + [ + [0.08304548, -0.58131838], + [0.41522741, -0.4982729], + [-0.24913645, -0.41522741], + ] + ], + dtype=np.float32, + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestNormalizationLayerNorm: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([[[1.0, -7.0], [5.0, -6.0], [-3.0, -5.0]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + # V2->V1 lowering (op_mappings.py): if branch + mb.layer_norm(x=x, axes=[2], epsilon=1e-4), + # V2->V1 lowering (op_mappings.py): else branch + mb.layer_norm(x=x, axes=[-2, -1], epsilon=1e-4), + ] + + expected_output_types = [(1, 3, 2, types.fp32), (1, 3, 2, types.fp32)] + expected_outputs = [ + np.array( + [ + [ + [0.9999969, -0.9999969], + [0.99999839, -0.99999839], + [0.99995005, -0.99995005], + ] + ], + dtype=np.float32, + ), + np.array( + [ + [ + [0.8268512, -1.0630943], + [1.771824, -0.8268511], + [-0.11812156, -0.590608], + ] + ], + dtype=np.float32, + ), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + def np_layer_norm(x, axes, gamma, beta, epsilon=1e-5): + normalized_shape = x.shape[-len(axes) :] + gamma = np.ones(shape=normalized_shape) if gamma is None else gamma + beta = np.zeros(shape=normalized_shape) if beta is None else beta + num = x - np.mean(x, axis=tuple(axes), keepdims=True) + dem = np.sqrt( + np.sum(np.square(num), axis=tuple(axes), keepdims=True) + / np.prod(normalized_shape) + + epsilon + ) + return num / dem * gamma + beta + + x_val = random_gen(shape=(1, 3, 4, 4), rand_min=-100.0, rand_max=100.0) + g = random_gen(shape=(4, 4), rand_min=1.0, rand_max=2.0) + b = random_gen(shape=(4, 4), rand_min=0.0, rand_max=1.0) + res = mb.layer_norm(x=x_val, axes=[-2, -1], gamma=g, beta=b) + ref = np_layer_norm(x=x_val, axes=[-2, -1], gamma=g, beta=b) + assert is_close(ref, res.val) + + +class TestNormalizationLocalResponseNorm: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([[[1.0, -7.0], [5.0, -6.0], [-3.0, -5.0]]], dtype=np.float32) + input_placeholders = {"x": 
mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.local_response_norm(x=x, size=2), + mb.local_response_norm(x=x, size=3, alpha=0.0001, beta=0.75, k=1.0), + ] + + expected_output_types = [(1, 3, 2, types.fp32), (1, 3, 2, types.fp32)] + expected_outputs = [ + np.array( + [ + [ + [0.99996257, -6.98716545], + [4.99531746, -5.99191284], + [-2.99898791, -4.99531746], + ] + ], + dtype=np.float32, + ), + np.array( + [ + [ + [0.99997497, -6.99143696], + [4.99687672, -5.99460602], + [-2.99932504, -4.99687672], + ] + ], + dtype=np.float32, + ), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not found.") + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, size, alpha, beta, k", + itertools.product( + [True, False], + backends, + [rank for rank in range(3, 6)], + [2, 3, 5], + [0.0001, 0.01], + [0.75, 1.0], + [1.0, 2.0], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, rank, size, alpha, beta, k + ): + shape = np.random.randint(low=2, high=5, size=rank) + x_val = random_gen(shape=shape) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return mb.local_response_norm(x=x, size=size, alpha=alpha, beta=beta, k=k) + + torch_lrn = torch.nn.LocalResponseNorm(size=size, alpha=alpha, beta=beta, k=k) + expected_outputs = [torch_lrn(torch.as_tensor(x_val)).numpy()] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + atol=1e-2, + rtol=1e-3, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_pool.py b/coremltools/converters/mil/mil/ops/tests/test_pool.py new file mode 100644 index 000000000..72ec303e3 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_pool.py @@ -0,0 +1,132 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestAvgPool: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [ + [ + [[-10.80291205, -6.42076184], [-7.07910997, 9.1913279]], + [[-3.18181497, 0.9132147], [11.9785544, 7.92449539]], + ] + ], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.avg_pool(x=x, kernel_sizes=[1, 2], strides=[2, 1], pad_type="valid"), + mb.avg_pool( + x=x, + kernel_sizes=[2, 1], + strides=[1, 2], + pad_type="same", + exclude_padding_from_average=True, + ), + ] + + expected_output_types = [(1, 2, 1, 1, types.fp32), (1, 2, 2, 1, types.fp32)] + expected_outputs = [ + np.array([[[[-8.611837]], [[-1.1343001]]]], dtype=np.float32), + np.array( + [[[[-8.94101143], [-7.07911015]], [[4.39836979], [11.97855473]]]], + dtype=np.float32, + ), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestL2Pool: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [[[[-10.0, -6.0], [-7.0, 9.0]], [[-3.0, 0.0], [11.0, 7.0]]]], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.l2_pool(x=x, kernel_sizes=[1, 2], strides=[2, 1], pad_type="valid"), + ] + + expected_output_types = [(1, 2, 1, 1, types.fp32)] + expected_outputs = [np.array([[[[11.66190338]], [[3.0]]]], dtype=np.float32)] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestMaxPool: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [ + [ + [[-10.80291205, -6.42076184], [-7.07910997, 9.1913279]], + [[-3.18181497, 0.9132147], [11.9785544, 7.92449539]], + ] + ], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.max_pool(x=x, kernel_sizes=[1, 2], strides=[2, 1], pad_type="valid") + ] + + expected_output_types = [(1, 2, 1, 1, types.fp32)] + expected_outputs = [ + np.array([[[[-6.42076184]], [[0.9132147]]]], dtype=np.float32) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_random.py b/coremltools/converters/mil/mil/ops/tests/test_random.py new file mode 100644 index 000000000..d48fea10c --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_random.py @@ -0,0 +1,434 @@ +# Copyright (c) 2020, Apple 
Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * +from coremltools.converters.mil.testing_utils import get_core_ml_prediction +from coremltools._deps import _IS_MACOS +from .testing_utils import UNK_SYM, run_compare_builder + +backends = testing_reqs.backends + + +class TestRandomBernoulli: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + + x_val = np.array([0.0], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.add(x=x, y=x), + mb.random_bernoulli(shape=np.array([2, 1, 3], np.int32), prob=1.0), + mb.random_bernoulli(shape=np.array([3, 1, 2], np.int32), prob=0.0), + ] + + expected_outputs = [ + np.array(np.zeros(shape=(1,)), np.float32), + np.array(np.ones(shape=(2, 1, 3)), np.float32), + np.array(np.zeros(shape=(3, 1, 2)), np.float32), + ] + + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, prob, dynamic", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [1.0, 0.0], + [True, False], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, rank, prob, dynamic + ): + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + x_val = np.array([0.0], dtype=np.float32) + if dynamic: + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "dyn_shape": mb.placeholder(shape=shape.shape, dtype=types.int32), + } + input_values = {"x": x_val, "dyn_shape": shape} + else: + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.add(x=x, y=x), mb.random_bernoulli(shape=shape, prob=prob)] + + def build_dyn(x, dyn_shape): + return [mb.add(x=x, y=x), mb.random_bernoulli(shape=dyn_shape, prob=prob)] + + expected_outputs = [ + np.array(np.zeros(shape=(1,)), np.float32), + np.random.binomial(1, prob, shape), + ] + + if dynamic: + expected_output_types = [ + tuple([UNK_SYM for _ in o.shape]) + (types.fp32,) + for o in expected_outputs + ] + else: + expected_output_types = [ + o.shape[:] + (types.fp32,) for o in expected_outputs + ] + + builder = build_dyn if dynamic else build + + run_compare_builder( + builder, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestRandomCategorical: + def softmax(self, data): + e_data = np.exp(data - np.max(data)) + return e_data / e_data.sum() + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([1], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.random_categorical(x=x, seed=1), + mb.random_categorical(x=x, seed=1, size=4), + ] + + expected_outputs = 
[ + np.array(np.zeros(shape=(1,)), dtype=np.float32), + np.array(np.zeros(shape=(4,)), dtype=np.float32), + ] + + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, n_sample, n_class", + itertools.product([True, False], backends, [50000], [2, 10, 20]), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, n_sample, n_class): + output_name = "random_categorical" + logits = np.random.rand(2, n_class) + probs = [self.softmax(logits[0]), self.softmax(logits[1])] + + # Test logits input + input_placeholders = {"x": mb.placeholder(shape=(2, n_class))} + input_values = {"x": logits} + + def build(x): + return [ + mb.random_categorical( + x=x, size=n_sample, mode="logits", name=output_name + ) + ] + + if _IS_MACOS: + prediction = get_core_ml_prediction( + build, input_placeholders, input_values, backend=backend + ) + + ref0 = np.random.multinomial(n_sample, probs[0]) + ref1 = np.random.multinomial(n_sample, probs[1]) + + pred0 = prediction[output_name].reshape(2, n_sample)[0] + pred1 = prediction[output_name].reshape(2, n_sample)[1] + + # convert to bincount and validate probabilities + pred0 = np.bincount(np.array(pred0).astype(np.int), minlength=n_class) + pred1 = np.bincount(np.array(pred1).astype(np.int), minlength=n_class) + + assert np.allclose(np.true_divide(pred0, n_sample), probs[0], atol=1e-2) + assert np.allclose( + np.true_divide(pred0, n_sample), + np.true_divide(ref0, n_sample), + atol=1e-2, + ) + + assert np.allclose(np.true_divide(pred1, n_sample), probs[1], atol=1e-2) + assert np.allclose( + np.true_divide(pred1, n_sample), + np.true_divide(ref1, n_sample), + atol=1e-2, + ) + + # Test probs input + input_placeholders = {"x": mb.placeholder(shape=(2, n_class))} + input_values = {"x": np.array(probs)} + + def build(x): + return [ + mb.random_categorical( + x=x, size=n_sample, mode="probs", name=output_name + ) + ] + + if _IS_MACOS: + prediction = get_core_ml_prediction( + build, input_placeholders, input_values, backend=backend + ) + + pred0 = prediction[output_name].reshape(2, n_sample)[0] + pred1 = prediction[output_name].reshape(2, n_sample)[1] + + # convert to bincount and validate probabilities + pred0 = np.bincount(np.array(pred0).astype(np.int), minlength=n_class) + pred1 = np.bincount(np.array(pred1).astype(np.int), minlength=n_class) + + assert np.allclose(np.true_divide(pred0, n_sample), probs[0], atol=1e-2) + assert np.allclose( + np.true_divide(pred0, n_sample), + np.true_divide(ref0, n_sample), + atol=1e-2, + ) + + assert np.allclose(np.true_divide(pred1, n_sample), probs[1], atol=1e-2) + assert np.allclose( + np.true_divide(pred1, n_sample), + np.true_divide(ref1, n_sample), + atol=1e-2, + ) + + +class TestRandomNormal: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([0.0], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.add(x=x, y=x), + mb.random_normal( + shape=np.array([2, 1, 3], np.int32), mean=1.0, stddev=0.0 + ), + mb.random_normal( + shape=np.array([3, 1, 2], np.int32), mean=0.0, stddev=0.0 + ), + ] + + expected_outputs = [ + 
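# With stddev=0.0 random_normal is deterministic: every sample equals the mean. + 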
np.array(np.zeros(shape=(1,)), np.float32), + np.array(np.ones(shape=(2, 1, 3)), np.float32), + np.array(np.zeros(shape=(3, 1, 2)), np.float32), + ] + + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, mean, dynamic", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [1.0, 0.0], + [True, False], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, rank, mean, dynamic + ): + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + x_val = np.array([0.0], dtype=np.float32) + if dynamic: + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "dyn_shape": mb.placeholder(shape=shape.shape, dtype=types.int32), + } + input_values = {"x": x_val, "dyn_shape": shape} + else: + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.add(x=x, y=x), + mb.random_normal(shape=shape, mean=mean, stddev=0.0), + ] + + def build_dyn(x, dyn_shape): + return [ + mb.add(x=x, y=x), + mb.random_normal(shape=dyn_shape, mean=mean, stddev=0.0), + ] + + expected_outputs = [ + np.array(np.zeros(shape=(1,)), np.float32), + np.random.normal(loc=mean, scale=0.0, size=shape), + ] + + if dynamic: + expected_output_types = [ + tuple([UNK_SYM for _ in o.shape]) + (types.fp32,) + for o in expected_outputs + ] + else: + expected_output_types = [ + o.shape[:] + (types.fp32,) for o in expected_outputs + ] + + builder = build_dyn if dynamic else build + run_compare_builder( + builder, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestRandomUniform: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([0.0], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.add(x=x, y=x), + mb.random_uniform( + shape=np.array([2, 1, 3], np.int32), low=0.0, high=0.0 + ), + mb.random_uniform( + shape=np.array([3, 1, 2], np.int32), low=1.0, high=1.0 + ), + ] + + expected_outputs = [ + np.array(np.zeros(shape=(1,)), np.float32), + np.array(np.zeros(shape=(2, 1, 3)), np.float32), + np.array(np.ones(shape=(3, 1, 2)), np.float32), + ] + + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, low, high, dynamic", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [0.0], + [0.0], + [True, False], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, rank, low, high, dynamic + ): + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + x_val = np.array([0.0], dtype=np.float32) + if dynamic: + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "dyn_shape": mb.placeholder(shape=shape.shape, dtype=types.int32), + } + input_values = 
{"x": x_val, "dyn_shape": shape} + else: + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.add(x=x, y=x), + mb.random_uniform(shape=shape, low=low, high=high), + ] + + def build_dyn(x, dyn_shape): + return [ + mb.add(x=x, y=x), + mb.random_uniform(shape=dyn_shape, low=low, high=high), + ] + + expected_outputs = [ + np.array(np.zeros(shape=(1,)), np.float32), + np.random.uniform(low=low, high=high, size=shape), + ] + + if dynamic: + expected_output_types = [ + tuple([UNK_SYM for _ in o.shape]) + (types.fp32,) + for o in expected_outputs + ] + else: + expected_output_types = [ + o.shape[:] + (types.fp32,) for o in expected_outputs + ] + + builder = build_dyn if dynamic else build + run_compare_builder( + builder, + input_placeholders, + input_values, + expected_output_types, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_recurrent.py b/coremltools/converters/mil/mil/ops/tests/test_recurrent.py new file mode 100644 index 000000000..b6c6ac0fe --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_recurrent.py @@ -0,0 +1,745 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil import get_new_symbol +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestGRU: + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + @pytest.mark.parametrize( + argnames=[ + "use_cpu_only", + "backend", + "seq_len", + "batch_size", + "input_size", + "hidden_size", + "has_bias", + "output_sequence", + "direction", + "symbolic", + ], + argvalues=itertools.product( + [True, False], + backends, + [1, 8], + [ + 1 + ], # [MIL] GRU with batch size 1 produces incorrect + # output(always 0) for second batch onwards + [2, 32], + [1, 16], + [False, False], + [True, False], + ["forward", "reverse"], + [False, False], + ), + ) + def test_builder_to_backend_smoke( + self, + use_cpu_only, + backend, + seq_len, + batch_size, + input_size, + hidden_size, + has_bias, + output_sequence, + direction, + symbolic, + ): + torch.manual_seed(5) + rnn = torch.nn.GRU(input_size, hidden_size, 1, bias=has_bias) + state_dict = rnn.state_dict() + + state_dict["weight_ih_l0"] /= state_dict["weight_ih_l0"] + state_dict["weight_hh_l0"] /= state_dict["weight_hh_l0"] + + ih_wt = state_dict["weight_ih_l0"].detach().numpy() + hh_wt = state_dict["weight_hh_l0"].detach().numpy() + + # Make weight compatible to CoreML format + def rzo_to_zro(x): + r, z, o = np.split(x, 3) + return np.concatenate([z, r, o], axis=0) + + w = np.concatenate([ih_wt, hh_wt], axis=1) + w = rzo_to_zro(w).transpose() + + b = None + if has_bias: + ih_b = state_dict["bias_ih_l0"].detach().numpy() + hh_b = state_dict["bias_hh_l0"].detach().numpy() + ih_b = rzo_to_zro(ih_b) + hh_b = rzo_to_zro(hh_b) + b = np.stack([ih_b, hh_b], axis=0) + + t = torch.randn(seq_len, batch_size, input_size) + h0 = torch.randn(1, batch_size, hidden_size) + + n_t = t + if direction == "reverse": + n_t = torch.flip(n_t, [0]) + + output, hn = rnn(n_t, h0) + if output_sequence == False: + output = output[-1].unsqueeze(0) + + output = 
output.detach().numpy() + hn = hn.detach().numpy().squeeze(0) + + t = np.reshape(t.detach().numpy(), [seq_len, batch_size, input_size]) + h = np.reshape(h0.detach().numpy().squeeze(0), [batch_size, hidden_size]) + + if symbolic: + batch_size = get_new_symbol() + seq_len = get_new_symbol() + + input_shape = [seq_len, batch_size, input_size] + h_shape = [batch_size, hidden_size] + + expected_output_types = [ + (seq_len if output_sequence else 1, batch_size, hidden_size, types.fp32), + (batch_size, hidden_size, types.fp32), + ] + expected_outputs = [output, hn] + + input_placeholders = { + "x": mb.placeholder(shape=input_shape), + "initial_h": mb.placeholder(shape=h_shape), + } + input_values = {"x": t, "initial_h": h} + + def build(x, initial_h): + arguments = { + "x": x, + "initial_h": initial_h, + "weight": w, + "direction": direction, + "output_sequence": output_sequence, + } + # If bias is provided, add in arguments + if b is not None: + arguments["bias"] = b + return mb.gru(**arguments) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestLSTM: + @pytest.mark.parametrize( + ",".join( + [ + "use_cpu_only", + "backend", + "input_dims", + "output_dim", + "activation", + "inner_activation", + "outer_activation", + "return_seq", + "has_bias", + "forget_bias", + "has_peephole", + "coupled_input_forget", + "clip", + ] + ), + itertools.product( + [True, False], + backends, + [[8, 32, 32]], + [1, 4], + ["SIGMOID"], + ["TANH"], + ["TANH", "SIGMOID"], + [False, True], + [False, True], + [False, True], + [True, False], + [False], # We have not exposed this option yet! + [50.0, 0.2, 0.01], + ), + ) + def test_numpy_numerical( + self, + use_cpu_only, + backend, + input_dims, + output_dim, + activation, + inner_activation, + outer_activation, + return_seq, + has_bias, + forget_bias, + has_peephole, + coupled_input_forget, + clip, + ): + def _apply_act(x, option): + if option == "TANH": + return np.tanh(x) + elif option == "RELU": + return np.maximum(0, x) + elif option == "SIGMOID": + return 1.0 / (1 + np.exp(-x)) + elif option == "SIGMOID_HARD": + return np.minimum(np.maximum(0.2 * x + 0.5, 0), 1) + elif option == "LINEAR": + return x + else: + raise ValueError("activation invalid") + + def _clip(x, threshold=500.0): + return np.maximum(np.minimum(x, threshold), -threshold) + + def _get_numpy_prediction_lstm(Weights, X): + # X : (batch, seq_len, channel) + batch, _, _ = X.shape + out = [] + for i in range(batch): + out.append( + _get_numpy_prediction_lstm_single_batch( + Weights, np.expand_dims(X[i, :, :], axis=0) + ) + ) + return np.stack(out, axis=0) + + def _get_numpy_prediction_lstm_single_batch(Weights, X): + + batch_size, seq_len, input_size = X.shape + X = X[0, :, :] + hidden_size = output_dim + + b = Weights["b"] + b = b[0] + b[1] + Wx_i, Wx_f, Wx_o, Wx_g = np.split(Weights["W_x"], 4) + Wh_i, Wh_f, Wh_o, Wh_g = np.split(Weights["W_h"], 4) + b_i, b_f, b_o, b_g = np.split(b, 4) + p_i, p_f, p_o = np.split(Weights["p"], 3) + + act1 = activation + act2 = inner_activation + act3 = outer_activation + + h = np.zeros((hidden_size)) + c = np.zeros((hidden_size)) + np_out = np.zeros((seq_len, hidden_size)) + for k in range(seq_len): + x = X[k, :] + i = _apply_act(np.dot(Wx_i, x) + np.dot(Wh_i, h) + b_i + c * p_i, act1) + f = _apply_act(np.dot(Wx_f, x) + np.dot(Wh_f, h) + b_f + c * p_f, act1) + g = _apply_act(np.dot(Wx_g, x) + np.dot(Wh_g, h) + b_g, 
act2) + if coupled_input_forget: + c = c * (1 - i) + i * g + else: + c = c * f + i * g + c = _clip(c, clip) + o = _apply_act(np.dot(Wx_o, x) + np.dot(Wh_o, h) + b_o + c * p_o, act1) + h = o * _apply_act(c, act3) + np_out[k, :] = h + + if return_seq: + np_out_final = np_out + else: + np_out_final = np_out[-1, :] + return np_out_final + + batch = input_dims[0] + seq_len = input_dims[1] + input_size = input_dims[2] + hidden_size = output_dim + + # define random weights + W_x = np.random.rand(4 * hidden_size, input_size) + W_h = np.random.rand(4 * hidden_size, hidden_size) + + if has_bias: + b = np.random.rand(2, 4 * hidden_size) - 0.5 + if forget_bias: + b = b + 1 + else: + b = np.zeros((2, 4 * hidden_size)) + + if has_peephole: + p = np.random.rand(3 * hidden_size) - 0.5 + else: + p = np.zeros((3 * hidden_size)) + + Weights = {} + Weights["W_x"] = W_x + Weights["W_h"] = W_h + Weights["b"] = b + Weights["p"] = p + + input_data = np.random.rand(batch, seq_len, input_size) + numpy_preds = _get_numpy_prediction_lstm(Weights, input_data) + if return_seq: + numpy_preds = np.transpose(numpy_preds, [1, 0, 2]) + + coreml_input_data = np.transpose(input_data, [1, 0, 2]) + input_placeholders = {"x": mb.placeholder(shape=coreml_input_data.shape)} + input_values = {"x": coreml_input_data} + + weights = np.concatenate([W_x, W_h], axis=1).transpose() + + def build(x): + h_all, ht, ct = mb.lstm( + x=x, + initial_h=np.zeros((batch, hidden_size)).astype(np.float32), + initial_c=np.zeros((batch, hidden_size)).astype(np.float32), + weight=weights, + peephole=p, + direction="forward", + bias=b, + output_sequence=return_seq, + activations=(activation, inner_activation, outer_activation), + clip=clip, + ) + return h_all + + expected_output_types = ( + seq_len if return_seq else 1, + batch, + hidden_size, + types.fp32, + ) + expected_outputs = numpy_preds + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + # rdar://63839623 ([GITLAB-CI] precision issue on various tests on gitlab ci) + atol=1e-3, + rtol=1e-3, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + @pytest.mark.parametrize( + argnames=[ + "use_cpu_only", + "backend", + "seq_len", + "batch_size", + "input_size", + "hidden_size", + "has_bias", + "output_sequence", + "direction", + "symbolic", + ], + argvalues=itertools.product( + [True, False], + backends, + [1, 8], + [1, 32], + [1, 64], + [1, 16], + [True, False], + [True, False], + ["forward", "reverse"], + [True, False], + ), + ) + def test_builder_to_backend_smoke_unilstm( + self, + use_cpu_only, + backend, + seq_len, + batch_size, + input_size, + hidden_size, + has_bias, + output_sequence, + direction, + symbolic, + ): + # TODO: [MIL] LSTM layer- Implement eval and tf register routine + # Testing 1. peephole values + # 2. 
clip values + + torch.manual_seed(50) + rnn = torch.nn.LSTM(input_size, hidden_size, 1, bias=has_bias) + state_dict = rnn.state_dict() + + ih_wt = state_dict["weight_ih_l0"].detach().numpy() + hh_wt = state_dict["weight_hh_l0"].detach().numpy() + + # Make weight compatible to CoreML format + def ifzo_to_ifoz(x): + i, f, z, o = np.split(x, 4) + return np.concatenate([i, f, o, z], axis=0) + + w = np.concatenate([ih_wt, hh_wt], axis=1) + w = ifzo_to_ifoz(w).transpose() + + b = None + if has_bias: + ih_b = state_dict["bias_ih_l0"].detach().numpy() + hh_b = state_dict["bias_hh_l0"].detach().numpy() + ih_b = ifzo_to_ifoz(ih_b).transpose() + hh_b = ifzo_to_ifoz(hh_b).transpose() + b = np.stack([ih_b, hh_b], axis=0) + + t = torch.randn(seq_len, batch_size, input_size) + h0 = torch.randn(1, batch_size, hidden_size) + c0 = torch.randn(1, batch_size, hidden_size) + + n_t = t + if direction == "reverse": + n_t = torch.flip(n_t, [0]) + + output, (hn, cn) = rnn(n_t, (h0, c0)) + if output_sequence == False: + output = output[-1].unsqueeze(0) + + output = output.detach().numpy() + hn = hn.detach().numpy().squeeze(0) + cn = cn.detach().numpy().squeeze(0) + + t = np.reshape(t.detach().numpy(), [seq_len, batch_size, input_size]) + h = np.reshape(h0.detach().numpy().squeeze(0), [batch_size, hidden_size]) + c = np.reshape(c0.detach().numpy().squeeze(0), [batch_size, hidden_size]) + + if symbolic: + batch_size = get_new_symbol() + seq_len = get_new_symbol() + + input_shape = [seq_len, batch_size, input_size] + h_shape = [batch_size, hidden_size] + c_shape = [batch_size, hidden_size] + + expected_output_types = [ + (seq_len if output_sequence else 1, batch_size, hidden_size, types.fp32), + (batch_size, hidden_size, types.fp32), + (batch_size, hidden_size, types.fp32), + ] + expected_outputs = [output, hn, cn] + + input_placeholders = { + "x": mb.placeholder(shape=input_shape), + "initial_h": mb.placeholder(shape=h_shape), + "initial_c": mb.placeholder(shape=c_shape), + } + input_values = {"x": t, "initial_h": h, "initial_c": c} + + def build(x, initial_h, initial_c): + arguments = { + "x": x, + "initial_h": initial_h, + "initial_c": initial_c, + "weight": w, + "direction": direction, + "output_sequence": output_sequence, + } + # If bias is provided, add in arguments + if b is not None: + arguments["bias"] = b + return mb.lstm(**arguments) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + @pytest.mark.parametrize( + argnames=[ + "use_cpu_only", + "backend", + "seq_len", + "batch_size", + "input_size", + "hidden_size", + "has_bias", + "output_sequence", + "symbolic", + ], + argvalues=itertools.product( + [True], + backends, + [1, 8], + [1, 32], + [1, 64], + [2, 16], + [True, False], + [True, False], + [True, False], + ), + ) + def test_builder_to_backend_smoke_bidirlstm( + self, + use_cpu_only, + backend, + seq_len, + batch_size, + input_size, + hidden_size, + has_bias, + output_sequence, + symbolic, + ): + def _pytorch_hidden_to_coreml(x): + x = x.detach().numpy() + # Split of Direction axis + f, b = np.split(x, 2, axis=0) + # Concat on Hidden Size axis + x = np.concatenate([f, b], axis=2) + x = np.squeeze(x, axis=0) + return x + + direction = "bidirectional" + torch.manual_seed(20) + rnn = torch.nn.LSTM( + input_size, hidden_size, 1, bidirectional=True, bias=has_bias + ) + state_dict = 
rnn.state_dict() + + ih_wt = state_dict["weight_ih_l0"].detach().numpy() + hh_wt = state_dict["weight_hh_l0"].detach().numpy() + ih_wt_r = state_dict["weight_ih_l0_reverse"].detach().numpy() + hh_wt_r = state_dict["weight_hh_l0_reverse"].detach().numpy() + + f_wt = np.concatenate([ih_wt, hh_wt], axis=1) + r_wt = np.concatenate([ih_wt_r, hh_wt_r], axis=1) + + def ifzo_to_ifoz(x): + i, f, z, o = np.split(x, 4) + return np.concatenate([i, f, o, z], axis=0) + + f_wt = ifzo_to_ifoz(f_wt).transpose() + r_wt = ifzo_to_ifoz(r_wt).transpose() + w = np.concatenate([f_wt, r_wt], axis=1) + + b = None + if has_bias: + ih_b = state_dict["bias_ih_l0"].detach().numpy() + hh_b = state_dict["bias_hh_l0"].detach().numpy() + ih_b_r = state_dict["bias_ih_l0_reverse"].detach().numpy() + hh_b_r = state_dict["bias_hh_l0_reverse"].detach().numpy() + # Convert forward bias into [2, 4*H] + ih_b = ifzo_to_ifoz(ih_b) + hh_b = ifzo_to_ifoz(hh_b) + f_b = np.stack([ih_b, hh_b], axis=0) + # Convert reverse bias into [2, 4*H] + ih_b_r = ifzo_to_ifoz(ih_b_r) + hh_b_r = ifzo_to_ifoz(hh_b_r) + r_b = np.stack([ih_b_r, hh_b_r], axis=0) + # Final bias of [2, 2*4*H] + b = np.concatenate([f_b, r_b], axis=1) + + t = torch.randn(seq_len, batch_size, input_size) + h0 = torch.randn(2, batch_size, hidden_size) + c0 = torch.randn(2, batch_size, hidden_size) + + output, (hn, cn) = rnn(t, (h0, c0)) + if output_sequence == False: + output_f = output[-1].unsqueeze(0)[:, :, :hidden_size] + output_r = output[0].unsqueeze(0)[:, :, hidden_size:] + output = torch.cat([output_f, output_r], dim=2) + + output = output.detach().numpy() + hn = _pytorch_hidden_to_coreml(hn) + cn = _pytorch_hidden_to_coreml(cn) + + if symbolic: + batch_size = get_new_symbol() + seq_len = get_new_symbol() + + input_shape = [seq_len, batch_size, input_size] + h_shape = [batch_size, 2 * hidden_size] + c_shape = [batch_size, 2 * hidden_size] + + expected_output_types = [ + ( + seq_len if output_sequence else 1, + batch_size, + 2 * hidden_size, + types.fp32, + ), + (batch_size, 2 * hidden_size, types.fp32), + (batch_size, 2 * hidden_size, types.fp32), + ] + expected_outputs = [output, hn, cn] + + t = t.detach().numpy() + h = _pytorch_hidden_to_coreml(h0) + c = _pytorch_hidden_to_coreml(c0) + + input_placeholders = { + "x": mb.placeholder(shape=input_shape), + "initial_h": mb.placeholder(shape=h_shape), + "initial_c": mb.placeholder(shape=c_shape), + } + input_values = {"x": t, "initial_h": h, "initial_c": c} + + def build(x, initial_h, initial_c): + arguments = { + "x": x, + "initial_h": initial_h, + "initial_c": initial_c, + "weight": w, + "direction": direction, + "output_sequence": output_sequence, + } + # If bias is provided, add in arguments + if b is not None: + arguments["bias"] = b + return mb.lstm(**arguments) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestRNN: + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not installed.") + @pytest.mark.parametrize( + argnames=[ + "use_cpu_only", + "backend", + "seq_len", + "batch_size", + "input_size", + "hidden_size", + "has_bias", + "output_sequence", + "direction", + "symbolic", + ], + argvalues=itertools.product( + [True, False], + backends, + [2, 8], + [1, 32], + [1, 64], + [1, 16], + [True, False], + [True, False], + ["forward", "reverse"], + [True, False], + ), + ) + def test_builder_to_backend_smoke( + self, + use_cpu_only, + backend, + 
seq_len, + batch_size, + input_size, + hidden_size, + has_bias, + output_sequence, + direction, + symbolic, + ): + torch.manual_seed(50) + rnn = torch.nn.RNN(input_size, hidden_size, 1, bias=has_bias) + state_dict = rnn.state_dict() + + ih_wt = state_dict["weight_ih_l0"].detach().numpy() + hh_wt = state_dict["weight_hh_l0"].detach().numpy() + w = np.concatenate([ih_wt, hh_wt], axis=1).transpose() + + b = None + if has_bias: + ih_b = state_dict["bias_ih_l0"].detach().numpy() + hh_b = state_dict["bias_hh_l0"].detach().numpy() + b = np.stack([ih_b, hh_b], axis=0) + + t = torch.randn(seq_len, batch_size, input_size) + h0 = torch.randn(1, batch_size, hidden_size) + + n_t = t + if direction == "reverse": + n_t = torch.flip(n_t, [0]) + + output, hn = rnn(n_t, h0) + if output_sequence == False: + output = output[-1].unsqueeze(0) + + output = output.detach().numpy() + hn = hn.detach().numpy().squeeze(0) + + t = np.reshape(t.detach().numpy(), [seq_len, batch_size, input_size]) + h = np.reshape(h0.detach().numpy().squeeze(0), [batch_size, hidden_size]) + + if symbolic: + batch_size = get_new_symbol() + seq_len = get_new_symbol() + + input_shape = [seq_len, batch_size, input_size] + h_shape = [batch_size, hidden_size] + + expected_output_types = [ + (seq_len if output_sequence else 1, batch_size, hidden_size, types.fp32), + (batch_size, hidden_size, types.fp32), + ] + expected_outputs = [output, hn] + + input_placeholders = { + "x": mb.placeholder(shape=input_shape), + "initial_h": mb.placeholder(shape=h_shape), + } + input_values = {"x": t, "initial_h": h} + + def build(x, initial_h): + arguments = { + "x": x, + "initial_h": initial_h, + "weight": w, + "direction": direction, + "output_sequence": output_sequence, + } + # If bias is provided, add in arguments + if b is not None: + arguments["bias"] = b + return mb.rnn(**arguments) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_reduction.py b/coremltools/converters/mil/mil/ops/tests/test_reduction.py new file mode 100644 index 000000000..01b265124 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_reduction.py @@ -0,0 +1,282 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import scipy +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil import get_new_symbol +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestReduction: + # All ops in this test share the same backends + @pytest.mark.parametrize( + "use_cpu_only, backend, mode", + itertools.product( + [True, False], + backends, + [ + "argmax", + "argmin", + "l1_norm", + "l2_norm", + "log_sum", + "log_sum_exp", + "max", + "mean", + "min", + "prod", + "sum", + "sum_square", + ], + ), + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend, mode): + val = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + if mode in {"argmax", "argmin"}: + expected_output_types = (2, types.int32) + else: + expected_output_types = (2, types.fp32) + + if mode == "argmax": + build = lambda x: mb.reduce_argmax(x=x, axis=1, keep_dims=False) + expected_outputs = np.array([2, 2], dtype=np.int32) + elif mode == "argmin": + build = lambda x: mb.reduce_argmin(x=x, axis=1, keep_dims=False) + expected_outputs = np.array([0, 0], dtype=np.int32) + elif mode == "l1_norm": + build = lambda x: mb.reduce_l1_norm(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([6.0, 15.0], dtype=np.float32) + elif mode == "l2_norm": + build = lambda x: mb.reduce_l2_norm(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([3.74165738, 8.77496438], dtype=np.float32) + elif mode == "log_sum": + build = lambda x: mb.reduce_log_sum(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([1.7917595, 2.70805025], dtype=np.float32) + elif mode == "log_sum_exp": + build = lambda x: mb.reduce_log_sum_exp(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([3.40760589, 6.40760612], dtype=np.float32) + elif mode == "max": + build = lambda x: mb.reduce_max(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([3.0, 6.0], dtype=np.float32) + elif mode == "mean": + build = lambda x: mb.reduce_mean(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([2.0, 5.0], dtype=np.float32) + elif mode == "min": + build = lambda x: mb.reduce_min(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([1.0, 4.0], dtype=np.float32) + elif mode == "prod": + build = lambda x: mb.reduce_prod(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([6.0, 120.0], dtype=np.float32) + elif mode == "sum": + build = lambda x: mb.reduce_sum(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([6.0, 15.0], dtype=np.float32) + elif mode == "sum_square": + build = lambda x: mb.reduce_sum_square(x=x, axes=[1], keep_dims=False) + expected_outputs = np.array([14.0, 77.0], dtype=np.float32) + else: + raise NotImplementedError() + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, mode", + itertools.product([True, False], ["nn_proto"], ["max", "mean"]), + ) + def test_builder_to_backend_global_pool_2d(self, use_cpu_only, backend, mode): + # test lowering to spatial reduction to global_pool path + val = np.array([[[[1.0, 2.0, 
3.0], [4.0, 5.0, 6.0]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + expected_output_types = (1, 1, 1, 1, types.fp32) + + if mode == "max": + build = lambda x: mb.reduce_max(x=x, axes=[2, -1], keep_dims=True) + expected_outputs = np.array([[[[6.0]]]], dtype=np.float32) + elif mode == "mean": + build = lambda x: mb.reduce_mean(x=x, axes=[3, -2], keep_dims=True) + expected_outputs = np.array([[[[3.5]]]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, mode", + itertools.product([True, False], ["nn_proto"], ["max", "mean"]), + ) + def test_builder_to_backend_global_pool_3d(self, use_cpu_only, backend, mode): + # test lowering to spatial reduction to global_pool path + val = np.array([[[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + expected_output_types = (1, 1, 1, 1, 1, types.fp32) + + if mode == "max": + build = lambda x: mb.reduce_max(x=x, axes=[2, -1, 3], keep_dims=True) + expected_outputs = np.array([[[[[6.0]]]]], dtype=np.float32) + elif mode == "mean": + build = lambda x: mb.reduce_mean(x=x, axes=[-3, 3, 4], keep_dims=True) + expected_outputs = np.array([[[[[3.5]]]]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + ["axis", "keep_dims"], itertools.product([1, -3], [True, False]) + ) + def test_builder_eval(self, axis, keep_dims): + x_val = random_gen(shape=(1, 3, 4, 4), rand_min=-100.0, rand_max=100.0) + + @ssa_fn + def test_reduce_argmax(): + res = mb.reduce_argmax(x=x_val, axis=axis, keep_dims=keep_dims).val + ref = np.argmax(x_val, axis=axis) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_argmin(): + res = mb.reduce_argmin(x=x_val, axis=axis, keep_dims=keep_dims).val + ref = np.argmin(x_val, axis=axis) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_l1_norm(): + res = mb.reduce_l1_norm(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.sum(np.abs(x_val), axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_l2_norm(): + res = mb.reduce_l2_norm(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.sqrt(np.sum(np.square(x_val), axis=axis, keepdims=keep_dims)) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_log_sum(): + x_val = random_gen(shape=(1, 3, 4, 4), rand_min=0.0, rand_max=100.0) + res = mb.reduce_log_sum(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.log(np.sum(x_val, axis=axis, keepdims=keep_dims)) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_log_sum_exp(): + res = mb.reduce_log_sum_exp(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = scipy.special.logsumexp(x_val, axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_max(): + res = mb.reduce_max(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.max(x_val, axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_mean(): + res = mb.reduce_mean(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.mean(x_val, axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_min(): + res = 
mb.reduce_min(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.min(x_val, axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_prod(): + res = mb.reduce_prod(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.prod(x_val, axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_sum(): + res = mb.reduce_sum(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.sum(x_val, axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + @ssa_fn + def test_reduce_sum_square(): + res = mb.reduce_sum_square(x=x_val, axes=[axis], keep_dims=keep_dims).val + ref = np.sum(np.square(x_val), axis=axis, keepdims=keep_dims) + assert is_close(ref, res) + + test_reduce_argmax() + test_reduce_argmin() + test_reduce_l1_norm() + test_reduce_l2_norm() + test_reduce_log_sum() + test_reduce_log_sum_exp() + test_reduce_max() + test_reduce_mean() + test_reduce_min() + test_reduce_prod() + test_reduce_sum() + test_reduce_sum_square() + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + # TODO: variadic (rdar://59559656) + + s0 = get_new_symbol() + + val = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=(s0, 3))} + input_values = {"x": val} + + def build(x): + return [ + mb.reduce_argmax(x=x, axis=1, keep_dims=True), + mb.reduce_argmin(x=x, axis=0, keep_dims=True), + ] + + expected_output_types = [(s0, 1, types.int32), (1, 3, types.int32)] + expected_outputs = [ + np.array([[2], [2]], dtype=np.int32), + np.array([[0], [0], [0]], dtype=np.int32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_scatter_gather.py b/coremltools/converters/mil/mil/ops/tests/test_scatter_gather.py new file mode 100644 index 000000000..7039a47bc --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_scatter_gather.py @@ -0,0 +1,529 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import run_compare_builder + +backends = testing_reqs.backends + + +class TestScatter: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([1, 0], dtype=np.int32) + updates = np.array([[5, 6, 7], [8, 9, 10]], dtype=np.float32) + input_placeholders = { + "data": mb.placeholder(shape=data.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "updates": mb.placeholder(shape=updates.shape), + } + + input_values = {"data": data, "indices": indices, "updates": updates} + + def build(data, indices, updates): + return (mb.scatter(data=data, indices=indices, updates=updates),) + + expected_output_types = (2, 3, types.fp32) + + expected_outputs = np.array([[9, 11, 13], [9, 11, 13]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TF_1, reason=MSG_TF1_NOT_FOUND) + @pytest.mark.parametrize( + "use_cpu_only, backend, rankData_rankIndices, accumulate_mode", + itertools.product( + [True, False], + backends, + [ + (1, 2), + (2, 1), + (3, 2), + (2, 3), + (2, 2), + (1, 1), + (3, 3), + (3, 3), + (3, 3), + (1, 3), + (3, 1), + (3, 1), + ], + ["update", "add", "sub", "mul", "div", "max", "min"], + ), + ) + def test_builder_to_backend_programmatic( + self, use_cpu_only, backend, rankData_rankIndices, accumulate_mode + ): + data_rank, indices_rank = rankData_rankIndices + data_shape = np.random.randint(low=2, high=5, size=data_rank) + indices_shape = np.random.randint(low=2, high=5, size=indices_rank) + updates_shape = list(indices_shape) + list(data_shape[1:]) + + data = np.random.rand(*data_shape).astype(np.float32) + updates = np.random.rand(*updates_shape).astype(np.float32) + indices = np.random.randint(0, data_shape[0], size=indices_shape).astype( + np.int32 + ) + + def build(data, indices, updates): + return mb.scatter( + data=data, indices=indices, updates=updates, mode=accumulate_mode + ) + + with tf.Graph().as_default(), tf.Session() as sess: + tf_output = tf.Variable(data) + sess.run(tf.global_variables_initializer()) + if accumulate_mode == "update": + sess.run(tf.scatter_update(tf_output, indices, updates)) + if accumulate_mode == "add": + sess.run(tf.scatter_add(tf_output, indices, updates)) + if accumulate_mode == "sub": + sess.run(tf.scatter_sub(tf_output, indices, updates)) + if accumulate_mode == "mul": + sess.run(tf.scatter_mul(tf_output, indices, updates)) + if accumulate_mode == "div": + sess.run(tf.scatter_div(tf_output, indices, updates)) + if accumulate_mode == "max": + sess.run(tf.scatter_max(tf_output, indices, updates)) + if accumulate_mode == "min": + sess.run(tf.scatter_min(tf_output, indices, updates)) + expected_output = sess.run(tf_output) + + input_placeholders = { + "data": mb.placeholder(shape=data.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "updates": mb.placeholder(shape=updates.shape), + } + + input_values = {"data": data, 
"indices": indices, "updates": updates} + + expected_output_types = tuple(data_shape[:]) + (types.fp32,) + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestScatterAlongAxis: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=np.int32) + updates = np.array([[5, 6, 7], [8, 9, 10]], dtype=np.float32) + input_placeholders = { + "data": mb.placeholder(shape=data.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "updates": mb.placeholder(shape=updates.shape), + } + + input_values = {"data": data, "indices": indices, "updates": updates} + + def build(data, indices, updates): + return mb.scatter_along_axis( + data=data, indices=indices, updates=updates, axis=0, mode="update" + ) + + expected_output_types = (2, 3, types.fp32) + + expected_outputs = np.array([[1, 6, 10], [8, 9, 7]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=np.int32) + updates = np.array([[5, 6, 7], [8, 9, 10]], dtype=np.float32) + v = mb.scatter_along_axis( + data=x, indices=indices, updates=updates, axis=0, mode="update" + ) + assert is_close(np.array([[1, 6, 10], [8, 9, 7]], dtype=np.float32), v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_axis", + itertools.product( + [True, False], + backends, + [(rank, axis) for rank in range(1, 5) for axis in range(-rank, rank)], + ), + ) + def test_builder_to_backend_programmatic(self, use_cpu_only, backend, rank_axis): + rank, axis = rank_axis + data_shape = np.random.randint(low=2, high=8, size=rank) + indices_shape = np.copy(data_shape) + indices_shape[axis] = np.random.randint(low=1, high=8) + updates_shape = indices_shape + + data = np.random.rand(*data_shape).astype(np.float32) + updates = np.random.rand(*updates_shape).astype(np.float32) + indices = np.random.randint( + -data_shape[axis], data_shape[axis], size=indices_shape + ).astype(np.int32) + + def build(data, indices, updates): + return mb.scatter_along_axis( + data=data, indices=indices, updates=updates, axis=axis, mode="update" + ) + + input_placeholders = { + "data": mb.placeholder(shape=data.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "updates": mb.placeholder(shape=updates.shape), + } + + input_values = {"data": data, "indices": indices, "updates": updates} + + expected_output_types = tuple(data_shape[:]) + (types.fp32,) + + np_output = np.copy(data) + np.put_along_axis(np_output, indices, updates, axis=axis) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + np_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestScatterNd: + # TODO: [MIL] Scatter and ScatterNd in tensoflow + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + data = 
np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([[1, 0], [0, 2]], dtype=np.int32) + updates = np.array([5, 10], dtype=np.float32) + input_placeholders = { + "data": mb.placeholder(shape=data.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "updates": mb.placeholder(shape=updates.shape), + } + + input_values = {"data": data, "indices": indices, "updates": updates} + + def build(data, indices, updates): + return (mb.scatter_nd(data=data, indices=indices, updates=updates),) + + expected_output_types = (2, 3, types.fp32) + + expected_outputs = np.array([[1, 2, 13], [9, 5, 6]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TF_1, reason=MSG_TF1_NOT_FOUND) + @pytest.mark.parametrize( + "use_cpu_only, backend, rankData_rankIndices, accumulate_mode", + itertools.product( + [True, False], + backends, + [ + (1, 2), + (2, 2), + (3, 2), + (2, 3), + (1, 4), + (5, 2), + (2, 5), + (4, 3), + (3, 4), + (2, 4), + (4, 2), + (1, 5), + ], + ["update", "add", "sub"], + ), + ) + def test_builder_to_backend_programmatic( + self, use_cpu_only, backend, rankData_rankIndices, accumulate_mode + ): + data_rank, indices_rank = rankData_rankIndices + data_shape = np.random.randint(low=2, high=5, size=data_rank) + indices_shape = np.random.randint(low=2, high=5, size=indices_rank) + indices_shape[-1] = np.random.randint(low=1, high=data_rank + 1) + updates_shape = list(indices_shape[:-1]) + list(data_shape[indices_shape[-1] :]) + + data = np.random.rand(*data_shape).astype(np.float32) + updates = np.random.rand(*updates_shape).astype(np.float32) + indices_list = [] + for i in range(indices_shape[-1]): + indices_list.append( + np.random.randint(0, data_shape[i], size=indices_shape[:-1]) + ) + + indices = np.stack(indices_list, axis=-1).astype(np.int32) + + def build(data, indices, updates): + return mb.scatter_nd( + data=data, indices=indices, updates=updates, mode=accumulate_mode + ) + + with tf.Graph().as_default(), tf.Session() as sess: + tf_output = tf.Variable(data) + sess.run(tf.global_variables_initializer()) + if accumulate_mode == "update": + sess.run(tf.scatter_nd_update(tf_output, indices, updates)) + if accumulate_mode == "add": + sess.run(tf.scatter_nd_add(tf_output, indices, updates)) + if accumulate_mode == "sub": + sess.run(tf.scatter_nd_sub(tf_output, indices, updates)) + expected_output = sess.run(tf_output) + + input_placeholders = { + "data": mb.placeholder(shape=data.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + "updates": mb.placeholder(shape=updates.shape), + } + + input_values = {"data": data, "indices": indices, "updates": updates} + + expected_output_types = tuple(data_shape[:]) + (types.fp32,) + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestGather: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([1, 0], dtype=np.int32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + } + + 
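+        # Worked example (matches expected_outputs below, an illustrative note):
+        # with x = [[1, 2, 3], [4, 5, 6]] and indices = [1, 0], axis=0 (the
+        # default) reorders the rows to [[4, 5, 6], [1, 2, 3]], while axis=1
+        # or -1 gathers columns, giving [[2, 1], [5, 4]].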
input_values = {"x": x, "indices": indices} + + def build(x, indices): + return [ + mb.gather(x=x, indices=indices, axis=0), + mb.gather(x=x, indices=indices, axis=1), + mb.gather(x=x, indices=indices, axis=-2), + mb.gather(x=x, indices=indices, axis=-1), + mb.gather(x=x, indices=indices), + ] + + expected_output_types = [ + (2, 3, types.fp32), + (2, 2, types.fp32), + (2, 3, types.fp32), + (2, 2, types.fp32), + (2, 3, types.fp32), + ] + + expected_outputs = [ + np.array([[4, 5, 6], [1, 2, 3]], dtype=np.float32), + np.array([[2, 1], [5, 4]], dtype=np.float32), + np.array([[4, 5, 6], [1, 2, 3]], dtype=np.float32), + np.array([[2, 1], [5, 4]], dtype=np.float32), + np.array([[4, 5, 6], [1, 2, 3]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([1, 0], dtype=np.int32) + v = mb.gather(x=x, indices=indices, axis=-1) + assert is_close(np.array([[2, 1], [5, 4]], dtype=np.float32), v.val) + + +class TestGatherAlongAxis: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([[1, 0, 1], [1, 1, 0]], dtype=np.int32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + } + + input_values = {"x": x, "indices": indices} + + def build(x, indices): + return [ + mb.gather_along_axis(x=x, indices=indices, axis=0), + mb.gather_along_axis(x=x, indices=indices, axis=1), + mb.gather_along_axis(x=x, indices=indices, axis=-2), + mb.gather_along_axis(x=x, indices=indices, axis=-1), + mb.gather_along_axis(x=x, indices=indices), + ] + + expected_output_types = [ + (2, 3, types.fp32), + (2, 3, types.fp32), + (2, 3, types.fp32), + (2, 3, types.fp32), + (2, 3, types.fp32), + ] + + expected_outputs = [ + np.array([[4, 2, 6], [4, 5, 3]], dtype=np.float32), + np.array([[2, 1, 2], [5, 5, 4]], dtype=np.float32), + np.array([[4, 2, 6], [4, 5, 3]], dtype=np.float32), + np.array([[2, 1, 2], [5, 5, 4]], dtype=np.float32), + np.array([[4, 2, 6], [4, 5, 3]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([[1, 0, 1], [0, 0, 1]], dtype=np.int32) + v = mb.gather_along_axis(x=x, indices=indices, axis=0) + assert is_close(np.array([[4, 2, 6], [1, 2, 6]], dtype=np.float32), v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_axis", + itertools.product( + [True, False], + backends, + [(rank, axis) for rank in range(1, 5) for axis in range(-rank, rank)], + ), + ) + def test_builder_to_backend_programmatic(self, use_cpu_only, backend, rank_axis): + rank, axis = rank_axis + x_shape = np.random.randint(low=2, high=8, size=rank) + indices_shape = np.copy(x_shape) + indices_shape[axis] = np.random.randint(low=1, high=8) + + x = np.random.rand(*x_shape).astype(np.float32) + indices = np.random.randint( + -x_shape[axis], x_shape[axis], size=indices_shape + ).astype(np.int32) + + def build(x, 
indices): + return mb.gather_along_axis(x=x, indices=indices, axis=axis) + + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + } + + input_values = {"x": x, "indices": indices} + + expected_output_types = tuple(indices_shape[:]) + (types.fp32,) + expected_output = np.take_along_axis(x, indices, axis=axis) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_output, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestGatherNd: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + indices = np.array([[1, 0], [0, 2]], dtype=np.int32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "indices": mb.placeholder(shape=indices.shape, dtype=types.int32), + } + + input_values = {"x": x, "indices": indices} + + def build(x, indices): + return (mb.gather_nd(x=x, indices=indices),) + + expected_output_types = (2, types.fp32) + expected_outputs = np.array([4, 3], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_slice.py b/coremltools/converters/mil/mil/ops/tests/test_slice.py new file mode 100644 index 000000000..fb3194773 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_slice.py @@ -0,0 +1,205 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import UNK_SYM, run_compare_builder + +backends = testing_reqs.backends + + +class TestSliceByIndex: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array(list(range(24))).reshape((2, 3, 4)).astype(np.float32) + begin_val = np.array([1, 1, 1], dtype=np.int32) + end_val = np.array([2, 3, 3], dtype=np.int32) + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "begin": mb.placeholder(shape=begin_val.shape, dtype=types.int32), + "end": mb.placeholder(shape=end_val.shape, dtype=types.int32), + } + input_values = {"x": x_val, "begin": begin_val, "end": end_val} + + def build(x, begin, end): + return [ + mb.slice_by_index(x=x, begin=begin, end=end), + ] + + expected_output_types = [(UNK_SYM, UNK_SYM, UNK_SYM, types.fp32)] + expected_outputs = [np.array([[[17, 18], [21, 22]]], dtype=np.float32)] + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.array(list(range(24))).reshape((2, 3, 4)) + v = [ + mb.slice_by_index(x=x_val, begin=[1, 1, 1], end=[2, 2, 2]), + mb.slice_by_index( + x=x_val, begin=[1, 1, 1], end=[2, 3, 4], stride=[1, 1, 2] + ), + mb.slice_by_index(x=x_val, begin=[-1, -3, -3], end=[-1, -1, -1]), + mb.slice_by_index(x=x_val, begin=[0, 
0, -3], end=[-1, -2, -2]), + mb.slice_by_index( + x=x_val, begin=[-1, -1, -1], end=[0, 1, -3], stride=[-2, -1, -3] + ), + mb.slice_by_index( + x=x_val, + begin=[1, 1, 1], + end=[2, 3, 4], + stride=[1, 1, 2], + begin_mask=[True, False, True], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 1, 1], + end=[2, 3, 4], + stride=[1, 1, 2], + begin_mask=[True, False, True], + end_mask=[True, True, False], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 1, 1], + end=[2, 3, 4], + stride=[1, 1, 2], + begin_mask=[False, False, True], + end_mask=[True, False, False], + squeeze_mask=[False, True, False], + ), + mb.slice_by_index( + x=x_val, + begin=[0, 0, 0], + end=[0, 0, 0], + stride=[1, 1, 1], + begin_mask=[True, True, True], + end_mask=[True, True, True], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 1, 1], + end=[2, 2, 0], + stride=[1, 1, 1], + squeeze_mask=[False, False, True], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 0, 0], + end=[2, 0, 0], + stride=[1, 1, 1], + begin_mask=[False, True, True], + end_mask=[False, True, True], + ), + mb.slice_by_index( + x=x_val, + begin=[0, 0, 0], + end=[0, 0, 0], + stride=[1, 1, 1], + begin_mask=[True, True, True], + end_mask=[True, True, True], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 0, 1], + end=[2, 0, 2], + stride=[1, 1, 1], + begin_mask=[False, True, False], + end_mask=[False, True, False], + ), + mb.slice_by_index( + x=x_val, + begin=[0, 0, 1], + end=[0, 0, 0], + stride=[1, 1, 1], + begin_mask=[True, True, False], + end_mask=[True, True, False], + squeeze_mask=[False, False, True], + ), + mb.slice_by_index( + x=x_val, + begin=[0, 0, 0], + end=[0, 0, 0], + stride=[1, 1, 1], + begin_mask=[False, False, True], + end_mask=[False, False, True], + squeeze_mask=[True, True, False], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 0, 0], + end=[2, 0, 0], + stride=[1, 1, 1], + begin_mask=[False, True, True], + end_mask=[False, True, True], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 1, 0], + end=[2, 2, 0], + stride=[1, 1, 1], + begin_mask=[False, False, True], + end_mask=[False, False, True], + ), + mb.slice_by_index( + x=x_val, + begin=[1, 0, 0], + end=[0, 0, 0], + stride=[1, 1, 1], + begin_mask=[False, True, True], + end_mask=[False, True, True], + squeeze_mask=[True, False, False], + ), + mb.slice_by_index( + x=x_val, + begin=[0, 0, 0], + end=[0, 0, 0], + begin_mask=[True, True, True], + end_mask=[True, True, True], + ), + mb.slice_by_index( + x=x_val, + begin=[0, 0, 0], + end=[0, 0, 0], + stride=[1, 1, -1], + begin_mask=[True, True, True], + end_mask=[True, True, True], + ), + ] + ans = [ + x_val[1:2, 1:2, 1:2], + x_val[1:2, 1:3, 1:4:2], + x_val[-3:-1, -3:-1, -3:-1], + x_val[0:-1, 0:-2, -3:-2], + x_val[-1:0:-2, -1:1:-1, -1:-3:-3], + x_val[:2, 1:3, :4:2], + x_val[:, 1:, :4:2], + x_val[1::1, 1, :3:2], + x_val[:, :, :], + x_val[1:2, 1:2, 1], + x_val[1:2, ...], + x_val[...], + x_val[1:2, ..., 1:2], + x_val[..., 1], + x_val[0, 0, :], + x_val[1:2], + x_val[1:2, 1:2], + x_val[1], + x_val[:], + x_val[..., ::-1], + ] + for idx in range(len(v)): + assert is_close(ans[idx], v[idx].val) diff --git a/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py b/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py new file mode 100644 index 000000000..c8ab59654 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_tensor_operation.py @@ -0,0 +1,1610 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil import get_new_symbol +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import UNK_SYM, UNK_VARIADIC, run_compare_builder + +backends = testing_reqs.backends + + +class TestBandPart: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [ + [3.0, 3.0, 5.0, 1.0], + [5.0, 6.0, 3.0, 8.0], + [7.0, 2.0, 7.0, 2.0], + [6.0, 7.0, 7.0, 1.0], + ], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.band_part(x=x), + mb.band_part(x=x, lower=0, upper=-1), + mb.band_part(x=x, lower=-1, upper=0), + mb.band_part(x=x, lower=0, upper=0), + ] + + expected_output_types = [ + (4, 4, types.fp32), + (4, 4, types.fp32), + (4, 4, types.fp32), + (4, 4, types.fp32), + ] + + expected_outputs = [ + np.array( + [ + [3.0, 3.0, 5.0, 1.0], + [5.0, 6.0, 3.0, 8.0], + [7.0, 2.0, 7.0, 2.0], + [6.0, 7.0, 7.0, 1.0], + ], + dtype=np.float32, + ), + np.array( + [ + [3.0, 3.0, 5.0, 1.0], + [0.0, 6.0, 3.0, 8.0], + [0.0, 0.0, 7.0, 2.0], + [0.0, 0.0, 0.0, 1.0], + ], + dtype=np.float32, + ), + np.array( + [ + [3.0, 0.0, 0.0, 0.0], + [5.0, 6.0, 0.0, 0.0], + [7.0, 2.0, 7.0, 0.0], + [6.0, 7.0, 7.0, 1.0], + ], + dtype=np.float32, + ), + np.array( + [ + [3.0, 0.0, 0.0, 0.0], + [0.0, 6.0, 0.0, 0.0], + [0.0, 0.0, 7.0, 0.0], + [0.0, 0.0, 0.0, 1.0], + ], + dtype=np.float32, + ), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestCumSum: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.cumsum(x=x, axis=0, reverse=True, exclusive=False) + + expected_output_types = (2, 3, types.fp32) + expected_outputs = np.array([[5, 7, 9], [4, 5, 6]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + v = mb.cumsum(x=x_val) + assert is_close(np.cumsum(x_val, axis=0), v.val) + + @ssa_fn + def test_invalid_axis1(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(ValueError): + mb.cumsum(x=x_val, axis=-2) + + @ssa_fn + def test_invalid_axis2(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(ValueError): + mb.cumsum(x=x_val, axis=len(x_val.shape)) + + @ssa_fn + def test_invalid_axis3(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(TypeError): + mb.cumsum(x=x_val, axis="") + + @ssa_fn + def test_invalid_reverse1(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, 
rand_max=100) + with pytest.raises(TypeError): + mb.cumsum(x=x_val, reverse="") + + @ssa_fn + def test_invalid_reverse2(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val, reverse=0) + + @ssa_fn + def test_invalid_reverse3(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val, reverse=1) + + @ssa_fn + def test_invalid_exclusive1(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val, exclusive="") + + @ssa_fn + def test_invalid_exclusive2(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val, exclusive=0) + + @ssa_fn + def test_invalid_exclusive3(self): + x_val = random_gen(shape=(1, 2, 3, 4, 5), rand_min=-100, rand_max=100) + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val, exclusive=1) + + @ssa_fn + def test_invalid_input1(self): + x_val = 1 + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val) + + @ssa_fn + def test_invalid_input2(self): + x_val = ["1"] + with pytest.raises(TypeError): + pred = mb.cumsum(x=x_val) + + +class TestFill: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + shape = (2, 1, 3) + x_val = np.zeros(shape=shape, dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + + input_values = {"x": x_val} + + def build(x): + return mb.add(x=x, y=mb.fill(shape=shape, value=1.0)) + + expected_output_types = [(2, 1, 3, types.fp32)] + expected_outputs = [np.full(shape=shape, fill_value=1.0)] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + shape = np.random.randint(low=1, high=3, size=5).astype(np.int32) + res = mb.fill(shape=shape, value=1991.0).val + assert is_close(np.full(shape, fill_value=1991.0), res) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank, value", + itertools.product( + [True, False], + backends, + [rank for rank in range(1, 6)], + [-1917.0, 0.0, 2048.0], + ), + ) + def test_builder_to_backend_stress(self, use_cpu_only, backend, rank, value): + shape = np.random.randint(low=1, high=4, size=rank).astype(np.int32) + x_val = np.zeros(shape=shape, dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return mb.add(x=x, y=mb.fill(shape=shape, value=value)) + + expected_outputs = [np.full(shape=shape, fill_value=value)] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + # Test variadic (rdar://59559656) + + s_len = get_new_symbol() + input_placeholders = { + "shape": mb.placeholder(shape=(s_len,), dtype=types.int32), + } + + def build(shape): + return [mb.fill(shape=shape)] + + expected_output_types = [(UNK_VARIADIC, types.fp32)] + expected_outputs = 
[np.zeros(shape=(2, 1, 3), dtype=np.float32)] + input_values = {"shape": np.array([2, 1, 3], dtype=np.float32)} + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestNonMaximumSuppression: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + boxes_val = np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0], + [2.0, 2.0, 2.0, 2.0], + [3.0, 3.0, 3.0, 3.0], + ] + ], + dtype=np.float32, + ) + scores_val = np.array([[[-3.5], [9.4], [2.3], [0.7]]], dtype=np.float32) + input_placeholders = { + "boxes": mb.placeholder(shape=(1, 4, 4)), + "scores": mb.placeholder(shape=(1, 4, 1)), + } + input_values = {"boxes": boxes_val, "scores": scores_val} + + expected_output_types = [ + (1, 2, 4, types.fp32), + (1, 2, 1, types.fp32), + (1, 2, types.int32), + (1, types.int32), + ] + expected_outputs = [ + np.array([[[1.0, 1.0, 1.0, 1.0], [2.0, 2.0, 2.0, 2.0]]], dtype=np.float32), + np.array([[[9.4], [2.3]]], dtype=np.float32), + np.array([[1, 2]], dtype=np.int32), + np.array([2], dtype=np.int32), + ] + + def build(boxes, scores): + return mb.non_maximum_suppression( + boxes=boxes, + scores=scores, + iou_threshold=0.2, + score_threshold=0.4, + max_boxes=2, + per_class_suppression=True, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @staticmethod + def _compute_iou_matrix(boxes): + # input is (N, 4), in order [center_w, center_h, width, height] + boxes = boxes.astype(np.float) + center_w, center_h, width, height = np.split(boxes, 4, axis=1) + top = center_h + 0.5 * height + bottom = center_h - 0.5 * height + left = center_w - 0.5 * width + right = center_w + 0.5 * width + area = width * height + + h_b = np.minimum(top, np.transpose(top)) + w_b = np.minimum(right, np.transpose(right)) + h_a = np.maximum(bottom, np.transpose(bottom)) + w_a = np.maximum(left, np.transpose(left)) + + intersection_area = np.maximum(0, h_b - h_a) * np.maximum(0, w_b - w_a) + union_area = area + np.transpose(area) - intersection_area + return intersection_area / union_area + + @staticmethod + def _ref_non_maximum_suppression( + boxes, scores, iou_threshold, score_threshold, max_boxes, per_class_suppression + ): + """ + Reference implementation of Core ML's NMS op using TensorFlow. 
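+        Boxes arrive as [center_w, center_h, width, height] and are converted
+        below to the corner form [y1, x1, y2, x2] that
+        tf.image.non_max_suppression expects.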
+ boxes of shape (n_batch, n_box, 4), [center_w, center_h, width, height] + scores of shape (n_batch, n_box, n_score) + output shapes [ + (n_batch, max_boxes, 4), + (n_batch, max_boxes, n_score), + (n_batch, max_boxes), + (n_batch,) + ] + """ + n_batch, n_box, n_score = scores.shape + + iou_threshold = iou_threshold.astype(np.float32) + score_threshold = score_threshold.astype(np.float32) + + # convert box ids to TF style + center_w, center_h, width, height = np.split( + boxes, 4, axis=-1 + ) # (n_batch,n_box,1) + y1 = center_h - 0.5 * height + y2 = center_h + 0.5 * height + x1 = center_w - 0.5 * width + x2 = center_w + 0.5 * width + boxes_tf = np.concatenate((y1, x1, y2, x2), axis=-1) # (n_batch,n_box,4) + + out1 = np.zeros((n_batch, max_boxes, 4)) + out2 = np.zeros((n_batch, max_boxes, n_score)) + out3 = -1 * np.ones((n_batch, max_boxes)) + out4 = np.zeros((n_batch,)) + + for b in range(n_batch): + box_coord_matrix = boxes_tf[b, :, :] # (n_box,4) + score_vector = np.max(scores[b, :, :], axis=-1) # (n_box,) + if not per_class_suppression: + # this is the simple case as TF directly supports it + with tf.Graph().as_default(), tf.Session() as sess: + box_coord_matrix_pl = tf.placeholder( + tf.float32, shape=box_coord_matrix.shape + ) + score_vector_pl = tf.placeholder( + tf.float32, shape=score_vector.shape + ) + ids_g = tf.image.non_max_suppression( + box_coord_matrix_pl, + score_vector_pl, + max_output_size=max_boxes, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + ) + ids = sess.run( + ids_g, + feed_dict={ + box_coord_matrix_pl: box_coord_matrix, + score_vector_pl: score_vector, + }, + ) + else: + # this is slightly complicated as TF does not directly support it + class_ids = np.argmax(scores[b, :, :], axis=-1) # (n_box,) + sorted_score_ids = np.argsort(-score_vector) + box_coord_matrix2 = np.take(box_coord_matrix, sorted_score_ids, axis=0) + score_vector2 = np.take(score_vector, sorted_score_ids) + class_ids = np.take(class_ids, sorted_score_ids) + classes_seen = dict() + ids_intermediate = np.array([], dtype=np.int) + for n in range(n_box): + if class_ids[n] in classes_seen: + continue + c = class_ids[n] + classes_seen[c] = True + current_class_ids = np.where(class_ids == c)[0] + if len(current_class_ids) > 0: + feed_in1 = np.take(box_coord_matrix2, current_class_ids, axis=0) + feed_in2 = np.take(score_vector2, current_class_ids) + + with tf.Graph().as_default(), tf.Session() as sess: + box_coord_matrix_pl = tf.placeholder( + tf.float32, shape=feed_in1.shape + ) + score_vector_pl = tf.placeholder( + tf.float32, shape=feed_in2.shape + ) + cur_ids_g = tf.image.non_max_suppression( + box_coord_matrix_pl, + score_vector_pl, + max_output_size=max_boxes, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + ) + cur_ids = sess.run( + cur_ids_g, + feed_dict={ + box_coord_matrix_pl: feed_in1, + score_vector_pl: feed_in2, + }, + ) + + from_sort_ids = np.take(current_class_ids, cur_ids) + ids_intermediate = np.append(ids_intermediate, from_sort_ids) + ids_intermediate.sort() + ids = np.take(sorted_score_ids, ids_intermediate) + + xx = len(ids) + if xx == 0: + ids = np.array([np.argmax(score_vector)]) + xx = 1 + if xx > max_boxes: + ids = ids[:max_boxes] + xx = len(ids) + out1[b, :xx, :] = np.take(boxes[b, :, :], ids, axis=0) + out2[b, :xx, :] = np.take(scores[b, :, :], ids, axis=0) + out3[b, :xx] = ids + out4[b] = xx + + return out1, out2, out3, out4 + + @pytest.mark.xfail(reason="rdar://60390856", run=False) + @pytest.mark.parametrize( + ",".join( + [ + 
"use_cpu_only", + "backend", + "iou_threshold_percentile", + "score_threshold_percentile", + "n_boxes", + "n_batch", + "n_score", + "per_class_suppression", + ] + ), + itertools.product( + [True, False], + backends, + [0, 30, 80, 100], + [0, 40, 100], + [(10, 7), (30, 37), (100, 64)], + [1], + [1, 4, 7], + [True, False], + ), + ) + def test_builder_to_backend_stress( + self, + use_cpu_only, + backend, + iou_threshold_percentile, + score_threshold_percentile, + n_boxes, + n_batch, + n_score, + per_class_suppression, + ): + n_boxes_in, n_boxes_out = n_boxes + boxes_val = random_gen((n_batch, n_boxes_in, 4), 0, 100) + scores_val = random_gen((n_batch, n_boxes_in, n_score), -100, 100) + + iou_matrix = self._compute_iou_matrix(boxes_val[0, :, :]) + iou_matrix = iou_matrix[~np.eye(iou_matrix.shape[0], dtype=bool)].reshape( + iou_matrix.shape[0], -1 + ) + + if score_threshold_percentile == 0: + score_threshold = np.min(scores_val) - 1 + elif score_threshold_percentile == 100: + score_threshold = np.max(scores_val) + 1 + else: + score_threshold = ( + np.percentile(scores_val, score_threshold_percentile) + 0.01 + ) + + if iou_threshold_percentile == 0: + iou_threshold = np.maximum(np.min(iou_matrix) - 0.01, 0.0) + else: + iou_threshold = np.percentile(iou_matrix, iou_threshold_percentile) + 0.01 + + ( + tf_boxes, + tf_scores, + tf_indices, + tf_num_boxes, + ) = self._ref_non_maximum_suppression( + boxes_val, + scores_val, + iou_threshold, + score_threshold, + n_boxes_out, + per_class_suppression, + ) + expected_outputs = [tf_boxes, tf_scores, tf_indices, tf_num_boxes] + expected_output_types = [ + tf_boxes.shape[:] + (types.fp32,), + tf_scores.shape[:] + (types.fp32,), + tf_indices.shape[:] + (types.int32,), + tf_num_boxes.shape[:] + (types.int32,), + ] + + input_placeholders = { + "boxes": mb.placeholder(shape=(n_batch, n_boxes_in, 4)), + "scores": mb.placeholder(shape=(n_batch, n_boxes_in, n_score)), + } + input_values = {"boxes": boxes_val, "scores": scores_val} + + def build(boxes, scores): + return mb.non_maximum_suppression( + boxes=boxes, + scores=scores, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + max_boxes=n_boxes_out, + per_class_suppression=per_class_suppression, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestNonZero: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], ["nn_proto"]) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [mb.non_zero(x=x)] + + expected_output_types = [(UNK_SYM, 2, types.fp32)] + expected_outputs = [np.array(np.transpose(np.nonzero(x_val)))] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.random.randint(low=-1, high=2, size=(6, 1, 7)) + res = mb.non_zero(x=x_val) + assert is_close(np.transpose(np.nonzero(x_val)), res.val) + + +class TestOneHot: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([1, 0], dtype=np.int32) + depth = 4 + + 
input_placeholders = { + "x": mb.placeholder(shape=x.shape, dtype=types.int32), + "y": mb.placeholder(shape=(), dtype=types.int32), + } + + input_values = {"x": x, "y": depth} + + def build(x, y): + return [ + mb.one_hot(indices=x, one_hot_vector_size=4), + mb.one_hot(indices=x, one_hot_vector_size=4, axis=0), + mb.one_hot( + indices=x, one_hot_vector_size=4, on_value=1.0, off_value=0.0 + ), + mb.one_hot( + indices=x, one_hot_vector_size=y, on_value=1.0, off_value=0.0 + ), + ] + + expected_output_types = [ + (2, 4, types.int32), + (4, 2, types.int32), + (2, 4, types.fp32), + (2, UNK_SYM, types.fp32), + ] + + expected_outputs = [ + np.array([[0, 1, 0, 0], [1, 0, 0, 0]], dtype=np.float32), + np.array([[0, 1], [1, 0], [0, 0], [0, 0]], dtype=np.float32), + np.array([[0, 1, 0, 0], [1, 0, 0, 0]], dtype=np.float32), + np.array([[0, 1, 0, 0], [1, 0, 0, 0]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestPad: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + def test_constant_mode(): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + pad = np.array([1, 1, 2, 2], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.pad(x=x, pad=pad, mode="constant", constant_val=0.0) + + expected_output_types = (4, 7, types.fp32) + expected_outputs = np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0], + [0.0, 0.0, 4.0, 5.0, 6.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def test_constant_mode_constant_val(): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + pad = np.array([1, 1, 2, 2], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.pad(x=x, pad=pad, mode="constant", constant_val=0.5) + + expected_output_types = (4, 7, types.fp32) + expected_outputs = np.array( + [ + [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + [0.5, 0.5, 1.0, 2.0, 3.0, 0.5, 0.5], + [0.5, 0.5, 4.0, 5.0, 6.0, 0.5, 0.5], + [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def test_reflect_mode(): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + pad = np.array([1, 1, 2, 2], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.pad(x=x, pad=pad, mode="reflect") + + expected_output_types = (4, 7, types.fp32) + expected_outputs = np.array( + [ + [6.0, 5.0, 4.0, 5.0, 6.0, 5.0, 4.0], + [3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0], + [6.0, 5.0, 4.0, 5.0, 6.0, 5.0, 4.0], + [3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def test_replicate_mode(): + t 
= np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + pad = np.array([1, 1, 2, 2], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.pad(x=x, pad=pad, mode="replicate") + + expected_output_types = (4, 7, types.fp32) + expected_outputs = np.array( + [ + [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0], + [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0], + [4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0], + [4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0], + ], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + def test_constant_general(): + t = np.arange(12, dtype=np.float32).reshape([2, 2, 3]) + pad = np.array([[1, 1], [2, 2], [1, 1]], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return mb.pad( + x=x, pad=pad.reshape(-1), mode="constant", constant_val=0.0 + ) + + expected_output_types = (4, 6, 5, types.fp32) + expected_outputs = np.pad(t, pad, mode="constant") + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + # Test different modes + test_constant_mode() + test_constant_mode_constant_val() + test_reflect_mode() + test_replicate_mode() + test_constant_general() + + @ssa_fn + def test_builder_eval(self): + def test_constant_mode(): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.pad( + x=x_val, + pad=np.array([1, 1, 2, 2], dtype=np.int32), + mode="constant", + constant_val=0.0, + ) + expected_outputs = np.array( + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0], + [0.0, 0.0, 4.0, 5.0, 6.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + dtype=np.float32, + ) + assert is_close(expected_outputs, v.val) + + def test_reflect_mode(): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.pad( + x=x_val, pad=np.array([1, 1, 2, 2], dtype=np.int32), mode="reflect" + ) + expected_outputs = np.array( + [ + [6.0, 5.0, 4.0, 5.0, 6.0, 5.0, 4.0], + [3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0], + [6.0, 5.0, 4.0, 5.0, 6.0, 5.0, 4.0], + [3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0], + ], + dtype=np.float32, + ) + assert is_close(expected_outputs, v.val) + + def test_replicate_mode(): + x_val = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.pad( + x=x_val, pad=np.array([1, 1, 2, 2], dtype=np.int32), mode="replicate" + ) + expected_outputs = np.array( + [ + [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0], + [1.0, 1.0, 1.0, 2.0, 3.0, 3.0, 3.0], + [4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0], + [4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0], + ], + dtype=np.float32, + ) + assert is_close(expected_outputs, v.val) + + def test_constant_general(): + x_val = np.arange(12, dtype=np.float32).reshape([2, 2, 3]) + pad = np.array([[1, 1], [2, 2], [1, 1]], dtype=np.int32) + v = mb.pad(x=x_val, pad=pad.reshape(-1), mode="constant", constant_val=0.0) + expected_outputs = np.pad(x_val, pad, mode="constant") + assert is_close(expected_outputs, v.val) + + # Test different modes + test_constant_mode() + test_reflect_mode() + test_replicate_mode() + test_constant_general() + + +class TestRange1d: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = 
np.array([15.0], dtype=np.float32) + y = 5.0 + z = 2.0 + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "y": mb.placeholder(shape=()), + "z": mb.placeholder(shape=()), + } + input_values = {"x": x, "y": y, "z": z} + + def build(x, y, z): + return [ + mb.mul(x=x, y=x), + mb.range_1d(start=y, end=15.0, step=2.0), + mb.range_1d(start=y, end=15.0, step=z), + mb.range_1d(start=y, end=x, step=2.0), + mb.range_1d(start=y, end=x, step=z), + mb.range_1d(start=5.0, end=15.0, step=z), + mb.range_1d(start=5.0, end=x, step=2.0), + mb.range_1d(start=5.0, end=x, step=z), + ] + + expected_output_types = [ + (1, types.fp32), + (UNK_SYM, types.fp32), + (UNK_SYM, types.fp32), + (UNK_SYM, types.fp32), + (UNK_SYM, types.fp32), + (UNK_SYM, types.fp32), + (UNK_SYM, types.fp32), + (UNK_SYM, types.fp32), + ] + + expected_outputs = [ + np.array([225.0], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + np.array([5, 7, 9, 11, 13], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + v = mb.range_1d(start=5, end=15, step=2) + assert is_close(np.arange(5, 15, 2), v.val) + + +class TestTile: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x.shape)} + + input_values = {"x": x} + + def build(x): + return [ + mb.tile(x=x, reps=(1, 1)), + mb.tile(x=x, reps=(2,)), + mb.tile(x=x, reps=(2, 1)), + ] + + expected_output_types = [ + (2, 3, types.fp32), + (2, 6, types.fp32), + (4, 3, types.fp32), + ] + + expected_outputs = [ + x, + np.array([[1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6]], dtype=np.float32), + np.array([[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.tile(x=x, reps=(2,)) + assert is_close(np.tile(x, reps=(2,)), v.val) + + +class TestDynamicTile: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + rep1 = np.array([1, 1]).astype(np.int32) + rep2 = np.array([2, 1]).astype(np.int32) + rep3 = np.array([2, 3]).astype(np.int32) + input_placeholders = { + "x": mb.placeholder(shape=x.shape), + "reps1": mb.placeholder(shape=rep1.shape), + "reps2": mb.placeholder(shape=rep2.shape), + "reps3": mb.placeholder(shape=rep3.shape), + } + + input_values = {"x": x, "reps1": rep1, "reps2": rep2, "reps3": rep3} + + def build(x, reps1, reps2, reps3): + return [ + mb.tile(x=x, reps=reps1), + mb.tile(x=x, reps=reps2), + mb.tile(x=x, reps=reps3), + ] + + expected_output_types = [ + (UNK_SYM, UNK_SYM, types.fp32), + (UNK_SYM, UNK_SYM, types.fp32), + (UNK_SYM, UNK_SYM, 
types.fp32), + ] + + expected_outputs = [ + x, + np.array([[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]], dtype=np.float32), + np.array( + [ + [1, 2, 3, 1, 2, 3, 1, 2, 3], + [4, 5, 6, 4, 5, 6, 4, 5, 6], + [1, 2, 3, 1, 2, 3, 1, 2, 3], + [4, 5, 6, 4, 5, 6, 4, 5, 6], + ], + dtype=np.float32, + ), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestTopK: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + val = np.array([[-1.0, 2.0, -3.0], [4.0, -5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return mb.topk(x=x, k=2, axis=1) + + expected_output_types = [ + (2, 2, types.fp32), + (2, 2, types.int32), + ] + expected_outputs = [ + np.array([[2.0, -1.0], [6.0, 4.0]], dtype=np.float32), + # indices are int32, matching the declared output type + np.array([[1, 0], [2, 0]], dtype=np.int32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + def np_topk(x, k, axis, ascending=False): + indices = np.argsort(x, axis=axis) + if not ascending: + indices = np.argsort(-x, axis=axis) + slc = [slice(None)] * len(x.shape) + slc[axis] = slice(0, k) + indices = indices[tuple(slc)] + values = np.take_along_axis(x, indices, axis=axis) + return values, indices + + val = np.array([[-1.0, 7.0, -3.0], [4.0, -5.0, 8.0]], dtype=np.float32) + res_values, res_indices = mb.topk(x=val, k=1, axis=0) + ref_values, ref_indices = np_topk(x=val, k=1, axis=0) + assert is_close(ref_values, res_values.val) + assert is_close(ref_indices, res_indices.val) + res_values, res_indices = mb.topk(x=val, k=2, axis=-1, ascending=True) + ref_values, ref_indices = np_topk(x=val, k=2, axis=-1, ascending=True) + assert is_close(ref_values, res_values.val) + assert is_close(ref_indices, res_indices.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + val = np.array([[1.0, 2.0, -3.0], [4.0, -5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=(s0, 3))} + input_values = {"x": val} + + def build(x): + return mb.topk(x=x, k=2, axis=-1, ascending=True) + + expected_output_types = [ + (s0, 2, types.fp32), + (s0, 2, types.int32), + ] + expected_outputs = [ + np.array([[-3.0, 1.0], [-5.0, 4.0]], dtype=np.float32), + np.array([[2, 0], [1, 0]], dtype=np.int32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestFlatten: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array( + [[[1, 2, 3], [4, 5, 6]], [[-1, -2, -3], [-4, -5, -6]]], dtype=np.float32 + ) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return [mb.flatten(x=x)] + + expected_output_types = [ + (2, 6, types.fp32), + ] + expected_outputs = [ + np.array([[1, 2, 3, 4, 5, 6], [-1, -2, -3, -4, -5, -6]], dtype=np.float32), + ] + +
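+        # flatten keeps the leading dimension and collapses the rest:
+        # (2, 2, 3) -> (2, 6)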
run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + t = np.array([[[1, 2, 3], [4, 5, 6]]], dtype=np.float32) + f = mb.flatten(x=t) + expected_f = np.array([[1, 2, 3, 4, 5, 6]], dtype=np.float32) + assert is_close(expected_f, f.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + # Test variadic (rdar://59559656) + input_placeholders = { + "x": mb.placeholder(shape=(s0, 4, 5, 6)), + } + + def build(x): + return [mb.flatten(x=x)] + + input = np.random.rand(10, 4, 5, 6) + output = input.reshape(10, -1) + + expected_output_types = (s0, 120, types.fp32) + expected_outputs = [output] + + input_values = {"x": input} + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestShape: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + pad = np.array([1, 1, 2, 2], dtype=np.int32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + x = mb.pad(x=x, pad=pad, mode="constant", constant_val=0.0) + return mb.shape(x=x) + + expected_output_types = (2, types.int32) + expected_outputs = [ + np.array([4, 7], dtype=np.int32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + t = np.array([[[1, 2, 3], [4, 5, 6]]], dtype=np.float32) + f = mb.shape(x=t) + # shape returns int32, so compare against an int32 reference + expected_f = np.array([1, 2, 3], dtype=np.int32) + assert is_close(expected_f, f.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + # Test variadic (rdar://59559656) + input_placeholders = { + "x": mb.placeholder(shape=(s0, 4, 5, 6)), + } + + def build(x): + return [mb.shape(x=x)] + + input = np.random.rand(10, 4, 5, 6) + output = np.array([10, 4, 5, 6], dtype=np.int32) + + expected_output_types = (4, types.int32) + expected_outputs = [output] + + input_values = {"x": input} + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestConcat: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t1 = np.array([[1, 2], [4, 5]], dtype=np.float32) + t2 = np.array([[7, 8]], dtype=np.float32) + + input_placeholders = { + "x": mb.placeholder(shape=t1.shape), + "y": mb.placeholder(shape=t2.shape), + } + input_values = {"x": t1, "y": t2} + + def build(x, y): + return (mb.concat(values=(x, y), axis=0),) + + expected_output_types = [ + (3, 2, types.fp32), + ] + expected_outputs = [ + np.array([[1, 2], [4, 5], [7, 8]], dtype=np.float32), + ] + + run_compare_builder( + build, +
input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_type_promotion(self, use_cpu_only, backend): + t1 = np.array([[1, 2], [4, 5]], dtype=np.float32) + t2 = np.array([[7, 8]], dtype=np.float32) + + input_placeholders = { + "x": mb.placeholder(shape=t1.shape), + } + input_values = {"x": t1} + + def build(x): + t2 = np.array([[7, 8]], dtype=np.int32) + return (mb.concat(values=(x, t2), axis=0),) + + expected_output_types = [ + # np.int32 should be promoted to fp32 + (3, 2, types.fp32), + ] + expected_outputs = [ + np.array([[1, 2], [4, 5], [7, 8]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + values = [ + np.random.rand(1, 1, 6, 2), + np.random.rand(1, 1, 3, 2), + ] + v = mb.concat(values=values, axis=2) + assert is_close(np.concatenate(values, 2), v.val) + + @ssa_fn + def test_builder_eval_failure(self): + values = [ + np.random.rand(1, 1, 6, 2), + np.random.rand(1, 1, 3, 1), + ] + with pytest.raises(ValueError): + v = mb.concat(values=values, axis=2) + + +class TestSplit: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32) + + input_placeholders = { + "x": mb.placeholder(shape=t.shape), + } + input_values = {"x": t} + + def build(x): + return mb.split(x=x, num_splits=2, axis=1) + mb.split( + x=x, split_sizes=[1, 2], axis=0 + ) + + expected_output_types = [ + (3, 1, types.fp32), + (3, 1, types.fp32), + (1, 2, types.fp32), + (2, 2, types.fp32), + ] + expected_outputs = [ + np.array([[1], [3], [5]], dtype=np.float32), + np.array([[2], [4], [6]], dtype=np.float32), + np.array([[1, 2]], dtype=np.float32), + np.array([[3, 4], [5, 6]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + t = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32) + vs = mb.split(x=t, num_splits=3, axis=0) + es = np.split(t, [1, 2, 3], axis=0) + for v, e in zip(vs, es): + assert is_close(e, v.val) + + +class TestStack: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t1 = np.array([1, 2, 3], dtype=np.float32) + t2 = np.array([7, 8, 9], dtype=np.float32) + + input_placeholders = { + "x": mb.placeholder(shape=t1.shape), + "y": mb.placeholder(shape=t2.shape), + } + input_values = {"x": t1, "y": t2} + + def build(x, y): + return [mb.stack(values=(x, y), axis=0), mb.stack(values=(x, y), axis=1)] + + expected_output_types = [ + (2, 3, types.fp32), + (3, 2, types.fp32), + ] + expected_outputs = [ + np.array([[1, 2, 3], [7, 8, 9]], dtype=np.float32), + np.array([[1, 7], [2, 8], [3, 9]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + 
backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + values = [ + np.random.rand(1, 1, 3, 2).astype(np.float32), + np.random.rand(1, 1, 3, 2).astype(np.float32), + ] + v = mb.stack(values=values, axis=2) + assert is_close(np.stack(values, 2), v.val) + + +class TestAddN: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t1 = np.array([1, 2, 3], dtype=np.float32) + t2 = np.array([7, 8, 9], dtype=np.float32) + t3 = np.array([1, 1, 1], dtype=np.float32) + + input_placeholders = { + "x": mb.placeholder(shape=t1.shape), + "y": mb.placeholder(shape=t2.shape), + "z": mb.placeholder(shape=t3.shape), + } + input_values = {"x": t1, "y": t2, "z": t3} + + def build(x, y, z): + return (mb.addn(values=(x, y, z)),) + + expected_output_types = [ + (3, types.fp32), + ] + expected_outputs = [ + np.array([9, 11, 13], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + values = [ + np.random.rand(1, 1, 3, 2).astype(np.float32), + np.random.rand(1, 1, 3, 2).astype(np.float32), + np.random.rand(1, 1, 3, 2).astype(np.float32), + ] + v = mb.addn(values=values) + assert is_close(values[0] + values[1] + values[2], v.val) + + +class TestArgSort: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + val = np.array([[-1.0, 2.0, -3.0], [4.0, -5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.argsort(x=x), mb.argsort(x=x, axis=0, ascending=True)] + + expected_output_types = [ + (2, 3, types.int32), + (2, 3, types.int32), + ] + expected_outputs = [ + np.array([[1, 0, 2], [2, 0, 1]], dtype=np.int32), + np.array([[0, 1, 0], [1, 0, 1]], dtype=np.int32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = random_gen(shape=(1, 3, 2, 2), rand_min=-100, rand_max=100) + res = mb.argsort(x=x_val, axis=-3) + assert is_close(np.argsort(x_val, axis=-3), res.val) + + +class TestIsFinite: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + val = np.array([[np.inf, -np.inf, 0], [-np.inf, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.isfinite(x=x)] + + expected_output_types = [(2, 3, types.bool)] + expected_outputs = [ + np.array([[False, False, True], [False, True, True]], dtype=np.bool) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + shape = (3, 3, 3, 3) + x_val = random_gen(shape=shape, rand_min=-1, rand_max=1) + random_map = np.random.choice([np.inf, -np.inf, 0], size=shape) + x_val[np.where(random_map == np.inf)] = np.inf + x_val[np.where(random_map == -np.inf)] = -np.inf + res = mb.isfinite(x=x_val) + assert 
is_close(np.isfinite(x_val), res.val) diff --git a/coremltools/converters/mil/mil/ops/tests/test_tensor_transformation.py b/coremltools/converters/mil/mil/ops/tests/test_tensor_transformation.py new file mode 100644 index 000000000..bdb14102e --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_tensor_transformation.py @@ -0,0 +1,884 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil import testing_reqs +from coremltools.converters.mil.mil import get_new_symbol, get_new_variadic_symbol +from coremltools.converters.mil.testing_reqs import * + +from .testing_utils import UNK_SYM, UNK_VARIADIC, run_compare_builder + +backends = testing_reqs.backends + + +class TestDepthToSpace: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + # original input type is (1, 4, 1, 1, fp32) + val = np.array([[[[9.0]], [[5.0]], [[1.0]], [[3.0]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.depth_to_space(x=x, block_size=2)] + + expected_output_types = (1, 1, 2, 2, types.fp32) + expected_outputs = np.array([[[[9.0, 5.0], [1.0, 3.0]]]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestExpandDims: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return [ + mb.expand_dims(x=x, axes=[0]), + mb.expand_dims(x=x, axes=[1]), + mb.expand_dims(x=x, axes=[2]), + mb.expand_dims(x=x, axes=[-1]), + mb.expand_dims(x=x, axes=[0, 1]), + mb.expand_dims(x=x, axes=[-2, -1]), + ] + + expected_output_types = [ + (1, 2, 3, types.fp32), + (2, 1, 3, types.fp32), + (2, 3, 1, types.fp32), + (2, 3, 1, types.fp32), + (1, 1, 2, 3, types.fp32), + (2, 3, 1, 1, types.fp32), + ] + expected_outputs = [ + np.array([[[1, 2, 3], [4, 5, 6]]], dtype=np.float32), + np.array([[[1, 2, 3]], [[4, 5, 6]]], dtype=np.float32), + np.array([[[1], [2], [3]], [[4], [5], [6]]], dtype=np.float32), + np.array([[[1], [2], [3]], [[4], [5], [6]]], dtype=np.float32), + np.array([[[[1, 2, 3], [4, 5, 6]]]], dtype=np.float32), + np.array([[[[1]], [[2]], [[3]]], [[[4]], [[5]], [[6]]]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + input_placeholders = { + "x": mb.placeholder(shape=(2, s0)), + } + + def build(x): + return [ + mb.expand_dims(x=x, axes=[-1]), + mb.expand_dims(x=x, axes=[1]), + ] + + expected_output_types = [ + (2, s0, 1, types.fp32), + (2, 1, s0, types.fp32), + ] + expected_outputs = [ + np.array([[[1], [2], [3]], [[4], 
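The `TestDepthToSpace` smoke test above is easiest to sanity-check against a NumPy reference. A sketch assuming the usual NCHW, depth-major (DCR) ordering — `depth_to_space_ref` is a hypothetical helper written for this note, not library code:

```python
import numpy as np

def depth_to_space_ref(x, b):
    # (N, C*b*b, H, W) -> (N, C, H*b, W*b), depth-major (DCR) ordering
    n, cbb, h, w = x.shape
    c = cbb // (b * b)
    x = x.reshape(n, b, b, c, h, w)
    x = x.transpose(0, 3, 4, 1, 5, 2)  # -> (n, c, h, b, w, b)
    return x.reshape(n, c, h * b, w * b)

val = np.array([[[[9.0]], [[5.0]], [[1.0]], [[3.0]]]], dtype=np.float32)
assert (depth_to_space_ref(val, 2) == [[[[9.0, 5.0], [1.0, 3.0]]]]).all()
```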
[5], [6]]], dtype=np.float32), + np.array([[[1, 2, 3]], [[4, 5, 6]]], dtype=np.float32), + ] + + input_values = { + "x": np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), + } + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x_val = np.random.rand(1, 6) + v1 = mb.expand_dims(x=x_val, axes=[2]) + assert is_close(np.expand_dims(x_val, 2), v1.val) + + v2 = mb.expand_dims(x=x_val, axes=[-1]) + assert is_close(np.expand_dims(x_val, -1), v2.val) + + v3 = mb.expand_dims(x=x_val, axes=[-1, -2]) + ref = np.expand_dims(np.expand_dims(x_val, -1), -1) + assert is_close(ref, v3.val) + + v4 = mb.expand_dims(x=x_val, axes=[0, -1, -2]) + assert is_close(np.reshape(x_val, (1, 1, 6, 1, 1)), v4.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axis", + itertools.product( + [True, False], + backends, + [ + (rank, axis) + for rank in range(1, 5) + for axis in range(-rank - 1, rank + 1) + ], + ), + ) + def test_builder_to_backend_programmatic_one_axis( + self, use_cpu_only, backend, rank_and_axis + ): + rank, axis = rank_and_axis + x_shape = np.random.randint(low=2, high=6, size=rank) + input_placeholders = {"x": mb.placeholder(shape=x_shape)} + input_values = {"x": np.random.sample(x_shape).astype(np.float32)} + + def build(x): + return mb.expand_dims(x=x, axes=[axis]) + + adjusted_axis = axis if axis >= 0 else rank + axis + 1 + x_shape = list(x_shape) + out_shape = x_shape[:adjusted_axis] + [1] + x_shape[adjusted_axis:] + expected_output_types = tuple(out_shape[:]) + (types.fp32,) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + np.expand_dims(input_values["x"], axis), + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axes", + itertools.product( + [True, False], + backends, + [ + (3, [0, 1]), + (3, [1, 0]), + (3, [-2, -1]), + (3, [-1, -2]), + (2, [-3, -1]), + (2, [-3, 1, -1]), + (2, [-2, 0]), + (1, [-1, -2, -3, -4]), + (1, [0, -1]), + (1, [0, 1, -2, -1]), + ], + ), + ) + def test_builder_to_backend_programmatic_multiple_axes( + self, use_cpu_only, backend, rank_and_axes + ): + rank, axes = rank_and_axes + x_shape = np.random.randint(low=1, high=6, size=rank) + input_placeholders = {"x": mb.placeholder(shape=x_shape)} + input_values = {"x": np.random.sample(x_shape).astype(np.float32)} + + def build(x): + return mb.expand_dims(x=x, axes=axes) + + out_shape = list(x_shape) + out_rank = rank + len(axes) + pos_axes = sorted([out_rank + axis if axis < 0 else axis for axis in axes]) + for axis in pos_axes: + out_shape.insert(axis, 1) + + expected_outputs = np.reshape(input_values["x"], out_shape) + expected_output_types = tuple(out_shape) + (types.fp32,) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestReshape: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=t.shape)} + input_values = {"x": t} + + def build(x): + return [ + mb.reshape(x=x, shape=[3, 2]), + mb.reshape(x=x, shape=[2, -1]), + 
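The multiple-axes test above derives the expected shape by normalizing negative axes against the *output* rank and inserting unit dims in ascending order. That rule is worth a standalone check:

```python
import numpy as np

x_shape, axes = [2, 3], [0, -1]
out_rank = len(x_shape) + len(axes)
# Negative axes index into the output rank, not the input rank:
pos_axes = sorted(a + out_rank if a < 0 else a for a in axes)  # [0, 3]
out_shape = list(x_shape)
for a in pos_axes:
    out_shape.insert(a, 1)
assert out_shape == [1, 2, 3, 1]
assert np.zeros(x_shape)[np.newaxis, ..., np.newaxis].shape == (1, 2, 3, 1)
```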
mb.reshape(x=x, shape=[2, 1, 1, 3]), + ] + + expected_output_types = [ + (3, 2, types.fp32), + (2, 3, types.fp32), + (2, 1, 1, 3, types.fp32), + ] + expected_outputs = [ + np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32), + np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), + np.array([[[[1.0, 2.0, 3.0]]], [[[4.0, 5.0, 6.0]]]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + t = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + r = mb.reshape(x=t, shape=[3, 2]) + expected_r = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32) + assert is_close(expected_r, r.val) + r2 = mb.reshape(x=t, shape=[2, -1]) + expected_r2 = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + assert is_close(expected_r2, r2.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + s_len = get_new_symbol() + s1 = get_new_variadic_symbol() + + # Test variadic (rdar://59559656) + input_placeholders = { + "x": mb.placeholder(shape=(2, s0)), + # TODO: variadic (rdar://59559656) + # "x2": mb.placeholder(shape=(s1, 2)), + "shape": mb.placeholder(shape=(3,), dtype=types.int32), + "shape2": mb.placeholder(shape=(s_len,), dtype=types.int32), + } + + def build(x, shape, shape2): + return [ + mb.reshape(x=x, shape=[2, -1]), + mb.reshape(x=x, shape=[1, -1]), + mb.reshape(x=x, shape=[2, 1, 1, -1]), + # TODO: variadic (rdar://59559656) + # mb.reshape(x=x2, shape=[2, 1, 1]), + mb.reshape(x=x, shape=shape), + mb.reshape(x=x, shape=shape2), + ] + + expected_output_types = [ + (2, s0, types.fp32), + (1, 2 * s0, types.fp32), + (2, 1, 1, s0, types.fp32), + # TODO: variadic (rdar://59559656) + # (2, 1, 1, types.fp32), + (UNK_SYM, UNK_SYM, UNK_SYM, types.fp32), + (UNK_VARIADIC, types.fp32), + ] + expected_outputs = [ + np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), + np.array([[1, 2, 3, 4, 5, 6]], dtype=np.float32), + np.array([[[[1.0, 2.0, 3.0]]], [[[4.0, 5.0, 6.0]]]], dtype=np.float32), + # TODO: variadic (rdar://59559656) + # np.array([[1, 2, 3], + # [4, 5, 6]], dtype=np.float32), + np.array([[[1, 2, 3]], [[4, 5, 6]]], dtype=np.float32), + np.array([[[1, 2, 3]], [[4, 5, 6]]], dtype=np.float32), + ] + + input_values = { + "x": np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), + # TODO: variadic (rdar://59559656) + # "x2": np.array([[[1, 2, 3],[4, 5, 6]]], dtype=np.float32), + "shape": np.array([2, 1, 3], dtype=np.float32), + "shape2": np.array([2, 1, 3], dtype=np.float32), + } + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestReverse: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + val = np.array([[-1.0, 2.0, -3.0], [4.0, -5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.reverse(x=x), mb.reverse(x=x, axes=[0])] + + expected_output_types = [(2, 3, types.fp32), (2, 3, types.fp32)] + expected_outputs = [ + np.array([[6.0, -5.0, 4.0], [-3.0, 2.0, -1.0]], dtype=np.float32), + np.array([[4.0, -5.0, 
6.0], [-1.0, 2.0, -3.0]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + val = np.array([[-1.0, 7.0, -3.0], [4.0, -5.0, 8.0]], dtype=np.float32) + res = mb.reverse(x=val, axes=[0]) + assert is_close(np.flip(val, axis=0), res.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + val = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=(s0, 3))} + input_values = {"x": val} + + def build(x): + return [ + mb.reverse(x=x, axes=[1]), + mb.reverse(x=x, axes=[0]), + ] + + expected_output_types = [ + (s0, 3, types.fp32), + (s0, 3, types.fp32), + ] + expected_outputs = [ + np.array([[3.0, 2.0, 1.0], [6.0, 5.0, 4.0]], dtype=np.float32), + np.array([[4.0, 5.0, 6.0], [1.0, 2.0, 3.0]], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestReverseSequence: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array( + [ + [1, 2, 3, 4, 5, 0, 0, 0], + [1, 2, 0, 0, 0, 0, 0, 0], + [1, 2, 3, 4, 0, 0, 0, 0], + [1, 2, 3, 4, 5, 6, 7, 8], + ], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=x_val.shape)} + input_values = {"x": x_val} + + def build(x): + return [ + mb.reverse_sequence( + x=x, lengths=[7, 2, 3, 5], seq_axis=1, batch_axis=0 + ), + ] + + expected_output_types = [ + (4, 8, types.fp32), + ] + expected_outputs = [ + np.array( + [ + [0, 0, 5, 4, 3, 2, 1, 0], + [2, 1, 0, 0, 0, 0, 0, 0], + [3, 2, 1, 4, 0, 0, 0, 0], + [5, 4, 3, 2, 1, 6, 7, 8], + ], + dtype=np.float32, + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + x_val = np.array( + [ + [1, 2, 3, 4, 5, 0, 0, 0], + [1, 2, 0, 0, 0, 0, 0, 0], + [1, 2, 3, 4, 0, 0, 0, 0], + [1, 2, 3, 4, 5, 6, 7, 8], + ], + dtype=np.float32, + ) + input_placeholders = {"x": mb.placeholder(shape=(4, s0))} + input_values = {"x": x_val} + + def build(x): + return [ + mb.reverse_sequence( + x=x, lengths=[7, 2, 3, 5], seq_axis=1, batch_axis=0 + ), + ] + + expected_output_types = [ + (4, s0, types.fp32), + ] + expected_outputs = [ + np.array( + [ + [0, 0, 5, 4, 3, 2, 1, 0], + [2, 1, 0, 0, 0, 0, 0, 0], + [3, 2, 1, 4, 0, 0, 0, 0], + [5, 4, 3, 2, 1, 6, 7, 8], + ], + dtype=np.float32, + ) + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSliceBySize: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x_val = np.array(list(range(24))).reshape((2, 3, 4)).astype(np.float32) + begin_val = np.array([1, 1, 1], 
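`reverse_sequence` above reverses only a per-batch prefix and leaves the tail untouched. A small NumPy reference for the `seq_axis=1, batch_axis=0` case used in these tests (hypothetical helper, not library code):

```python
import numpy as np

def reverse_sequence_ref(x, lengths):
    out = x.copy()
    for i, l in enumerate(lengths):
        out[i, :l] = x[i, :l][::-1]  # flip only the first l entries of row i
    return out

x = np.array([[1, 2, 3, 4, 5, 0, 0, 0]], dtype=np.float32)
assert (reverse_sequence_ref(x, [7])[0] == [0, 0, 5, 4, 3, 2, 1, 0]).all()
```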
dtype=np.int32) + input_placeholders = { + "x": mb.placeholder(shape=x_val.shape), + "begin": mb.placeholder(shape=begin_val.shape, dtype=types.int32), + } + input_values = {"x": x_val, "begin": begin_val} + + def build_non_single(x, begin): + return [ + mb.slice_by_size(x=x, begin=begin, size=[1, 2, 3]), + ] + + def build_single(x, begin): + return [ + mb.slice_by_size(x=x, begin=begin, size=[-1, 2, -1]), + ] + + expected_output_types = [(1, 2, 3, types.fp32)] + expected_outputs = [np.array([[[17, 18, 19], [21, 22, 23]]], dtype=np.float32)] + run_compare_builder( + build_non_single, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + expected_output_types = [(UNK_SYM, 2, UNK_SYM, types.fp32)] + run_compare_builder( + build_single, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array(list(range(24))).reshape(2, 3, 4) + v_1 = mb.slice_by_size(x=x, begin=(0, 1, 0), size=(-1, -1, -1)) + v_2 = mb.slice_by_size(x=x, begin=(0, 1, 0), size=(-1, -1, 3)) + v_3 = mb.slice_by_size(x=x, begin=(0, -2, 0), size=(-1, -1, 3)) + assert is_close(x[:, 1:, :], v_1.val) + assert is_close(x[:, 1:, :3], v_2.val) + assert is_close(x[:, -2:, :3], v_3.val) + + +class TestSpaceToDepth: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + # original input type is (1, 1, 2, 2, fp32) + val = np.array([[[[7.0, 9.0], [4.0, 6.0]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.space_to_depth(x=x, block_size=2)] + + expected_output_types = (1, 4, 1, 1, types.fp32) + expected_outputs = np.array( + [[[[7.0]], [[9.0]], [[4.0]], [[6.0]]]], dtype=np.float32 + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestSqueeze: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + x = np.array([[[[1], [2], [3]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=x.shape)} + + input_values = {"x": x} + + def build(x): + return [ + mb.squeeze(x=x, axes=(-1,)), + mb.squeeze(x=x, axes=(-3, 0)), + mb.squeeze(x=x, axes=(0, 1, 3)), + mb.squeeze(x=x), + ] + + expected_output_types = [ + (1, 1, 3, types.fp32), + (3, 1, types.fp32), + (3, types.fp32), + (3, types.fp32), + ] + + expected_outputs = [ + np.array([[[1, 2, 3]]], dtype=np.float32), + np.array([[1], [2], [3]], dtype=np.float32), + np.array([1, 2, 3], dtype=np.float32), + np.array([1, 2, 3], dtype=np.float32), + ] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array([[[[1], [2], [3]], [[4], [5], [6]]]], dtype=np.float32) + v = mb.squeeze(x=x, axes=(-4, 3)) + assert is_close(np.squeeze(x, axis=(-4, 3)), v.val) + + +class TestTranspose: + @pytest.mark.parametrize( + argnames=["use_cpu_only", "backend", "is_symbolic"], + argvalues=itertools.product([True, 
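A note on the `size=-1` convention in `TestSliceBySize` above: -1 means "from `begin` to the end of that axis", which is also why such dims come out symbolic (`UNK_SYM`) when `begin` is only known at runtime. The NumPy indexing it should agree with:

```python
import numpy as np

x = np.arange(24).reshape(2, 3, 4)
begin, size = (0, 1, 0), (-1, -1, 3)
# size == -1 takes everything from begin onward on that axis:
stops = [None if s == -1 else b + s for b, s in zip(begin, size)]
ref = x[tuple(slice(b, e) for b, e in zip(begin, stops))]
assert (ref == x[:, 1:, :3]).all()
```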
False], backends, [True, False],), + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend, is_symbolic): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + + input_shape = x.shape + # is_symbolic = False + if is_symbolic: + input_shape = [get_new_symbol(), get_new_symbol()] + + input_placeholders = {"x": mb.placeholder(shape=input_shape)} + + input_values = {"x": x} + + def build(x): + return [ + mb.transpose(x=x, perm=(0, 1)), + mb.transpose(x=x, perm=(1, 0)), + mb.transpose(x=x, perm=(-1, 0)), + mb.transpose(x=x, perm=(-2, -1)), + ] + + d0 = input_shape[0] + d1 = input_shape[1] + expected_output_types = [ + (d0, d1, types.fp32), + (d1, d0, types.fp32), + (d1, d0, types.fp32), + (d0, d1, types.fp32), + ] + + expected_outputs = [x, x.T, x.T, x] + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + @ssa_fn + def test_builder_eval(self): + x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + v = mb.transpose(x=x, perm=(1, 0)) + assert is_close(x.T, v.val) + + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_symbolic(self, use_cpu_only, backend): + s0 = get_new_symbol() + + # Test variadic (rdar://59559656) + input_placeholders = { + "x": mb.placeholder(shape=(2, s0)), + } + + def build(x): + return [ + mb.transpose(x=x, perm=[1, 0]), + ] + + expected_output_types = [ + (s0, 2, types.fp32), + ] + expected_outputs = [ + np.array([[1, 4], [2, 5], [3, 6]], dtype=np.float32), + ] + + input_values = { + "x": np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), + } + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + frontend_only=False, + backend=backend, + ) + + +class TestPixelShuffle: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + # original input type is (1, 4, 1, 1, fp32) + val = np.array([[[[9.0]], [[5.0]], [[1.0]], [[3.0]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.pixel_shuffle(x=x, upscale_factor=2)] + + expected_output_types = (1, 1, 2, 2, types.fp32) + expected_outputs = np.array([[[[9.0, 5.0], [1.0, 3.0]]]], dtype=np.float32) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.skipif(not testing_reqs._HAS_TORCH, reason="PyTorch not found.") + @pytest.mark.parametrize( + "use_cpu_only, backend, shape, upscale_factor", + itertools.product( + [True, False], + backends, + [(1, 16, 1, 1), (2, 16, 3, 3), (1, 32, 1, 1)], + [2, 4], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, shape, upscale_factor + ): + val = np.random.rand(*shape) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.pixel_shuffle(x=x, upscale_factor=upscale_factor)] + + torch_pixel_shuffle = torch.nn.PixelShuffle(upscale_factor) + expected_outputs = [torch_pixel_shuffle(torch.Tensor(val)).numpy()] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + run_compare_builder( + build, + input_placeholders, + 
input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + +class TestSlidingWindows: + @pytest.mark.parametrize( + "use_cpu_only, backend", itertools.product([True, False], backends,) + ) + def test_builder_to_backend_smoke(self, use_cpu_only, backend): + # original input type is (1, 4, 1, 1, fp32) + val = np.array([[[[9.0]], [[5.0]], [[1.0]], [[3.0]]]], dtype=np.float32) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.sliding_windows(x=x, axis=1, size=2)] + + expected_output_types = (1, 3, 2, 1, 1, types.fp32) + expected_outputs = np.array( + [[[[[9.0]], [[5.0]]], [[[5.0]], [[1.0]]], [[[1.0]], [[3.0]]]]], + dtype=np.float32, + ) + + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) + + @pytest.mark.parametrize( + "use_cpu_only, backend, rank_and_axis, size, stride", + itertools.product( + [True, False], + backends, + [(rank, axis) for rank in range(1, 5) for axis in range(-rank, rank)], + [1, 2], + [1, 2], + ), + ) + def test_builder_to_backend_stress( + self, use_cpu_only, backend, rank_and_axis, size, stride + ): + def np_sliding_windows(a, np_axis, np_size, np_stride): + n = (a.shape[np_axis] - np_size) // np_stride + 1 + x_shape = list(a.shape) + x_shape[np_axis] = n + if np_axis < 0: + np_axis += len(x_shape) + x_shape.insert(np_axis + 1, np_size) + strides = list(a.strides) + eff_stride = strides[np_axis] * np_stride + strides.insert(np_axis, eff_stride) + return np.lib.stride_tricks.as_strided(a, x_shape, strides) + + rank, axis = rank_and_axis + shape = np.random.randint(low=2, high=5, size=rank) + val = np.random.rand(*shape) + input_placeholders = {"x": mb.placeholder(shape=val.shape)} + input_values = {"x": val} + + def build(x): + return [mb.sliding_windows(x=x, axis=axis, size=size, stride=stride)] + + expected_outputs = [ + np_sliding_windows(val, np_axis=axis, np_size=size, np_stride=stride) + ] + expected_output_types = [o.shape[:] + (types.fp32,) for o in expected_outputs] + run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types, + expected_outputs, + use_cpu_only=use_cpu_only, + backend=backend, + ) diff --git a/coremltools/converters/mil/mil/ops/tests/test_utils.py b/coremltools/converters/mil/mil/ops/tests/test_utils.py new file mode 100644 index 000000000..ba9d7a28a --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/test_utils.py @@ -0,0 +1,265 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
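The stride-tricks helper in `TestSlidingWindows` above encodes the expected geometry: the windowed axis shrinks to `(dim - size) // stride + 1` and a new axis of length `size` appears immediately after it. A minimal 1-D check of the same construction:

```python
import numpy as np

a = np.arange(8.0)
size, stride = 3, 2
n = (a.shape[0] - size) // stride + 1  # -> 3 windows
windows = np.lib.stride_tricks.as_strided(
    a, shape=(n, size), strides=(a.strides[0] * stride, a.strides[0])
)
assert windows.shape == (3, 3)
assert (windows[1] == [2.0, 3.0, 4.0]).all()
```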
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from coremltools.converters.mil.mil.ops.defs._utils import ( + aggregated_pad, + effective_kernel, + spatial_dimensions_out_shape, +) + + +class TestDilation: + def test_kernel_and_dilations_not_same_size(self): + np.testing.assert_raises_regex( + ValueError, + "kernel_shape.*dilations.*length", + effective_kernel, + kernel_shape=(1, 2, 3), + dilations=(1, 2), + ) + + def test_effective_kernel_dilation_1(self): + actual = effective_kernel(kernel_shape=(1, 2, 3), dilations=(1, 1, 1)) + + expected = [1, 2, 3] + np.testing.assert_equal(actual, expected) + + def test_effective_kernel_dilation_2(self): + actual = effective_kernel(kernel_shape=(1, 2, 3), dilations=(2, 2, 2)) + + expected = [1, 3, 5] + np.testing.assert_equal(actual, expected) + + def test_effective_kernel_dilation_3(self): + actual = effective_kernel(kernel_shape=(1, 2, 3), dilations=(3, 3, 3)) + + expected = [1, 4, 7] + np.testing.assert_equal(actual, expected) + + +class TestAggregatePadding: + def test_invalid_pad_type(self): + np.testing.assert_raises_regex( + ValueError, + "Invalid padding pad_type", + aggregated_pad, + pad_type="bananas", + kernel_shape=(1, 2, 3), + ) + + def test_dilations_rank_different_from_input_rank(self): + np.testing.assert_raises_regex( + ValueError, + "dilations must have same length as kernel_shape", + aggregated_pad, + pad_type="valid", # doesn't matter + kernel_shape=(1, 2, 3), + dilations=(4, 5), + ) + + def test_custom_pad(self): + actual = aggregated_pad( + pad_type="custom", kernel_shape=(1, 2, 3), custom_pad=(7, 8, 9, 10, 11, 12) + ) + + expected = [7 + 8, 9 + 10, 11 + 12] + np.testing.assert_equal(actual, expected) + + def test_custom_pad_none(self): + np.testing.assert_raises_regex( + ValueError, + "Invalid custom_pad", + aggregated_pad, + pad_type="custom", + kernel_shape=(1, 2, 3), # doesn't matter + custom_pad=None, + ) + + def test_custom_pad_invalid(self): + np.testing.assert_raises_regex( + ValueError, + "Invalid custom_pad", + aggregated_pad, + pad_type="custom", + kernel_shape=(1, 2, 3), # doesn't matter + custom_pad=(7, 8, 9, 10), # too few elements + ) + + def test_valid_pad(self): + actual = aggregated_pad(pad_type="valid", kernel_shape=(1, 2, 3),) + + expected = [0, 0, 0] + np.testing.assert_equal(actual, expected) + + def test_valid_pad_4d(self): + actual = aggregated_pad(pad_type="valid", kernel_shape=(1, 2, 3, 4),) + + expected = [0, 0, 0, 0] + np.testing.assert_equal(actual, expected) + + def test_valid_pad_2d(self): + actual = aggregated_pad(pad_type="valid", kernel_shape=(1, 2),) + + expected = [0, 0] + np.testing.assert_equal(actual, expected) + + def test_valid_pad_1d(self): + actual = aggregated_pad(pad_type="valid", kernel_shape=[4]) + + expected = [0] + np.testing.assert_equal(actual, expected) + + def test_same_padding_no_dilation(self): + actual = aggregated_pad( + pad_type="same", + input_shape=(5, 6, 7), + kernel_shape=(2, 2, 2), + strides=(1, 2, 2), + ) + + expected = [1, 0, 1] + np.testing.assert_equal(actual, expected) + + def test_same_padding_dilation_with_dilation(self): + actual = aggregated_pad( + pad_type="same", + input_shape=(19, 20, 21), + kernel_shape=(2, 2, 2), + strides=(1, 2, 2), + dilations=(5, 6, 7), + ) + + expected = [5, 5, 7] + np.testing.assert_equal(actual, expected) + + def test_same_padding_stride_same_as_input(self): + actual = aggregated_pad( + 
pad_type="same", input_shape=(5, 5), kernel_shape=(3, 3), strides=(5, 5), + ) + + expected = [0, 0] + np.testing.assert_equal(actual, expected) + + def test_same_padding_stride_larger_than_kernel_but_less_than_input(self): + actual = aggregated_pad( + pad_type="same", input_shape=(5, 5), kernel_shape=(3, 3), strides=(4, 4), + ) + + expected = [2, 2] + np.testing.assert_equal(actual, expected) + + def test_same_padding_none_input_shape(self): + np.testing.assert_raises_regex( + ValueError, + "input_shape.*None", + aggregated_pad, + pad_type="same", + kernel_shape=(1, 2, 3), + strides=(1, 2, 3), + ) + + def test_same_padding_input_shape_wrong_size(self): + np.testing.assert_raises_regex( + ValueError, + "input_shape.*same length", + aggregated_pad, + pad_type="same", + kernel_shape=(1, 2, 3), + input_shape=(1, 2), + strides=(1, 2, 3), + ) + + def test_same_padding_none_strides(self): + np.testing.assert_raises_regex( + ValueError, + "strides.*None", + aggregated_pad, + pad_type="same", + kernel_shape=(1, 2, 3), + input_shape=(1, 2, 3), + ) + + def test_same_padding_strides_wrong_size(self): + np.testing.assert_raises_regex( + ValueError, + "strides.*same length", + aggregated_pad, + pad_type="same", + kernel_shape=(1, 2, 3), + input_shape=(1, 2, 3), + strides=(1, 2), + ) + + +class TestOutputShape: + def test_custom_padding_shape(self): + actual = spatial_dimensions_out_shape( + pad_type="custom", + input_shape=(3, 3, 3), + kernel_shape=(2, 2, 2), + strides=(2, 2, 2), + custom_pad=(2, 0, 1, 2, 2, 3), + ) + + expected = [2, 3, 4] + np.testing.assert_equal(actual, expected) + + def test_valid_padding_shape(self): + actual = spatial_dimensions_out_shape( + pad_type="valid", input_shape=(7, 7), kernel_shape=(3, 3), strides=(1, 1) + ) + + expected = [5, 5] + np.testing.assert_equal(actual, expected) + + def test_valid_padding_shape_dilation_2(self): + actual = spatial_dimensions_out_shape( + pad_type="valid", + input_shape=(7, 7), + kernel_shape=(3, 3), + strides=(1, 1), + dilations=(2, 2), + ) + + expected = [3, 3] + np.testing.assert_equal(actual, expected) + + def test_valid_padding_shape_with_stride_2(self): + actual = spatial_dimensions_out_shape( + pad_type="valid", input_shape=(7, 7), kernel_shape=(3, 3), strides=(2, 2) + ) + + expected = [3, 3] + np.testing.assert_equal(actual, expected) + + def test_same_padding_shape(self): + actual = spatial_dimensions_out_shape( + pad_type="same", input_shape=(6, 6), kernel_shape=(2, 2), strides=(2, 2) + ) + + expected = [3, 3] + np.testing.assert_equal(actual, expected) + + def test_same_padding_shape_stride_2_input_not_multiple_of_kernel(self): + actual = spatial_dimensions_out_shape( + pad_type="same", input_shape=(5, 5), kernel_shape=(2, 2), strides=(2, 2) + ) + + expected = [3, 3] + np.testing.assert_equal(actual, expected) + + def test_same_padding_shape_dilation_2(self): + actual = spatial_dimensions_out_shape( + pad_type="same", + input_shape=(5, 5), + kernel_shape=(2, 2), + strides=(1, 1), + dilations=(2, 2), + ) + + expected = [5, 5] + np.testing.assert_equal(actual, expected) diff --git a/coremltools/converters/mil/mil/ops/tests/testing_utils.py b/coremltools/converters/mil/mil/ops/tests/testing_utils.py new file mode 100644 index 000000000..42735f987 --- /dev/null +++ b/coremltools/converters/mil/mil/ops/tests/testing_utils.py @@ -0,0 +1,124 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import logging + +from coremltools.converters.mil.testing_reqs import _converter as converter +from coremltools.converters.mil.mil.types.symbolic import is_symbolic +from coremltools.converters.mil.mil import Program, Function +from coremltools.converters.mil.testing_utils import compare_backend + +UNK_VARIADIC = "*s_unk" +UNK_SYM = "s_unk" + + +def run_compare_builder( + build, + input_placeholders, + input_values, + expected_output_types=None, + expected_outputs=None, + use_cpu_only=False, + frontend_only=False, + backend="nn_proto", + atol=1e-04, + rtol=1e-05, +): + """ + Inputs: + - build: python function taking input of Vars and returning Var or + list[Var]. Each input argument in build must match a key in + input_values / input_placeholders. + + - input_placeholders: str -> placeholder. It may not be an empty + dict as MLModel doesn't support function with + no input. + + - expected_output_types: list[(shape, builtin_type)] or (shape, + builtin_type). None skips type inference validation. + + - expected_outputs: list[np.array] or np.array. Required iff + frontend_only == False + + - frontend_only: True to test up to proto generation. + """ + if not isinstance(expected_output_types, list): + expected_output_types = [expected_output_types] + + if expected_outputs is not None and not isinstance(expected_outputs, list): + expected_outputs = [expected_outputs] + + prog = Program() + with Function(input_placeholders) as ssa_func: + output_vars = build(**ssa_func.inputs) + if isinstance(output_vars, tuple): + output_vars = list(output_vars) + elif not isinstance(output_vars, list): + output_vars = [output_vars] + ssa_func.set_outputs(output_vars) + prog.add_function("main", ssa_func) + + # Validate type inference + msg = ( + "Provided expected outputs types {} should match number of output" + + " variables {}" + ) + assert_msg = msg.format(len(expected_output_types), len(output_vars)) + assert len(output_vars) == len(expected_output_types), assert_msg + + for out_var, s in zip(output_vars, expected_output_types): + if out_var.dtype != s[-1]: + raise ValueError( + "Output {} type: expect {}, got {}. Program:\n{}".format( + out_var.name, s[-1], out_var.dtype, prog + ) + ) + if UNK_VARIADIC in s[:-1]: + msg = "Skip type checking for UNK_VARIADIC. Output shape: {} vs expected shape: {}" + logging.debug(msg.format(out_var.shape, s[:-1])) + continue + expected_shape = s[:-1] + msg = "Output {} shape: expect {}, got {}. Program:\n{}".format( + out_var.name, expected_shape, out_var.shape, prog + ) + # No more variadic here. + if len(out_var.shape) != len(expected_shape): + raise ValueError(msg) + # replace UNK_SYM in out_var.shape. + output_shape = [ + 0 if es == UNK_SYM else os for os, es in zip(out_var.shape, expected_shape) + ] + expected_shape = [0 if es == UNK_SYM else es for es in expected_shape] + # convert float etc to int. 
+ output_shape = [i if is_symbolic(i) else int(i) for i in output_shape] + expected_shape = [i if is_symbolic(i) else int(i) for i in expected_shape] + if output_shape != expected_shape: + raise ValueError(msg) + + proto = converter._convert(prog, convert_from="mil", convert_to=backend) + + if frontend_only: + return + + if expected_outputs: + assert len(output_vars) == len(expected_outputs), ( + "Provided expected_outputs {}" + " should match number of output" + " variables {}".format(len(expected_outputs), len(output_vars)) + ) + + expected_outputs = { + o.name: val for o, val in zip(output_vars, expected_outputs) + } + + compare_backend( + proto=proto, + input_key_values=input_values, + expected_outputs=expected_outputs, + use_cpu_only=use_cpu_only, + atol=atol, + rtol=rtol, + also_compare_shapes=False, + ) diff --git a/coremltools/converters/mil/mil/passes/__init__.py b/coremltools/converters/mil/mil/passes/__init__.py new file mode 100644 index 000000000..2d20ab448 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +# Import all passes in this dir +from os.path import dirname, basename, isfile, join +import glob + +excluded_files = ["pass_registry.py", "common_pass.py", "__init__.py"] +modules = glob.glob(join(dirname(__file__), "*.py")) +pass_modules = [ + basename(f)[:-3] + for f in modules + if isfile(f) + and basename(f)[:1] != "_" # Follow python convention to hide _* files. + and basename(f)[:4] != "test" + and basename(f) not in excluded_files +] +__all__ = pass_modules + +from . import * # import everything in __all__ diff --git a/coremltools/converters/mil/mil/passes/common_pass.py b/coremltools/converters/mil/mil/passes/common_pass.py new file mode 100644 index 000000000..9443b1bbc --- /dev/null +++ b/coremltools/converters/mil/mil/passes/common_pass.py @@ -0,0 +1,38 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
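The `__init__.py` above auto-imports every pass module so that each `@register_pass` decorator runs at import time; `common_pass.py`, which follows, then consumes the registered passes through `PASS_REGISTRY`. A hedged sketch of that lookup-and-run contract (the `run_passes` helper is illustrative; only the registry and validation calls come from this diff):

```python
from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY

def run_passes(prog, pass_names):
    """Run registered graph passes by name, validating SSA after each."""
    prog.validate()
    for name in pass_names:        # e.g. "common::dead_code_elimination"
        PASS_REGISTRY[name](prog)  # each pass mutates prog in place
        prog.validate()            # surface SSA violations immediately
```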
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +import logging as _logging +from coremltools.converters._profile_utils import _profile +from tqdm import tqdm as _tqdm + + +@_profile +def common_pass(prog): + passes = [ + "common::const_elimination", + "common::divide_to_multiply", + "common::fuse_matmul_weight_bias", + "common::const_elimination", + "common::loop_invariant_elimination", + "common::remove_symbolic_reshape", + "common::fuse_gelu_tanh_approximation", + "common::reduce_transposes", + "common::fuse_bias_conv", + "common::fuse_elementwise_to_batchnorm", + "common::fuse_onehot_matmul_to_gather", + "common::fuse_layernorm_or_instancenorm", # should come after reduce_transposes, to detect instance_norm + "common::dead_code_elimination", # always end with dce + ] + + _logging.debug("Program before common passes:\n{}".format(prog)) + + prog.validate() + for p in _tqdm(passes, desc="Running MIL optimization passes", unit=" passes"): + _logging.info('Performing pass: "{}"'.format(p)) + PASS_REGISTRY[p](prog) + prog.validate() + + _logging.debug("Program after common passes:\n{}".format(prog)) diff --git a/coremltools/converters/mil/mil/passes/const_elimination.py b/coremltools/converters/mil/mil/passes/const_elimination.py new file mode 100644 index 000000000..ff88bb6a3 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/const_elimination.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import numpy as np +import six + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.passes.pass_registry import register_pass + + +def get_const_mode(val): + # Heuristics to determine if a val should be file value or immediate + # value. + if isinstance(val, six.string_types): + return "immediate_value" + if isinstance(val, (np.generic, np.ndarray)): + if val.size > 10: + return "file_value" + return "immediate_value" + raise ValueError("val {} not recognized.".format(val)) + + +def const_elimination_block(block): + # shallow copy hides changes on f.operations during the loop + for op in list(block.operations): + if op.op_type == "const": + continue + + for b in op.blocks: + const_elimination_block(b) + + all_outputs_are_const = True + for i, o in enumerate(op.outputs): + if o.val is not None: + with block: + res = mb.const( + val=o.val, + mode=get_const_mode(o.val), + before_op=op, + # same var name, but different python + # instance does not violate SSA property. + name=o.name, + ) + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, old_var=o, new_var=res + ) + else: + all_outputs_are_const = False + + if all_outputs_are_const: + op.remove_from_block() + + +@register_pass(namespace="common") +def const_elimination(prog): + """ + prog: Program + + # Replace non-const ops that have const Var + # outputs replaced with const op. Example: + # + # Given: + # %2, %3 = non_const_op(...) # %2 is const, %3 isn't const + # %4 = other_op(%2, %3) + # + # Result: + # _, %3 = non_const_op(...) 
# _ is the ignored output + # %2_const = const(mode=m) # %2_const name is for illustration only + # %4 = other_op(%2_const, %3) + # + # where m is 'file_value' / 'immediate_value' depending on heuristics + # in get_const_mode. + """ + for f_name, f in prog.functions.items(): + const_elimination_block(f) diff --git a/coremltools/converters/mil/mil/passes/conv_bias_fusion.py b/coremltools/converters/mil/mil/passes/conv_bias_fusion.py new file mode 100644 index 000000000..ac10d2554 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/conv_bias_fusion.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +import numpy as np + +child_op_types = ["add", "sub"] + + +def match_pattern(op): + if op.op_type == "conv" or op.op_type == "conv_transpose": + # abort fusion if op output is also a block output + if op.outputs[0] in op.enclosing_block.outputs: + return None + # find add + child_ops = op.outputs[0].child_ops + if len(child_ops) == 1: + add_op_candidate = list(child_ops)[0] + if add_op_candidate.op_type in child_op_types: + return add_op_candidate + return None + + +def try_to_transform(conv_op, add_op, block): + if add_op.op_type == "sub": + bias_var = add_op.y + else: + bias_var = add_op.x if add_op.x.val is not None else add_op.y + bias_value = bias_var.val + + # check that the bias value is a constant array or a scalar constant + if not isinstance(bias_value, (np.ndarray, np.generic)): + return False + + is_bias_scalar = False + if not isinstance(bias_value, np.ndarray): + is_bias_scalar = True + + # find rank of the conv input + rank = conv_op.x.rank + if rank is None: + return False + if not (rank == 3 or rank == 4 or rank == 5): + return False + + # check compatibility of bias value with the rank of the conv op + # either bias value should be a scalar or: + # rank=3 ==> (B,C,D), which means bias must be (1,C,1) or (C,1) + # rank=4 ==> (B,C,D1,D2), which means bias must be (1,C,1,1) or (C,1,1) + # rank=5 ==> (B,C,D1,D2,D3), which means bias must be (1,C,1,1,1) or (C,1,1,1) + + if is_bias_scalar: + bias_value = np.array([bias_value]) + else: + # check that there is at most one dimension in the shape that is not 1 + if len(np.squeeze(bias_value).shape) > 1: + return False + # check that addition is not happening on the batch dimension + if len(bias_value) == rank: + if bias_value.shape[0] != 1: + return False + # check that last rank-2 entries in the shape vector are all 1s + if np.prod(bias_value.shape[-(rank - 2) :]) != 1: + return False + bias_value = np.squeeze(bias_value) + + if add_op.op_type == "sub": + bias_value *= -1 + + # everything looks good, now find the new updated bias + old_bias = conv_op.inputs.get("bias", None) + old_bias_value = None + if old_bias is not None and old_bias.val is not None: + old_bias_value = old_bias.val + if old_bias is None: + # need to create a fresh numpy array for bias + if np.prod(bias_value.shape) == 1: + # its a scalar bias + # need to find the value of Cout to form a new bias + if conv_op.weight.val is None: + return False + Cout = conv_op.weight.val.shape[0] + 
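Stepping back from the matching details around this point: the conv/bias fusion is justified by a simple identity, `conv(x, W, b) + c == conv(x, W, b + c)` for a constant `c` broadcast over output channels. A 1-D NumPy check (hypothetical sizes, 'valid' correlation):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
w = np.array([1.0, -1.0])
b, c = 0.5, 2.0

conv = lambda x_, w_, b_: np.correlate(x_, w_) + b_  # 'valid' correlation
assert np.allclose(conv(x, w, b) + c, conv(x, w, b + c))
```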
new_bias_value = np.broadcast_to(bias_value, (Cout,)) + else: + new_bias_value = bias_value + else: + # just need to update the existing bias array + try: + new_bias_value = old_bias_value + bias_value + except: + return False + + # create a new conv op with the new bias value, copying rest of the attributes + out_name = add_op.outputs[0].name + new_bias_var = mb.const(val=new_bias_value, mode="file_value", before_op=conv_op) + + conv_kargs = {"bias": new_bias_var, "name": out_name, "before_op": conv_op} + + for k, v in conv_op.inputs.items(): + if k == "bias": + continue + conv_kargs[k] = v + + if conv_op.op_type == "conv": + x = mb.conv(**conv_kargs) + else: + x = mb.conv_transpose(**conv_kargs) + + add_op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=add_op, old_var=add_op.outputs[0], new_var=x + ) + # Remove all the ops at once + block.remove_ops([conv_op, add_op]) + return True + + +def fuse_bias_conv_block(block): + fusion_status = False + for op in list(block.operations): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = fuse_bias_conv_block(b) + if len(op.blocks) > 0: + # This op can't be conv or conv_transpose + continue + + add_op = match_pattern(op) + if add_op is not None: + with block: + fusion_status = try_to_transform(op, add_op, block) + # has to break as the downstream iterator is affected. + if fusion_status: + return fusion_status + return fusion_status + + +@register_pass(namespace="common") +def fuse_bias_conv(prog): + """ + Fold add/sub into bias of conv and conv_transpose + That is, convert conv + add/sub to conv, when add/sub is adding a constant + + Given: + %2 = conv(%1) + ... + %3 = add(%2, constant) # where constant has shape (1,C,1)/(C,1) for 1d conv, (1,C,1,1)/(C,1,1) for 2d conv etc + ... + + Result: + %3 = conv(%1) + ... + + """ + for f_name, f in prog.functions.items(): + block_changed = True + while block_changed: + block_changed = fuse_bias_conv_block(f) diff --git a/coremltools/converters/mil/mil/passes/dead_code_elimination.py b/coremltools/converters/mil/mil/passes/dead_code_elimination.py new file mode 100644 index 000000000..98bfca08f --- /dev/null +++ b/coremltools/converters/mil/mil/passes/dead_code_elimination.py @@ -0,0 +1,84 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +import logging + + +def dead_code_elimination_block(block): + used_vars = set() + ops_to_remove = list() + + # mark block's outputs to used + used_vars.update(block.outputs) + + for op in reversed(block.operations): + # if none of op's output is used, delete op + if not set(op.outputs).intersection(used_vars): + ops_to_remove.append(op) + continue + + # mark all op's inputs to used + for _, input_var in op.inputs.items(): + if isinstance(input_var, (tuple, list)): + used_vars.update(list(input_var)) + else: + used_vars.update([input_var]) + + for b in op.blocks: + used_in_block = dead_code_elimination_block(b) + used_vars.update(used_in_block) + + for op in ops_to_remove: + logging.info('Removing op "{}" (type: {})'.format(op.name, op.op_type)) + op.remove_from_block() + + return used_vars + + +@register_pass(namespace="common") +def dead_code_elimination(program): + """ + Eliminate unused ops in program. 
+ + Parameters + ---------- + program: Program SSA Program before graph pass + + Returns + ------- + program: Program SSA Program after graph pass + + Example + ------- + + Given: + main(%x: (2, 4, fp32)) { + block0() { + %const_2: (4, 2, fp32)* = const(val=[...]) + %const_3: (4, fp32)* = const(val=[...]) + %tx_0: (bool)* = const(val=False) + %ty_0: (bool)* = const(val=False) + %matmul_0: (2, 2, fp32) = matmul(x=%x, y=%const_2, transpose_x=%tx_0, transpose_y=%ty_0) + %linear_0: (2, 4, fp32) = linear(x=%x, weight=%const_2, bias=%const_3) + } -> (%linear_0) + } + + Result: + main(%x: (2, 4, fp32)) { + block0() { + %const_2: (4, 2, fp32)* = const(val=[...]) + %const_3: (4, fp32)* = const(val=[...]) + %linear_0: (2, 4, fp32) = linear(x=%x, weight=%const_2, bias=%const_3) + } -> (%linear_0) + } + + Ops whose outputs are not contributed to final outputs will be deleted. + In this example, %matmul_0 is an op that's not used in the computation, + this op and its input ops (%tx_0 and %ty_0) are eliminated in this pass. + """ + + for name, f in program.functions.items(): + dead_code_elimination_block(f) diff --git a/coremltools/converters/mil/mil/passes/divide_to_multiply.py b/coremltools/converters/mil/mil/passes/divide_to_multiply.py new file mode 100644 index 000000000..21a655069 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/divide_to_multiply.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb + + +def divide_to_multiply_block(block): + for op in list(block.operations): + for b in op.blocks: + divide_to_multiply_block(b) + if len(op.blocks) > 0: + # This op can't be divide. + continue + + if op.op_type == "real_div" and op.y.val is not None: + with block: + x = mb.mul( + x=op.x, y=1.0 / op.y.val, name="_inversed_" + op.name, before_op=op + ) + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, old_var=op.outputs[0], new_var=x + ) + block.remove_ops([op]) + + +@register_pass(namespace="common") +def divide_to_multiply(prog): + """ + Convert divide into multiply if divisor is const. + """ + for f_name, f in prog.functions.items(): + divide_to_multiply_block(f) diff --git a/coremltools/converters/mil/mil/passes/elementwise_batchnorm_fusion.py b/coremltools/converters/mil/mil/passes/elementwise_batchnorm_fusion.py new file mode 100644 index 000000000..cd612f700 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/elementwise_batchnorm_fusion.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
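`divide_to_multiply` above relies on the identity `x / c == x * (1 / c)` for a constant divisor; multiplies are generally cheaper and more uniformly supported by the backends than divides. The two forms can differ in the last float bit, hence `allclose` below:

```python
import numpy as np

x = np.random.rand(4).astype(np.float32)
c = 2.5
assert np.allclose(x / c, x * (1.0 / c))
```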
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +import numpy as np + + +def match_pattern(op): + if op.outputs[0] in op.enclosing_block.outputs: + return None + + if op.op_type == "mul": + # find add + child_ops = op.outputs[0].child_ops + if len(child_ops) == 1: + add_op_candidate = list(child_ops)[0] + if add_op_candidate.op_type == "add": + return add_op_candidate + return None + + +def _find_const_input_val(op): + if op.x.val is not None: + return op.x.val + if op.y.val is not None: + return op.y.val + return None + + +def _check_shape(arr): + """ + return True if shape is of form + (1,C,1,1) or (C,1,1) + """ + rank = len(arr.shape) + if not (rank == 3 or rank == 4): + return False + C = arr.shape[-3] + if not (arr.shape == (1, C, 1, 1) or arr.shape == (C, 1, 1)): + return False + return True + + +def try_to_transform(mul_op, add_op, block): + non_const_input_mul = mul_op.x if mul_op.x.val is None else mul_op.y + if non_const_input_mul.rank != 4: + return False + + gamma = _find_const_input_val(mul_op) + beta = _find_const_input_val(add_op) + if gamma is None or beta is None: + return False + + if not (isinstance(gamma, np.ndarray) and isinstance(beta, np.ndarray)): + return False + + # check that gamma and beta have shape (1,C,1,1) or (C,1,1) + # that is they are doing vector addition on the axis=-3, which is what the + # batchnorm layer does (batchnorm layer only works on rank 4 input tensors) + if not (_check_shape(gamma) and _check_shape(beta)): + return False + + C = gamma.shape[-3] + if C == 1: + return False + + out_name = add_op.outputs[0].name + x = mb.batch_norm( + x=non_const_input_mul, + mean=np.zeros((C,), np.float32), + variance=np.ones((C,), np.float32), + gamma=np.squeeze(gamma), + beta=np.squeeze(beta), + name=out_name, + before_op=mul_op, + ) + + add_op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=add_op, old_var=add_op.outputs[0], new_var=x + ) + # Remove all the ops at once + block.remove_ops([mul_op, add_op]) + return True + + +def fuse_elementwise_to_batchnorm_block(block): + fusion_status = False + for op in list(block.operations): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = fuse_elementwise_to_batchnorm_block(b) + if len(op.blocks) > 0: + # This op can't be mul + continue + + add_op = match_pattern(op) + if add_op is not None: + with block: + fusion_status = try_to_transform(op, add_op, block) + # has to break as the downstream iterator is affected. + if fusion_status: + return fusion_status + return fusion_status + + +@register_pass(namespace="common") +def fuse_elementwise_to_batchnorm(prog): + """ + Fold mul + add into a batch norm, + if the const feeding into the mul/add is of shape (1,C,1,1) or (C,1,1) + and input to mul is of rank 4. + + Given: + [Const] [Const] + | | + V V + [...] --> [Mul] --> [Add] --> [...] + + That is, + + %2 = op1(%1) + %3 = mul(%2, constant) + %4 = add(%3, constant) + %5 = op2(%4) + ... + + Result: + + [...] --> [BatchNorm] --> [...] + + That is, + %2 = op1(%1) + %4 = batchnorm(%2) + %5 = op2(%4) + ... 
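To see why swapping in `batch_norm` is sound here: with `mean=0`, `variance=1`, and a negligible epsilon, batch norm collapses to `gamma * x + beta` per channel, exactly the mul/add pair being removed. A NumPy check of that reduction:

```python
import numpy as np

x = np.random.rand(2, 3, 4, 4).astype(np.float32)
gamma = np.random.rand(1, 3, 1, 1).astype(np.float32)
beta = np.random.rand(1, 3, 1, 1).astype(np.float32)
eps = 1e-5

# batch_norm with mean=0, variance=1 is just an affine map per channel:
bn = gamma * (x - 0.0) / np.sqrt(1.0 + eps) + beta
assert np.allclose(bn, gamma * x + beta, atol=1e-4)
```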
+ + """ + for f_name, f in prog.functions.items(): + block_changed = True + while block_changed: + block_changed = fuse_elementwise_to_batchnorm_block(f) diff --git a/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py b/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py new file mode 100644 index 000000000..044ca381d --- /dev/null +++ b/coremltools/converters/mil/mil/passes/gelu_tanh_approximation_fusion.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +import numpy as np + + +def _check_var_scalar_value(x, val, tol=1e-3): + """ + :param x: var + :param val: a scalar value + :return: True if the value of var is equal to val otherwise return False + """ + if x.val is None: + return False + if not isinstance(x.val, (np.ndarray, np.generic)): + return False + + if isinstance(x.val, np.ndarray): + if x.val.size != 1: + return False + x_val = x.val[:][0] + else: + x_val = x.val + + if abs(x_val - val) < tol: + return True + return False + + +def _check_child_op_type(op, child_op_type): + """ + :param op: operation + :param child_op_type: str + :return: Return True if op has 1 child and type of that child matches child_op_type + """ + if len(op.outputs) != 1: + return False + child_ops = list(op.outputs[0].child_ops) + if len(child_ops) != 1: + return False + if child_ops[0].op_type == child_op_type: + return True + return False + + +def try_to_transform(pow_op, block): + all_ops = [pow_op] + root_var = pow_op.x + + # check that root_var feeds into exactly 3 ops + if len(list(root_var.child_ops)) != 3: + return False + + # check for 1st mul op + if not _check_child_op_type(pow_op, "mul"): + return False + mul_op1 = list(pow_op.outputs[0].child_ops)[0] + if not ( + ( + mul_op1.x == pow_op.outputs[0] + and _check_var_scalar_value(mul_op1.y, 0.044715) + ) + or ( + mul_op1.y == pow_op.outputs[0] + and _check_var_scalar_value(mul_op1.x, 0.044715) + ) + ): + return False + all_ops.append(mul_op1) + + # check for 1st add op + if not _check_child_op_type(mul_op1, "add"): + return False + add_op1 = list(mul_op1.outputs[0].child_ops)[0] + if not ( + (add_op1.x == mul_op1.outputs[0] and add_op1.y == root_var) + or (add_op1.y == mul_op1.outputs[0] and add_op1.x == root_var) + ): + return False + all_ops.append(add_op1) + + # check for 2nd mul op + if not _check_child_op_type(add_op1, "mul"): + return False + mul_op2 = list(add_op1.outputs[0].child_ops)[0] + if not ( + ( + mul_op2.x == add_op1.outputs[0] + and _check_var_scalar_value(mul_op2.y, 0.79788) + ) + or ( + mul_op2.y == add_op1.outputs[0] + and _check_var_scalar_value(mul_op2.x, 0.79788) + ) + ): + return False + all_ops.append(mul_op2) + + # check for tanh op + if not _check_child_op_type(mul_op2, "tanh"): + return False + tanh_op = list(mul_op2.outputs[0].child_ops)[0] + all_ops.append(tanh_op) + + # check for 2nd add op + if not _check_child_op_type(tanh_op, "add"): + return False + add_op2 = list(tanh_op.outputs[0].child_ops)[0] + if not ( + (add_op2.x == tanh_op.outputs[0] and _check_var_scalar_value(add_op2.y, 1)) + or 
(add_op2.y == tanh_op.outputs[0] and _check_var_scalar_value(add_op2.x, 1))
+    ):
+        return False
+    all_ops.append(add_op2)
+
+    # check for 3rd mul op
+    if not _check_child_op_type(add_op2, "mul"):
+        return False
+    mul_op3 = list(add_op2.outputs[0].child_ops)[0]
+    if not (
+        (mul_op3.x == add_op2.outputs[0] and _check_var_scalar_value(mul_op3.y, 0.5))
+        or (mul_op3.y == add_op2.outputs[0] and _check_var_scalar_value(mul_op3.x, 0.5))
+    ):
+        return False
+    all_ops.append(mul_op3)
+
+    # check for 4th mul op
+    if not _check_child_op_type(mul_op3, "mul"):
+        return False
+    mul_op4 = list(mul_op3.outputs[0].child_ops)[0]
+    if not (
+        (mul_op4.x == mul_op3.outputs[0] and mul_op4.y == root_var)
+        or (mul_op4.y == mul_op3.outputs[0] and mul_op4.x == root_var)
+    ):
+        return False
+    all_ops.append(mul_op4)
+
+    # check that none of the ops in this pattern is connected to the output
+    # (except the last mul op)
+    for i, op in enumerate(all_ops):
+        if i == len(all_ops) - 1:
+            continue
+        for out in op.outputs:
+            if out in block.outputs:
+                return False
+
+    # remove all the ops, and replace with a gelu op
+    out_name = mul_op4.outputs[0].name
+    x = mb.gelu(x=root_var, mode="TANH_APPROXIMATION", name=out_name, before_op=pow_op)
+
+    mul_op4.enclosing_block.replace_uses_of_var_after_op(
+        anchor_op=mul_op4, old_var=mul_op4.outputs[0], new_var=x
+    )
+    # Remove all the ops at once
+    block.remove_ops(all_ops)
+    return True
+
+
+def fuse_gelu_tanh_block(block):
+    fusion_status = False
+    for op in list(block.operations):
+        for b in op.blocks:
+            block_changed = True
+            while block_changed:
+                block_changed = fuse_gelu_tanh_block(b)
+        if len(op.blocks) > 0:
+            # This op can't be pow
+            continue
+
+        # start pattern match if a pow op with power 3 is encountered
+        if op.op_type == "pow":
+            if _check_var_scalar_value(op.y, 3):
+                with block:
+                    fusion_status = try_to_transform(op, block)
+                    # has to break as the downstream iterator is affected.
+                    if fusion_status:
+                        return fusion_status
+    return fusion_status
+
+
+@register_pass(namespace="common")
+def fuse_gelu_tanh_approximation(prog):
+    """
+    Identify the pattern that corresponds to the tanh approximation of gelu, and replace it with a single
+    gelu layer with mode=TANH_APPROXIMATION
+
+    y = 0.5 * x * ( 1 + tanh( sqrt(2/pi) * (x + 0.044715 * x^3) ) )
+
+    [...] -----> pow (3) ----> mul (.044715) ---> add -----> mul (sqrt(2/pi)) ---> tanh ----> add (1) ----> mul (0.5) -----> mul ---> [...]
+      |                                            ^                                                                          ^
+      |                                            |                                                                          |
+      |--------------------------------------------|--------------------------------------------------------------------------
+
+
+    """
+    for f_name, f in prog.functions.items():
+        block_changed = True
+        while block_changed:
+            block_changed = fuse_gelu_tanh_block(f)
diff --git a/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py b/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py
new file mode 100644
index 000000000..ea98146e4
--- /dev/null
+++ b/coremltools/converters/mil/mil/passes/layernorm_instancenorm_pattern_fusion.py
@@ -0,0 +1,299 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2020, Apple Inc. All rights reserved.
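+
+# Note on the pass above (illustrative only, not used by the converter): a
+# quick NumPy check of the identity that fuse_gelu_tanh_approximation relies
+# on; the sample range and tolerance here are arbitrary choices.
+#
+#     import numpy as np
+#     from scipy.special import erf
+#
+#     x = np.linspace(-3.0, 3.0, 101)
+#     inner = np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)
+#     gelu_tanh = 0.5 * x * (1.0 + np.tanh(inner))   # the fused pattern
+#     gelu_exact = 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))
+#     assert np.abs(gelu_tanh - gelu_exact).max() < 1e-3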
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +import numpy as np + + +def _check_child_op_type(op, child_op_type): + """ + :param op: operation + :param child_op_type: str + :return: Return True if op has 1 child and type of that child matches child_op_type + """ + if len(op.outputs) != 1: + return False + child_ops = list(op.outputs[0].child_ops) + if len(child_ops) != 1: + return False + if child_ops[0].op_type == child_op_type: + return True + return False + + +def try_to_transform(reduce_mean_op, block): + all_ops = [reduce_mean_op] + root_var = reduce_mean_op.x + + input_shape = root_var.shape + if input_shape is None: + return False + + rank = len(input_shape) + + # check that root_var feeds into exactly 3 ops + if len(list(root_var.child_ops)) != 3: + return False + + # check 1st reduce_mean op + if not ( + reduce_mean_op.keep_dims.val is not None + and reduce_mean_op.keep_dims.val == True + ): + return False + axes = reduce_mean_op.axes.val + if axes is None: + return False + + # check 1st sub op + child_ops_reduce_mean = list(reduce_mean_op.outputs[0].child_ops) + if len(child_ops_reduce_mean) != 2: + return False + op_a = child_ops_reduce_mean[0] + op_b = child_ops_reduce_mean[1] + if not ( + (op_a.op_type == "sub" and op_b.op_type == "mul") + or (op_a.op_type == "mul" and op_b.op_type == "sub") + ): + return False + sub_op1 = op_a if op_a.op_type == "sub" else op_b + if not (sub_op1.x == root_var and sub_op1.y == reduce_mean_op.outputs[0]): + return False + all_ops.append(sub_op1) + + # check square op + if not _check_child_op_type(sub_op1, "square"): + return False + square_op = list(sub_op1.outputs[0].child_ops)[0] + all_ops.append(square_op) + + # check second reduce mean + if not _check_child_op_type(square_op, "reduce_mean"): + return False + reduce_mean_op2 = list(square_op.outputs[0].child_ops)[0] + if not ( + reduce_mean_op2.keep_dims.val is not None + and reduce_mean_op2.keep_dims.val == True + ): + return False + if not ( + (reduce_mean_op2.axes.val is not None) + and (axes == reduce_mean_op2.axes.val).all() + ): + return False + all_ops.append(reduce_mean_op2) + + # check add op (with epsilon) + if not _check_child_op_type(reduce_mean_op2, "add"): + return False + add_op1 = list(reduce_mean_op2.outputs[0].child_ops)[0] + epsilon_var = add_op1.y if add_op1.x == reduce_mean_op2.outputs[0] else add_op1.x + if epsilon_var.val is None: + return False + if len(epsilon_var.val.shape) != 0: + # must be scalar + return False + all_ops.append(add_op1) + + # check rsqrt op + if not _check_child_op_type(add_op1, "rsqrt"): + return False + rsqrt_op = list(add_op1.outputs[0].child_ops)[0] + all_ops.append(rsqrt_op) + + # check mul (gamma) + if not _check_child_op_type(rsqrt_op, "mul"): + return False + mul_op1 = list(rsqrt_op.outputs[0].child_ops)[0] + gamma_var = mul_op1.y if mul_op1.x == rsqrt_op.outputs[0] else mul_op1.x + if gamma_var.val is None: + return False + all_ops.append(mul_op1) + + # check 2 muls after the gamma mul + child_ops = list(mul_op1.outputs[0].child_ops) + if len(child_ops) != 2: + return False + mul_op2 = child_ops[0] + mul_op3 = child_ops[1] + if not (mul_op2.op_type == "mul" 
and mul_op3.op_type == "mul"): + return False + mul_op2_other_var = mul_op2.x if mul_op2.y == mul_op1.outputs[0] else mul_op2.y + mul_op3_other_var = mul_op3.x if mul_op3.y == mul_op1.outputs[0] else mul_op3.y + if not ( + ( + mul_op2_other_var == root_var + and mul_op3_other_var == reduce_mean_op.outputs[0] + ) + or ( + mul_op2_other_var == reduce_mean_op.outputs[0] + and mul_op3_other_var == root_var + ) + ): + return False + if mul_op2_other_var == root_var: + mul_root_op = mul_op2 + mul_mean_op = mul_op3 + else: + mul_root_op = mul_op3 + mul_mean_op = mul_op2 + all_ops.append(mul_mean_op) + all_ops.append(mul_root_op) + + # check sub with beta + if not _check_child_op_type(mul_mean_op, "sub"): + return False + sub_op2 = list(mul_mean_op.outputs[0].child_ops)[0] + if sub_op2.y != mul_mean_op.outputs[0]: + return False + beta_var = sub_op2.x + if beta_var.val is None: + return False + all_ops.append(sub_op2) + + # check last add op + if not _check_child_op_type(sub_op2, "add"): + return False + add_op2 = list(sub_op2.outputs[0].child_ops)[0] + if not (add_op2.x == mul_root_op.outputs[0] or add_op2.y == mul_root_op.outputs[0]): + return False + all_ops.append(add_op2) + + # check that none of the op in this pattern is connected to the output + # (except the last add op) + for i, op in enumerate(all_ops): + if i == len(all_ops) - 1: + continue + for out in op.outputs: + if out in block.outputs: + return False + + # check whether the pattern is instance_norm or layer_norm + is_layernorm = False + is_instancenorm = False + + negative_axes = [a - rank if a >= 0 else a for a in axes] + negative_axes.sort() + + if len(gamma_var.val.shape) == len(axes) and len(beta_var.val.shape) == len(axes): + # axes for layer_norm must be [-1] or [-1,-2] or [-1,-2,-3] and so on + if negative_axes == list(range(-len(negative_axes), 0)): + is_layernorm = True + + if negative_axes == [-2, -1] and rank == 4: + if ( + len(np.squeeze(gamma_var.val).shape) == 1 + and len(np.squeeze(beta_var.val).shape) == 1 + ): + is_instancenorm = True + + if not (is_instancenorm or is_layernorm): + return False + + # remove all the ops, and replace with a layer_norm or instance_norm op + out_name = add_op2.outputs[0].name + + if is_instancenorm: + x = mb.instance_norm( + x=root_var, + gamma=np.squeeze(gamma_var.val), + beta=np.squeeze(beta_var.val), + epsilon=epsilon_var, + name=out_name, + before_op=add_op2, + ) + else: + x = mb.layer_norm( + x=root_var, + axes=axes, + gamma=gamma_var, + beta=beta_var, + epsilon=epsilon_var, + name=out_name, + before_op=add_op2, + ) + + add_op2.enclosing_block.replace_uses_of_var_after_op( + anchor_op=add_op2, old_var=add_op2.outputs[0], new_var=x + ) + # Remove all the ops at once + block.remove_ops(all_ops) + return True + + +def fuse_layernorm_or_instancenorm_block(block): + fusion_status = False + for i, op in enumerate(list(block.operations)): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = fuse_layernorm_or_instancenorm_block(b) + if len(op.blocks) > 0: + # This op can't be pow + continue + + # start pattern match if reduce_mean op is encountered + if op.op_type == "reduce_mean": + with block: + fusion_status = try_to_transform(op, block) + # has to break as the downstream iterator is affected. 
+ if fusion_status: + return fusion_status + return fusion_status + + +@register_pass(namespace="common") +def fuse_layernorm_or_instancenorm(prog): + """ + Identify the pattern: + + y = gamma * (x - mean) / sqrt(variance + epsilon) + beta + + y = x * [gamma * rsqrt(variance + eps)] + (beta - mean * [gamma * rsqrt(variance + eps)]) + + [....] ---> reduce_mean ---> sub -->square -->reduce_mean --> add(epsilon)-->rsqrt + | | ^ | + | | | | + | | | V + |--------------------------- mul (gamma) + | | | + | | --------|--------- + | | | | + | | | V + | |-------------------------------------------------------------------> mul + | | | + | | | + | V | + |-----------------------------------------------------------------> mul | + | | + | V + | sub (beta) --> add --> [...] + | ^ + | | + |------------------------------- + + + This pattern corresponds to either layer_norm or instance_norm. + + It is instance_norm if all of the following are true: + - input is rank 4 + - axes of reduce_mean is [-2, -1] + - gamma and beta are rank 1, after squeeze + + It is layer_norm if all of the following are true: + - axes is either [-1] or [-1,-2] or [-1,-2,-3] and so on + - rank of gamma and beta is equal to the length of the axes + """ + for f_name, f in prog.functions.items(): + block_changed = True + while block_changed: + block_changed = fuse_layernorm_or_instancenorm_block(f) diff --git a/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py b/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py new file mode 100644 index 000000000..ed4a3120a --- /dev/null +++ b/coremltools/converters/mil/mil/passes/loop_invariant_elimination.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import numpy as np +import six + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.passes.pass_registry import register_pass + + +def detect_loop_invariants(while_op): + block = while_op.blocks[0] + loop_invariant_ids = [] # list of index in op.loop_vars, block.inputs + for i, vx_in in enumerate(block.inputs): + vx_out = block.outputs[i + 1] # first output is cond var. + return_input_as_output = vx_in == vx_out + # this block output is a var from outside of the block + output_from_outside_of_block = ( + vx_out in block._visible_vars_from_enclosing_block() + ) + if return_input_as_output or output_from_outside_of_block: + loop_invariant_ids.append(i) + + # TODO: All outputs that depend on only invariants are invariant. We + # need to move computation out of while loop. + return loop_invariant_ids + + +def loop_invariant_elimination_block(block): + # Phase 1: Find vars needed to be renamed. + # + # while_loop outputs need to be renamed if the output will be eliminated + # (due to loop invariant) and is returned as block output (which would + # change the return var name and the program interface). 
+ # + # list[(v_src, v_tgt, before_op)]: will rename v_src to v_tgt before + # before_op (a while_loop) + output_rename = [] + for op in list(block.operations): + for b in op.blocks: + loop_invariant_elimination_block(b) + + if op.op_type != "while_loop": + continue + + loop_invariant_ids = detect_loop_invariants(op) + for i in loop_invariant_ids: + output_rename.append((op.loop_vars[i], op.outputs[i], op)) + if len(loop_invariant_ids) > 0: + # Avoid the following case: + # %a, %b = while_loop(..., name="b") + # becomes + # %b = identity(..., name="b") + # %a = while_loop(..., name="b") + # (two ops with the same name -> name collision) + op.name = op.name + "_renamed" + + # Phase 2: insert rename ops. This changes block.operations + for v_src, v_tgt, op in output_rename: + if v_tgt in block.outputs: + # rename the loop output to existing block output names + with block: + res = mb.identity(x=v_src, before_op=op, name=v_tgt.name) + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, old_var=v_tgt, new_var=res + ) + + # Phase 3: Perform loop invariant elimination without fear! + for op in list(block.operations): + if op.op_type != "while_loop": + continue + block = op.blocks[0] + loop_invariant_ids = detect_loop_invariants(op) + + loop_variant_vars = [] + + # replace uses of loop_invariants with its source from outside of the + # while_loop op. + for i in loop_invariant_ids: + block.replace_uses_of_var_after_op( + anchor_op=None, old_var=block.inputs[i], new_var=op.loop_vars[i] + ) + + # replace block inputs + block.remove_inputs([block.inputs[i] for i in loop_invariant_ids]) + + # remove invariants from while_loop loop_vars + for i in loop_invariant_ids: + # replace usage of while_loop outputs that we'll eliminate. + op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=op, old_var=op.outputs[i], new_var=op.loop_vars[i] + ) + + # Remove after replacing to ensure program is valid + for i in loop_invariant_ids: + op.loop_vars[i].remove_child_op(op) + + op.loop_vars = tuple( + v for i, v in enumerate(op.loop_vars) if i not in loop_invariant_ids + ) + op._input_vars["loop_vars"] = op.loop_vars + + # remove invariants from while_loop outputs + # block.outputs[0] is cond var + block.set_outputs( + [block.outputs[0]] + + [ + v + for i, v in enumerate(block.outputs[1:]) + if i not in loop_invariant_ids + ] + ) + + # op._output_vars doesn't include cond var + op._output_vars = [ + v for i, v in enumerate(op._output_vars) if i not in loop_invariant_ids + ] + + # check healthy state + op.enclosing_block.validate() + + +@register_pass(namespace="common") +def loop_invariant_elimination(prog): + """ + prog: Program + + # When a block does not modify a block input var, eliminate that block + # input var and use the corresponding var in the outer scope. 
Example: + # + # Given: + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32)) { + # block0() { + # %loop:0: (1, 2, fp32), %loop:1: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a, %b)) + # loop_cond(%a.x, %b.x) { + # %cond_var: (bool) = some_op(x=%a.x, y=%b.x) + # } -> (%cond_var) + # loop_body(%a.x, %b.x) { + # %add_0: (1, 2, fp32) = add(x=%a.x, y=%b.x) + # } -> (%add_0, %b.x) + # } -> (%loop:0, %loop:1) + # } + # + # (Notice that %b.x is constant through while loop iterates) + # + # Result: + # main(%a: (1, 2, fp32), + # %b: (1, 2, fp32)) { + # block0() { + # %loop:1: (1, 2, fp32) = identity(x=%b) + # %loop:0: (1, 2, fp32) = \ + # while_loop(loop_vars=(%a)) + # loop_cond(%a.x) { + # %cond_var: (bool) = some_op(x=%a.x, y=%b) + # } -> (%cond_var) + # loop_body(%a.x) { + # %add_0: (1, 2, fp32) = add(x=%a.x, y=%b) + # } -> (%add_0) + # } -> (%loop:0, %loop:1) + # } + # + # where we eliminate loop invariant %b.x from while_loop, which returns 1 + # instead of 2 outputs. We also preserve the return var names with + # identity. + """ + for f_name, f in prog.functions.items(): + loop_invariant_elimination_block(f) diff --git a/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py b/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py new file mode 100644 index 000000000..676680108 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/matmul_weight_bias_fusion.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import numpy as np +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb + +child_op_types = ["add", "sub"] + + +def match_pattern(op): + if op.op_type == "matmul": + # find add + child_ops = op.outputs[0].child_ops + if len(child_ops) == 1: + add_op_candidate = list(child_ops)[0] + if add_op_candidate.op_type in child_op_types: + return add_op_candidate + return None + + +def transpose(v, before_op): + """ + Transpose the last 2 dims. + v: Var (must be a tensor) + """ + perm = list(range(v.rank)) + perm[-2], perm[-1] = perm[-1], perm[-2] + return mb.transpose(x=v, perm=perm, before_op=before_op) + + +def try_to_transform(matmul_op, add_op, block): + if matmul_op.x.val is None and matmul_op.y.val is None: + # This is a dynamic matmul. + return False + if add_op.x.val is None and add_op.y.val is None: + # This is a dynamic add. + return False + + x_is_weight = matmul_op.x.val is not None + if x_is_weight: + weight, linear_x = matmul_op.x, matmul_op.y + transpose_weight = matmul_op.transpose_x.val + transpose_x = matmul_op.transpose_y.val + else: + weight, linear_x = matmul_op.y, matmul_op.x + transpose_weight = matmul_op.transpose_y.val + transpose_x = matmul_op.transpose_x.val + + if linear_x.rank < 2 or weight.rank != 2: + # We don't support these cases yet. 
+ return False + + d_out = weight.shape[1] if not transpose_weight else weight.shape[0] + bias = add_op.x.val if add_op.x.val is not None else add_op.y.val + if len(bias.shape) > 1: + if any([d != 1 for d in bias.shape[:-1]]): + return # cannot transform + + # squeeze leading dims of size 1 + bias = np.squeeze(bias) + + if len(bias.shape) != 1 or bias.shape[0] != d_out: + return # cannot transform + + if add_op.op_type == "sub": + bias = -bias + out_name = add_op.outputs[0].name + + if x_is_weight: + # If transpose_x == transpose_weight == False: + # w*x = (x^T w^T)^T = linear(x^T, w)^T + x_transposed = ( + transpose(linear_x, before_op=matmul_op) if not transpose_x else linear_x + ) + w_no_transpose = ( + weight if not transpose_weight else transpose(weight, before_op=matmul_op) + ) + x = mb.linear( + x=x_transposed, weight=w_no_transpose, bias=bias, before_op=matmul_op + ) + x = transpose(x, before_op=matmul_op, name=out_name) + else: + # If transpose_x == transpose_weight == False + # x*w = x*(w^T)^T = linear(x, w^T) + x_no_transpose = ( + transpose(linear_x, before_op=matmul_op) if transpose_x else linear_x + ) + w_transposed = ( + weight if transpose_weight else transpose(weight, before_op=matmul_op) + ) + x = mb.linear( + x=x_no_transpose, + weight=w_transposed, + bias=bias, + before_op=matmul_op, + name=out_name, + ) + + add_op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=add_op, old_var=add_op.outputs[0], new_var=x + ) + # Remove all the ops at once + block.remove_ops([matmul_op, add_op]) + return True + + +def fuse_matmul_weight_bias_block(block): + fusion_status = False + for op in list(block.operations): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = fuse_matmul_weight_bias_block(b) + if len(op.blocks) > 0: + # This op can't be matmul + continue + + add_op = match_pattern(op) + if add_op is not None: + with block: + fusion_status = try_to_transform(op, add_op, block) + # has to break as the downstream iterator is affected. + if fusion_status: + return fusion_status + return fusion_status + + +@register_pass(namespace="common") +def fuse_matmul_weight_bias(prog): + """ + Convert matmul + add/sub to linear whenever possible. + + Given: + %3 = matmul(x=%1, y=%2) # %1 or %2 is const and rank 2 (weight) + ... + %5 = add(x=%3, y=%4) # %4 is const. add(x=%4, y=%3) is equivalent + # sub is similar. + + Result: + # assuming %2 above is const and rank 2 + %5 = linear(x=%1, weight=%2, bias=%4) + + Inputs: + + prog: Program + """ + for f_name, f in prog.functions.items(): + block_changed = True + while block_changed: + block_changed = fuse_matmul_weight_bias_block(f) diff --git a/coremltools/converters/mil/mil/passes/onehot_matmul_to_gather.py b/coremltools/converters/mil/mil/passes/onehot_matmul_to_gather.py new file mode 100644 index 000000000..6fbf9ccbd --- /dev/null +++ b/coremltools/converters/mil/mil/passes/onehot_matmul_to_gather.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
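+
+# Note on the pass above (illustrative only, not used by the converter): the
+# rewrite relies on x @ W + b == linear(x, W^T, b), where linear computes
+# x @ weight^T + bias. A small NumPy sketch with made-up shapes:
+#
+#     import numpy as np
+#
+#     x = np.random.rand(5, 4)      # activations
+#     W = np.random.rand(4, 3)      # const weight fed to matmul
+#     b = np.random.rand(3)         # const bias fed to add
+#
+#     y_pattern = x @ W + b         # the matmul + add pattern
+#     weight = W.T                  # the transposed weight given to linear
+#     y_linear = x @ weight.T + b   # linear's semantics
+#     assert np.allclose(y_pattern, y_linear)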
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +import numpy as np + + +def _check_child_op_type(op, child_op_type): + """ + :param op: operation + :param child_op_type: str + :return: Return True if op has 1 child and type of that child matches child_op_type + """ + if len(op.outputs) != 1: + return False + child_ops = list(op.outputs[0].child_ops) + if len(child_ops) != 1: + return False + if child_ops[0].op_type == child_op_type: + return True + return False + + +def _check_var_scalar_value(x, val, tol=1e-3): + """ + :param x: var + :param val: a scalar value + :return: True if the value of var is equal to val otherwise return False + """ + if x.val is None: + return False + if not isinstance(x.val, (np.ndarray, np.generic)): + return False + + if isinstance(x.val, np.ndarray): + if x.val.size != 1: + return False + x_val = x.val[:][0] + else: + x_val = x.val + + if abs(x_val - val) < tol: + return True + return False + + +def try_to_transform(onehot_op, block): + root_var = onehot_op.indices + + # check that the output of the onehot op is not a block output + if onehot_op.outputs[0] in block.outputs: + return False + + # check that onehot op has axis=-1, on_value=1 and off_value=0 + # and constant one_hot_vector_size + axis = onehot_op.axis.val + if axis is None: + return False + if onehot_op.indices.shape is None: + return False + rank = len(onehot_op.indices.shape) + if axis >= 0: + axis -= rank + if axis != -1: + return False + if not _check_var_scalar_value(onehot_op.on_value, 1): + return False + if not _check_var_scalar_value(onehot_op.off_value, 0): + return False + if onehot_op.one_hot_vector_size.val is None: + return False + + # checks for the following matmul op + if not _check_child_op_type(onehot_op, "matmul"): + return False + matmul_op = list(onehot_op.outputs[0].child_ops)[0] + if matmul_op.x != onehot_op.outputs[0]: + return False + if matmul_op.transpose_x.val or matmul_op.transpose_y.val: + return False + W_var = matmul_op.y + if W_var.val is None: + return False + if len(W_var.val.shape) != 2: + return False + + # remove onehot and matmul and replace with gather op + out_name = matmul_op.outputs[0].name + x = mb.gather(x=W_var, indices=root_var, axis=0, name=out_name, before_op=matmul_op) + + matmul_op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=matmul_op, old_var=matmul_op.outputs[0], new_var=x + ) + # Remove all the ops at once + block.remove_ops([onehot_op, matmul_op]) + return True + + +def fuse_onehot_matmul_to_gather_block(block): + fusion_status = False + for i, op in enumerate(list(block.operations)): + for b in op.blocks: + block_changed = True + while block_changed: + block_changed = fuse_onehot_matmul_to_gather_block(b) + if len(op.blocks) > 0: + # This op can't be pow + continue + + # start pattern match if one_hot op is encountered + if op.op_type == "one_hot": + with block: + fusion_status = try_to_transform(op, block) + # has to break as the downstream iterator is affected. 
+ if fusion_status: + return fusion_status + return fusion_status + + +@register_pass(namespace="common") +def fuse_onehot_matmul_to_gather(prog): + """ + Detect if onehot (axis=-1, on_value=1, off_value=0) is followed by a matmul op (no bias), + then they can be replaced by a gather op. + + Input: + %2 = one_hot(%1, on_value=1, off_value=0, axis=-1) + %3 = const() # rank 2 + %4 = matmul(%2, %3) + + Output: + %4 = gather(%3, %2, axis=0) + + """ + for f_name, f in prog.functions.items(): + block_changed = True + while block_changed: + block_changed = fuse_onehot_matmul_to_gather_block(f) diff --git a/coremltools/converters/mil/mil/passes/pass_registry.py b/coremltools/converters/mil/mil/passes/pass_registry.py new file mode 100644 index 000000000..955751651 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/pass_registry.py @@ -0,0 +1,46 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import logging + + +class PassRegistry: + def __init__(self): + # str -> func (func takes Program as input and + # modifies in-place) + self.passes = {} + + def __getitem__(self, pass_id): + """ + pass_id (str): namespace::func_name (e.g., 'common::const_elimination') + """ + if pass_id not in self.passes: + raise KeyError("Pass {} not found".format(pass_id)) + return self.passes[pass_id] + + def add(self, namespace, pass_func): + func_name = pass_func.__name__ + pass_id = namespace + "::" + func_name + logging.debug("Registering pass {}".format(pass_id)) + if pass_id in self.passes: + msg = "Pass {} already registered." + raise KeyError(msg.format(pass_id)) + self.passes[pass_id] = pass_func + + +PASS_REGISTRY = PassRegistry() + + +def register_pass(namespace): + """ + namespaces like {'common', 'nn_backend', , + } + """ + + def func_wrapper(pass_func): + PASS_REGISTRY.add(namespace, pass_func) + return pass_func + + return func_wrapper diff --git a/coremltools/converters/mil/mil/passes/reduce_transposes.py b/coremltools/converters/mil/mil/passes/reduce_transposes.py new file mode 100644 index 000000000..7ebfafb1b --- /dev/null +++ b/coremltools/converters/mil/mil/passes/reduce_transposes.py @@ -0,0 +1,1053 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil.passes.pass_registry import register_pass +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.var import Var +import logging +import numpy as np +import copy +from collections import defaultdict + +DEBUG = False # set to true to plot the block before and after the transformation + +""" +Description of the Pass: + +The pass is divided into 3 phases + +1st phase: information gathering +--------------------------------- + +- Block is traversed in the topological order, starting from the ops connected to the inputs. 
+- During the traversal, a value is associated with every var in the block
+- This value can be either of type "HypotheticalValue" or "LazyTransposeHypotheticalValue"
+- The main purpose of the type "HypotheticalValue" is essentially to indicate that it is NOT of type "LazyTransposeHypotheticalValue"
+- "LazyTransposeHypotheticalValue" represents one or more transpose ops with the same perm value. This information
+  is stored in this class. It also wraps the "HypotheticalValue" that was the last hypothetical value generated
+  prior to the origin of the "LazyTransposeHypotheticalValue"
+
+- Each op decides which type of hypothetical value to associate with its output vars, based on its op type,
+  its attributes, and the types of the hypothetical values of its input vars
+- Ops are classified into 4 categories: unary like, axis update, transpose and materialize (all the rest)
+- Transpose ops: these are the ops from which a "LazyTransposeHypotheticalValue" originates.
+    - If the input to it is a "HypotheticalValue", its output will be a "LazyTransposeHypotheticalValue",
+      indicating that this transpose op is available to get cancelled downstream
+    - If the input to it is a "LazyTransposeHypotheticalValue", then it is checked whether this op cancels it or not
+        - If it cancels it, a "HypotheticalValue" is generated at the output and the information about this transpose cancellation
+          is recorded in the dictionary "transpose_op_to_cancel_ops"
+        - If it does not cancel it, the current transpose op is categorized as a materialize op and the
+          dictionary "transpose_op_to_materialize_ops" is updated accordingly. The output of the op is now mapped to a
+          "HypotheticalValue"
+- Unary like ops: they simply transfer their input hypothetical value type to the output.
+- Axis update ops: if a transpose can pass through them, they are treated like a unary op and the dictionary
+  "transpose_op_to_axis_update_ops" is updated. If the op cannot be updated in any manner to
+  allow a transpose to pass through, it is categorized as a materialize op and handled accordingly
+- Materialize ops: all "LazyTransposeHypotheticalValue" input vars, if present, materialize here. The output of such an op
+  is always of type "HypotheticalValue". If an input is a "LazyTransposeHypotheticalValue", the dict
+  "transpose_op_to_materialize_ops" is updated
+
+- If a "LazyTransposeHypotheticalValue" hits a block output var, it is recorded in the dictionary
+  "transpose_op_to_block_output_vars". A non-cancelled transpose op has to be materialized before a block output
+
+- To treat an op like a unary op, add its type to "UNARY_LIKE_OP_TYPES". In future changes we want to make this process
+  automatic, by detecting an op as unary like from its "traits"
+
+- To treat an op like an axis update op, add an op-specific subclass of the class "transform_axis_update_ops".
+  For examples, see the classes "transform_concat", "transform_pad" etc.
+The dictionary AXIS_UPDATE_OPS is automatically filled in by the decorator "register_axis_update_op"
+
+
+2nd phase: making a determination on which transpose ops to remove from the graph
+---------------------------------------------------------------------------------
+- All transpose ops that have a corresponding complement op in the dict "transpose_op_to_cancel_ops" are candidates
+- However, we need to make sure of two things
+    - if a transpose op is removed, then all of its cancel ops in "transpose_op_to_cancel_ops" must also be removed
+      to ensure correctness of the graph. The same is true in the reverse direction as well,
+      that is, for every cancel op that is removed, all of its parent transpose ops upstream must also be removed.
+    - transpose ops should only be removed if the number of cancel ops is greater than the number of transpose ops
+      that would get freshly introduced to the block as a result of materialization ops. Right now in the algorithm,
+      each materialization op/output var (dicts "transpose_op_to_materialize_ops" and "transpose_op_to_block_output_vars")
+      results in one more transpose op, although this can be further optimized in the future
+
+- To resolve this, we recognize that the nodes consisting of sets (a) and (b) form a bipartite graph, where
+  (a) == starting transpose ops (originators of "LazyTransposeHypotheticalValue")
+  and (b) == set of transpose cancel ops and materialize ops
+- in this bipartite graph, we find all the connected components
+- for each connected component, either the whole set of transpose ops in it is removed/materialized, or none
+  of them is touched
+- thus for each set, a determination is made based on counting the number of cancel ops and materialize ops
+- based on this, the final set of transpose ops to be removed is updated
+
+
+3rd phase: transforming the graph
+----------------------------------
+- Transpose starting ops and the cancel ops are removed
+- Axis update ops, affected by these transpose ops, are updated
+- Transposes are materialized, i.e. added just before the materialize ops, which are linked to the starting transpose ops.
+  The starting transpose op can get materialized (inserted) multiple times, before each of the "materialize ops" downstream.
+- Block outputs are handled similarly to the materialize ops
+- Type inference on all ops is invoked after all the transformations
+
+
+Debugging:
+------------
+If the debug flag is set to True, the block before and after the transformation is plotted,
+with transpose nodes highlighted
+
+"""
+
+# TODO: instead of a hard-coded set, use op-traits
+# These are the ops that satisfy the following properties:
+# - single non-constant input
+# - single output
+# - non rank changing
+# - doesn't need to be updated if a transpose passes through it, i.e.
+#   Transpose(op(x)) == op(Transpose(x))
+UNARY_LIKE_OP_TYPES = set(
+    [
+        "relu",
+        "log",
+        "relu6",
+        "abs",
+        "acos",
+        "asin",
+        "atan",
+        "atanh",
+        "ceil",
+        "clip",
+        "cos",
+        "cosh",
+        "erf",
+        "exp",
+        "exp2",
+        "floor",
+        "logical_not",
+        "round",
+        "rsqrt",
+        "sign",
+        "sin",
+        "sinh",
+        "sqrt",
+        "square",
+        "tan",
+        "tanh",
+        "threshold",
+        "clamped_relu",
+        "elu",
+        "gelu",
+        "leaky_relu",
+        "linear_activation",
+        "scaled_tanh",
+        "sigmoid",
+        "sigmoid_hard",
+        "softplus",
+        "softplus_parametric",
+        "softsign",
+        "thresholded_relu",
+    ]
+)
+
+# Dictionary from axis update op to its class
+# This is filled in by child classes of the class "transform_axis_update_ops".
+AXIS_UPDATE_OPS = {} + + +def _do_transposes_cancel(perm1, perm2): + if len(perm1) != len(perm2): + return False + x = list(range(len(perm1))) + x1 = [x[i] for i in perm1] + x2 = [x1[i] for i in perm2] + if x == x2: + return True + return False + + +def _get_input_vars(op, only_nonconst_vars=False): + """ + :return: List[Var] + """ + input_vars = [] + for name, val in op.inputs.items(): + if isinstance(val, Var): + if only_nonconst_vars: + if val.op and val.op.op_type == "const": + continue + input_vars.append(val) + elif isinstance(val, (list, tuple)): + for var in val: + if not isinstance(var, Var): + msg = "transpose optimization pass: unrecognized input type of op='{}', input='{}'" + raise ValueError(msg.format(op.name, name)) + if only_nonconst_vars: + if var.op and var.op.op_type == "const": + continue + input_vars.append(var) + else: + msg = "transpose optimization pass: unrecognized input type of op='{}', input='{}'" + raise ValueError(msg.format(op.name, name)) + return input_vars + + +def register_axis_update_op(cls=None, similar_ops=[]): + """ + :param similar_ops: these ops share the same "update" and + "can_transpose_pass" methods as the base class. + For example: the class "transform_reduce_mean" corresponding to + op "reduce_mean" can be shared with other ops such as + "reduce_prod", "reduce_sum" etc + """ + + def class_wrapper(op_update_cls): + cls_name = op_update_cls.__name__ + # remove "transform_" to get type of op + op_type = cls_name[len("transform_") :] + if op_type in AXIS_UPDATE_OPS: + raise ValueError( + "Update class for op '{}' already defined".format(op_update_cls) + ) + AXIS_UPDATE_OPS[op_type] = op_update_cls + for similar_op_type in similar_ops: + if similar_op_type in AXIS_UPDATE_OPS: + raise ValueError( + "Update class for op of type '{}' already defined".format(op_type) + ) + AXIS_UPDATE_OPS[similar_op_type] = op_update_cls + return op_update_cls + + if cls is None: + return class_wrapper + + return class_wrapper + + +class transform_axis_update_ops(object): + """ + Parent class for every axis update op's class + + An axis update op is an op that can be updated, such that it can allow a transpose layer to "pass" through it. + That is, + + op(transpose(x)) == transpose(op_updated(x)) + + where + "op" : original op, + "op_updated": op after being updated. + + Example: + + if x is a tensor of rank 2, and transpose has perm=[1,0], + then + + reduce_mean[axis=1](transpose(x)) == transpose(reduce_mean[axis=0](x)) + + here reduce_mean op with axis=1 can be updated to a reduce_mean op with axis=0, + to allow the transpose to "pass" through it, i.e. get applied after it. + + """ + + def __init__(self, op, transpose_axes): + self.op = op + self.transpose_axes = transpose_axes + + def can_transpose_pass(self): + """ + Each "axis" op must determine whether it can act like a unary op + and allow the transpose to pass through. + Return True if it can allow the transpose to pass through, otherwise return False. + + :return: bool + """ + raise NotImplementedError("This function must be implemented by each op") + + def update(self): + """ + A method that updates some attribute of the axis op, + based on the transpose axes value. + This method only gets called if "can_transpose_pass" returns True. 
+
+        Update the op so that the output %o2 (below) equals %i2, i.e. so that
+        the transpose can be applied after the (updated) op instead of before it.
+
+        Before:
+        %i1 = transpose_op(%i0, perm=transpose_axes)
+        %i2 = op(%i1)
+
+        After:
+        %o1 = op_updated(%i0)
+        %o2 = transpose_op(%o1, perm=transpose_axes)
+
+        :return: None
+        """
+        raise NotImplementedError("This function must be implemented by each op")
+
+
+@register_axis_update_op()
+class transform_concat(transform_axis_update_ops):
+    def __init__(self, **kwargs):
+        super(transform_concat, self).__init__(**kwargs)
+        self.axis_var = self.op.inputs["axis"]
+
+    def can_transpose_pass(self):
+        if self.axis_var.val is not None:
+            return True
+        return False
+
+    def update(self):
+        new_axis_val = self.transpose_axes[self.axis_var.val]
+        inputs = list(self.op.inputs["values"])
+
+        # to be used if there is a constant input to the concat op
+        transpose_perm_for_const = [0] * len(self.transpose_axes)
+        for i, axis in enumerate(self.transpose_axes):
+            transpose_perm_for_const[axis] = i
+
+        # if there is a constant input, transpose it
+        for input_var in inputs:
+            if input_var.op.op_type == "const":
+                const_val = input_var.val
+                new_const_val = np.transpose(const_val, transpose_perm_for_const)
+                # insert a new constant JUST before the op
+                with self.op.enclosing_block:
+                    new_const_input_var = mb.const(
+                        val=new_const_val, mode="immediate_value", before_op=self.op
+                    )
+                self.op.enclosing_block.replace_uses_of_var_after_op(
+                    anchor_op=new_const_input_var.op,
+                    end_op=self.op,
+                    old_var=input_var,
+                    new_var=new_const_input_var,
+                    no_check_var_types=True,
+                )
+
+        # insert a new constant for the new axis, JUST before the op
+        with self.op.enclosing_block:
+            new_axis_var = mb.const(
+                val=new_axis_val, mode="immediate_value", before_op=self.op
+            )
+
+        self.op.enclosing_block.replace_uses_of_var_after_op(
+            anchor_op=new_axis_var.op,
+            end_op=self.op,
+            old_var=self.axis_var,
+            new_var=new_axis_var,
+            no_check_var_types=True,
+        )
+
+
+@register_axis_update_op()
+class transform_pad(transform_axis_update_ops):
+    def __init__(self, **kwargs):
+        super(transform_pad, self).__init__(**kwargs)
+        self.pad_var = self.op.inputs["pad"]
+        self.pad_op = self.pad_var.op
+        self.mode = self.op.mode.val
+        self.pad_amounts_new = None
+
+    def _compute_new_pad_values(self):
+        pad_amounts = np.reshape(self.pad_var.val, [-1, 2])
+        rank_diff = len(self.transpose_axes) - pad_amounts.shape[0]
+        self.pad_amounts_new = copy.deepcopy(pad_amounts)
+        # append "rank_diff" rows of zeros to the top
+        self.pad_amounts_new = np.concatenate(
+            (np.zeros((2 * rank_diff)).reshape(-1, 2), self.pad_amounts_new)
+        )
+        self.pad_amounts_new = self.pad_amounts_new.astype(pad_amounts.dtype)
+        pad_amounts = np.concatenate(
+            (np.zeros((2 * rank_diff)).reshape(-1, 2), pad_amounts)
+        )
+        for i, axis in enumerate(self.transpose_axes):
+            self.pad_amounts_new[axis][0] = pad_amounts[i][0]
+            self.pad_amounts_new[axis][1] = pad_amounts[i][1]
+        # get the top "rank_diff" rows
+        top_rows = self.pad_amounts_new[:rank_diff, :]
+        if not np.all(top_rows == 0):
+            return False
+        # cut "rank_diff" rows from the top
+        self.pad_amounts_new = self.pad_amounts_new[rank_diff:, :]
+        self.pad_amounts_new = self.pad_amounts_new.flatten()
+        return True
+
+    def can_transpose_pass(self):
+        if (
+            len(_get_input_vars(self.op, only_nonconst_vars=True)) != 1
+            or self.pad_op.op_type != "const"
+        ):
+            return False
+        if len(self.transpose_axes) < 2:
+            return False
+        if not self._compute_new_pad_values():
+            return False
+        # check that if mode is not constant, the updated padding
+        # would stay limited to the last 2 axes
+        
if self.mode != "constant" and not np.all(self.pad_amounts_new[:-4] == 0): + return False + return True + + def update(self): + self._compute_new_pad_values() + # insert a new constant for pad val, JUST before the op + with self.op.enclosing_block: + new_pad_var = mb.const( + val=self.pad_amounts_new, mode="immediate_value", before_op=self.op + ) + self.op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=new_pad_var.op, + end_op=self.op, + old_var=self.pad_var, + new_var=new_pad_var, + no_check_var_types=True, + ) + + +@register_axis_update_op( + similar_ops=[ + "reduce_l1_norm", + "reduce_l2_norm", + "reduce_max", + "reduce_log_sum", + "reduce_log_sum_exp", + "reduce_min", + "reduce_prod", + "reduce_sum", + "reduce_sum_square", + ] +) +class transform_reduce_mean(transform_axis_update_ops): + def __init__(self, **kwargs): + super(transform_reduce_mean, self).__init__(**kwargs) + self.axes_var = self.op.inputs["axes"] + self.axes_op = self.axes_var.op + + def can_transpose_pass(self): + # allow transpose to push through it only if keep_dims are True since that doesn't change the rank + if self.op.inputs["keep_dims"].val: + if self.axes_op.op_type == "const": + return True + return False + + def update(self): + # update axis of the op + old_axes_val = self.axes_var.val + new_axes_val = [0] * len(old_axes_val) + for i, axis in enumerate(old_axes_val): + new_axes_val[i] = self.transpose_axes[axis] + + # insert a new constant for the axis, JUST before the op + with self.op.enclosing_block: + new_axis_var = mb.const( + val=new_axes_val, mode="immediate_value", before_op=self.op + ) + + self.op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=new_axis_var.op, + end_op=self.op, + old_var=self.axes_var, + new_var=new_axis_var, + no_check_var_types=True, + ) + + +@register_axis_update_op(similar_ops=["mul", "sub", "real_div", "maximum", "minimum"]) +class transform_add(transform_axis_update_ops): + def __init__(self, **kwargs): + super(transform_add, self).__init__(**kwargs) + + def can_transpose_pass(self): + const_input = None + if self.op.inputs["x"].op and self.op.inputs["x"].op.op_type == "const": + const_input = self.op.inputs["x"] + other_input = self.op.inputs["y"] + if self.op.inputs["y"].op and self.op.inputs["y"].op.op_type == "const": + if const_input is not None: + return False # both inputs are constant + const_input = self.op.inputs["y"] + other_input = self.op.inputs["x"] + if const_input is None: + return True + if not isinstance(const_input.val, (np.ndarray, np.generic)): + return False + rank_const_input = len(const_input.val.shape) + rank_other_input = len(other_input.shape) if other_input.shape else 0 + if rank_const_input <= 1 and rank_other_input > 0: + return True + return False + + def update(self): + if len(_get_input_vars(self.op, only_nonconst_vars=True)) == 2: + # nothing to update + return + + for input_var in _get_input_vars(self.op): + if input_var.op and input_var.op.op_type == "const": + const_input_var = input_var + break + + const_value = const_input_var.val + if len(const_value.shape) == 0: + # const is a scalar, no need to modify it + return + + rank = len(self.transpose_axes) + new_shape = [1] * rank + new_shape[self.transpose_axes[-1]] = const_value.shape[0] + new_const_val = np.reshape(const_value, new_shape) + + # insert a new constant JUST before the op + with self.op.enclosing_block: + new_const_var = mb.const( + val=new_const_val, mode=const_input_var.op.mode, before_op=self.op + ) + + 
self.op.enclosing_block.replace_uses_of_var_after_op( + anchor_op=new_const_var.op, + end_op=self.op, + old_var=const_input_var, + new_var=new_const_var, + no_check_var_types=True, + ) + + +class HypotheticalValue(object): + # A hypothetical value. + # Simply wraps a Var. + # Actual Var it wraps doesn't really matter, its mainly for debugging. + # This class really exists to differentiate a "LazyTransposeHypotheticalValue" type with a + # non-"LazyTransposeHypotheticalValue" type + def __init__(self, var=None): + self.value = var # type : Var + + +class LazyTransposeHypotheticalValue(object): + # a hypothetical value that represents a transpose op on top of a hypothetical value, + # or a collection of transpose_ops, which have the same "perm" parameter + + def __init__(self, hypothetical_value, transpose_ops, perm): + + # Input hypothetical value to the transpose op. + # When there are multiple transpose ops, this is the incoming hypothetical value to any one of those + self.wrapped_hypothetical_value = hypothetical_value # type : HypotheticalValue + + if not isinstance(hypothetical_value, HypotheticalValue): + raise ValueError( + "transpose optimization pass: incorrect type passed for hypothetical_value" + ) + + for op in transpose_ops: + if op.op_type != "transpose": + raise ValueError( + "transpose optimization pass: LazyTransposeHypotheticalValue can only be made with transpose ops" + ) + perm_op = list(op.inputs["perm"].val) + if perm_op != perm: + raise ValueError( + "transpose optimization pass: LazyTransposeHypotheticalValue can only be made with transpose ops with the same 'perm' values" + ) + + self.perm = perm # type : list[int], perm parameter of all the transpose ops + self.transpose_ops = transpose_ops # type : Set(op) + + +class TransposeOptimization(object): + def __init__(self, block): + self.block = block + + # for each var in the block, this dictionary stores the hypothetical value that is assigned to it during + # graph traversal + self.var_to_hypothetical_value = ( + {} + ) # type : var : HypotheticalValue or LazyTransposeHypotheticalValue + # start out by filling this dictionary with all the inputs of the block + for _, input_var in block.inputs.items(): + self.var_to_hypothetical_value[input_var] = HypotheticalValue(input_var) + + # Dictionaries below are used to store transpose cancellation/fusion information. + # These are filled during the traversal of the graph, + # after which they are used by the `_apply_transform` method + + # transpose op to the list of transpose ops that are its compliments and can be cancelled away with it + self.transpose_op_to_cancel_ops = defaultdict( + lambda: [] + ) # type : op : List[op] + + # transpose op to the list of ops before which it has to materialize, i.e. 
the root transpose op + # can be moved downstream in the graph, as far as these materialize ops + self.transpose_op_to_materialize_ops = defaultdict( + lambda: [] + ) # type : op : List[Tuple(op, Var)] + + # list of the ops that need to be updated (either their axis parameter or one of their constant inputs) + # if the transpose op is fused away or moved downstream in the graph + self.transpose_op_to_axis_update_ops = defaultdict( + lambda: [] + ) # type : op : List[op] + + # transpose op to "materialize" block output vars + # if a block output var gets a Lazy transpose value, this dictionary is updated + self.transpose_op_to_block_output_vars = defaultdict( + lambda: [] + ) # type : op : List[Var] + + # for book keeping + self.ops_updated = set() + self.materialized_ops_handled = set() + self.transpose_ops_removed = set() + + def _visit_unary_like_op(self, op, input_var=None): + # pass the input var's hypothetical_value to the output var's, since shape invariant ops do + # not modify the incoming hypothetical_value + + if input_var is None: + input_var = op.inputs["x"] + + if len(op.outputs) > 1: + msg = ( + "transpose optimization pass: op '{}', of type = '{}', has multiple outputs, hence it" + "cannot be handled like a unary op" + ) + raise ValueError(msg.format(op.name, op.op_type)) + self.var_to_hypothetical_value[op.outputs[0]] = self.var_to_hypothetical_value[ + input_var + ] + + def _visit_materialize_op(self, op): + # this is the catch all category of ops + # these are the "not-lazy-transpose-pass-through" kind of ops + # output hypothetical_value is same as the vars + for out_var in op.outputs: + self.var_to_hypothetical_value[out_var] = HypotheticalValue(out_var) + + # check for the inputs + # if there is a lazy transpose hypothetical value as an input, + # all the transpose ops it hold, + # need to be materialized here now, i.e., we should update "transpose_op_to_materialize_ops" + for input_var in _get_input_vars(op): + input_hypothetical_value = self.var_to_hypothetical_value[input_var] + if isinstance(input_hypothetical_value, LazyTransposeHypotheticalValue): + all_lazy_transpose_ops = input_hypothetical_value.transpose_ops + for transpose_op in all_lazy_transpose_ops: + self.transpose_op_to_materialize_ops[transpose_op].append( + (op, input_var) + ) + + def _visit_axis_update_op(self, op): + """ + Check that all non constant inputs are of type LazyTransposeHypotheticalValue with the same perm value + This check is common for all "axis update" ops. 
+ """ + input_vars = _get_input_vars(op, only_nonconst_vars=True) + perm = None + for i, var in enumerate(input_vars): + hypothetical_value = self.var_to_hypothetical_value[var] + if not isinstance(hypothetical_value, LazyTransposeHypotheticalValue): + self._visit_materialize_op(op) + return + if i == 0: + perm = hypothetical_value.perm + elif perm != hypothetical_value.perm: + self._visit_materialize_op(op) + return + + # checks specific to the op type + op_cls = AXIS_UPDATE_OPS.get(op.op_type, None) + if op_cls is None: + raise ValueError( + "Transform class for op of type '{}' not found".format(op.op_type) + ) + + if not op_cls(**{"op": op, "transpose_axes": perm}).can_transpose_pass(): + self._visit_materialize_op(op) + return + + # add this op to the dictionary "transpose_op_to_axis_update_ops" + # and update self.var_to_hypothetical_value[op.outputs[0]] + all_lazy_transpose_ops = set() + for var in input_vars: + input_hypothetical_value = self.var_to_hypothetical_value[var] + all_lazy_transpose_ops.update(input_hypothetical_value.transpose_ops) + + for transpose_op in all_lazy_transpose_ops: + self.transpose_op_to_axis_update_ops[transpose_op].append(op) + + self.var_to_hypothetical_value[op.outputs[0]] = LazyTransposeHypotheticalValue( + input_hypothetical_value.wrapped_hypothetical_value, + all_lazy_transpose_ops, + perm, + ) + + def _visit_transpose_op(self, op): + input_var = op.inputs["x"] + if op.inputs["perm"].val is None: + self._visit_materialize_op(op) + return + perm = list(op.inputs["perm"].val) + input_hypothetical_value = self.var_to_hypothetical_value[input_var] + + """ + There are 3 cases to handle: + + 1. input type == HypotheticalValue + 2. input type == LazyTransposeHypotheticalValue and this op is the transpose compliment of it + 3. input type == LazyTransposeHypotheticalValue and this op is NOT the transpose compliment of it + """ + + if isinstance(input_hypothetical_value, HypotheticalValue): + # case 1 + # the input is not a lazy transpose. + # Since the current node is a transpose, it might get cancelled downstream, so + # make the output var's hypothetical_value a lazy transpose + self.var_to_hypothetical_value[ + op.outputs[0] + ] = LazyTransposeHypotheticalValue( + input_hypothetical_value, set([op]), perm + ) + return + + # input is a Lazy transpose hypothetical value. 
Lets first check whether the current + # transpose cancels it or not + do_cancel = _do_transposes_cancel(input_hypothetical_value.perm, perm) + if do_cancel: + # case 2 + # transposes cancel, so now the hypothetical_value of the output will + # be same as the hypothetical value wrapped inside the upstream lazy transpose + self.var_to_hypothetical_value[ + op.outputs[0] + ] = input_hypothetical_value.wrapped_hypothetical_value + # also update the dictionary "transpose_op_to_cancel_ops" + all_lazy_transpose_ops = input_hypothetical_value.transpose_ops + for transpose_op in all_lazy_transpose_ops: + self.transpose_op_to_cancel_ops[transpose_op].append(op) + else: + # case 3 + # transposes don't cancel + # this is same as a materialize op then + self._visit_materialize_op(op) + + def _visit_op(self, op): + + input_vars = _get_input_vars(op) + for var in input_vars: + assert ( + var in self.var_to_hypothetical_value + ), "transpose optimization pass: hypothetical value for var '{}', not found".format( + var.name + ) + + if op.op_type in UNARY_LIKE_OP_TYPES: + self._visit_unary_like_op(op) + elif op.op_type in AXIS_UPDATE_OPS: + self._visit_axis_update_op(op) + elif op.op_type == "transpose": + self._visit_transpose_op(op) + elif op.op_type == "const": + self.var_to_hypothetical_value[op.outputs[0]] = HypotheticalValue( + op.outputs[0] + ) + else: + self._visit_materialize_op(op) + + def block_traversal(self): + + # Since the ops are already organized in a topological manner, + # simply iterate through all the ops + + for op in self.block.operations: + self._visit_op(op) + + # after all the ops have been visited, every var being produced would + # have a corresponding hypothetical value associated with it + # update "transpose_op_to_block_output_vars" + # that is, if an output var has a lazy transpose hypothetical value associated + # with it, record this in "transpose_op_to_block_output_vars" + block_output_vars = self.block.outputs + for var in block_output_vars: + assert var in self.var_to_hypothetical_value + hypothetical_value = self.var_to_hypothetical_value[var] + if isinstance(hypothetical_value, LazyTransposeHypotheticalValue): + all_lazy_transpose_ops = hypothetical_value.transpose_ops + for transpose_op in all_lazy_transpose_ops: + self.transpose_op_to_block_output_vars[transpose_op].append(var) + + def _verify_cancellable_transposes(self): + + # invert "transpose_op_to_cancel_ops" + transpose_cancel_ops_to_starting_transpose_set = defaultdict(lambda: set()) + for op, cancel_ops_list in self.transpose_op_to_cancel_ops.items(): + for cancel_op in cancel_ops_list: + transpose_cancel_ops_to_starting_transpose_set[cancel_op].update( + set([op]) + ) + + for op in transpose_cancel_ops_to_starting_transpose_set: + assert ( + op not in self.transpose_op_to_cancel_ops + ), "transpose reduction optimization: transpose op '{}' cannot be both a starting and cancel op".format( + op.name + ) + + # invert "transpose_op_to_materialize_ops" + materizalize_ops_to_starting_transpose_set = defaultdict(lambda: set()) + for op, materialize_ops in self.transpose_op_to_materialize_ops.items(): + for materialize_op, edge in materialize_ops: + materizalize_ops_to_starting_transpose_set[materialize_op].update( + set([op]) + ) + + # the starting transpose op may not be in "transpose_op_to_cancel_ops" + # but it needs to be removed if it materializes later, hence we need to add it + # to the "transpose_op_to_cancel_ops", with an empty value, i.e. 
no other ops to cancel because of it + if op not in self.transpose_op_to_cancel_ops: + self.transpose_op_to_cancel_ops[op] = [] + + # (starting transpose ops) and (transpose cancel ops + materialize ops) form a bipartite graph. + # Find the connected components of this graph, by doing a BFS traversal + connected_components = [] # List[(Set(op), Set(op)), Set(op)] + visited = {} + for op in list(self.transpose_op_to_cancel_ops.keys()): + if op in visited: + continue + visited[op] = 1 + set_a = set([op]) # set of starting transpose ops + set_b1 = set() # set of transpose cancel ops connected to set_a + set_b2 = set() # set of materialize ops connected to set_a + queue = [] + queue.extend(self.transpose_op_to_cancel_ops[op]) + if op in self.transpose_op_to_materialize_ops: + materialize_ops_list = list( + list(zip(*self.transpose_op_to_materialize_ops[op]))[0] + ) + queue.extend(materialize_ops_list) + while len(queue) > 0: + o = queue.pop(0) + visited[o] = 1 + # enque nodes connected to o + if o in self.transpose_op_to_cancel_ops: + set_a.update(set([o])) + for neighbor_op in self.transpose_op_to_cancel_ops[o]: + if neighbor_op not in visited: + queue.append(neighbor_op) + elif o in transpose_cancel_ops_to_starting_transpose_set: + set_b1.update(set([o])) + for neighbor_op in transpose_cancel_ops_to_starting_transpose_set[ + o + ]: + if neighbor_op not in visited: + queue.append(neighbor_op) + else: + set_b2.update(set([o])) + for neighbor_op in materizalize_ops_to_starting_transpose_set[o]: + if neighbor_op not in visited: + queue.append(neighbor_op) + connected_components.append((set_a, set_b1, set_b2)) + + starting_ops_to_remove = ( + set() + ) # starting ops to remove from the optimization list + + # now for each connected component, make a decision whether to cancel it or not + # (either all transpose ops in a set get cancelled or they don't) + for op_set, op_cancel_set, materialize_op_set in connected_components: + + block_output = False + # check that output is not directly connected to a starting transpose op + for op in op_set: + if op.outputs[0] in self.block.outputs: + starting_ops_to_remove.update(op_set) + block_output = True + break + if block_output: + continue + + # check whether transposes resulting from materialization are not greater than cancel transposes + materizalize_set = set(list(materialize_op_set)) + # all the output nodes are like materialize ops , so add them here + for op in op_set: + materizalize_set.update( + set(self.transpose_op_to_block_output_vars.get(op, [])) + ) + + if len(materizalize_set) > len(op_cancel_set): + starting_ops_to_remove.update(op_set) + + # remove ops + for op in starting_ops_to_remove: + self.transpose_op_to_cancel_ops.pop(op, None) + + def _remove_transpose_ops(self, starting_transpose_op): + + perm = list(starting_transpose_op.inputs["perm"].val) + starting_transpose_op_out_var = starting_transpose_op.outputs[0] + starting_transpose_op_input_var = starting_transpose_op.inputs["x"] + + # update all the "axis_update" ops + for op in self.transpose_op_to_axis_update_ops.get(starting_transpose_op, []): + if op not in self.ops_updated: + op_cls = AXIS_UPDATE_OPS.get(op.op_type, None) + op_cls(**{"op": op, "transpose_axes": perm}).update() + self.ops_updated.add(op) + + # short circuit starting_transpose_op and its cancel ops + + to_be_removed_ops = [] + + for op in [starting_transpose_op] + self.transpose_op_to_cancel_ops[ + starting_transpose_op + ]: + if op in self.transpose_ops_removed: + continue + + to_be_removed_ops.append(op) + 
self.transpose_ops_removed.add(op) + + input_var = op.inputs["x"] # input to the transpose op + output_var = op.outputs[0] # output of the transpose op + parent_op = input_var.op # parent op of the transpose op + + if output_var in self.block.outputs: + # output is a block output, so this must be one of the "edge" transpose compliment ops + # We need to set `input_var` as the block output var + # Change the name of the input_var to match the block output + input_var.name = output_var.name + + # connect all the child ops of the output_var to the parent of the transpose op. + self.block.replace_uses_of_var_after_op( + anchor_op=parent_op, + old_var=output_var, + new_var=input_var, + no_check_var_types=True, + ) + + """ + Insert a transpose op JUST before each one of the materialize ops + i.e. + Given: %i1 = op(...) + ... + ... = materialize_op(..., %i1 ,...) + ... + + Result: %i1 = op(...) + ... + %i2 = transpose_op(%i1, %perm) + ... = materialize_op(..., %i2 ,...) + ... + """ + for op, input_var in self.transpose_op_to_materialize_ops.get( + starting_transpose_op, [] + ): + if (op, input_var) in self.materialized_ops_handled: + continue + + self.materialized_ops_handled.add((op, input_var)) + with self.block: + if input_var == starting_transpose_op_out_var: + # materialize op is connected to the starting transpose op + # in this case, connect to its parent + i1 = starting_transpose_op_input_var + else: + i1 = input_var + x = mb.transpose(x=i1, perm=perm, before_op=op) + + self.block.replace_uses_of_var_after_op( + anchor_op=x.op, + end_op=op, + old_var=i1, + new_var=x, + no_check_var_types=True, + ) + + # materialize transpose JUST before output vars + for output_var in self.transpose_op_to_block_output_vars.get( + starting_transpose_op, [] + ): + with self.block: + x = mb.transpose(x=output_var, perm=perm, name=output_var.name) + + self.block.replace_uses_of_var_after_op( + anchor_op=x.op, old_var=output_var, new_var=x, no_check_var_types=True + ) + output_var.name += "_before_transpose_op_" + x.op.name + if output_var.op.name == x.op.name: + output_var.op.name += "_before_transpose_op_" + x.op.name + + self.block.remove_ops(to_be_removed_ops) + + def apply_transform(self): + """ + Take in the data collected during graph traversal + and transform the graph by cancelling out transpose ops that can be removed. + """ + + logging.debug( + "Block before optimize transpose transform:\n{}".format(self.block) + ) + if DEBUG: + import graphviz + + graphviz.Source( + self.block.get_dot_string( + highlight_debug_op_names=[], highlight_debug_op_types=["transpose"] + ) + ).view(filename="/tmp/block_before_reduce_transpose") + + """ + First check which transposes can be cancelled. + After this function call we get an updated dictionary "transpose_op_to_cancel_ops" + with only the transpose ops that can really be cancelled in the graph + Reasons to not cancel: + - materialize_ops are greater than cancel_ops, so removing transpose will instead end up increasing the count of transposes + - removing a transpose op can only be successful, if all of its cancel ops are removed, removing all the cancel ops + is only successful if all of their starting transpose ops are removed and so on. 
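+        In other words, the decision is made per connected component of the
+        bipartite graph described above: either every starting transpose op in a
+        component is removed, or none of them is.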
These checks are carried out in
+        "_verify_cancellable_transposes()".
+        """
+        self._verify_cancellable_transposes()
+
+        # apply transform
+        for transpose_op in self.transpose_op_to_cancel_ops:
+            self._remove_transpose_ops(transpose_op)
+
+        if DEBUG:
+            graphviz.Source(
+                self.block.get_dot_string(
+                    highlight_debug_op_names=[], highlight_debug_op_types=["transpose"]
+                )
+            ).view(filename="/tmp/block_after_reduce_transpose")
+
+        logging.debug(
+            "Block after optimize transpose transform:\n{}".format(self.block)
+        )
+
+        for op in self.block.operations:
+            op.type_value_inference(overwrite_output=True)
+
+
+def reduce_transposes_block(block):
+    """
+    Only apply the optimization if the block is flat,
+    i.e., it does not contain any op that contains a sub-block.
+    TODO:
+    Removing transposes and transpose complements requires re-running
+    type inference for the set of ops in between the fused transpose ops,
+    which is simpler to do when all the ops in the block are free of sub-blocks.
+    The case of transpose fusion with sub-block-containing ops needs more
+    careful handling and test cases.
+    """
+    for op in list(block.operations):
+        if len(op.blocks) > 0:
+            return
+
+    opt_transposes = TransposeOptimization(block)
+    opt_transposes.block_traversal()
+    opt_transposes.apply_transform()
+
+
+@register_pass(namespace="common")
+def reduce_transposes(prog):
+    for f_name, f in prog.functions.items():
+        reduce_transposes_block(f)
diff --git a/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py b/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py
new file mode 100644
index 000000000..ad7c4a424
--- /dev/null
+++ b/coremltools/converters/mil/mil/passes/remove_symbolic_reshape.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-

+# Copyright (c) 2020, Apple Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from __future__ import print_function as _
+from __future__ import division as _
+from __future__ import absolute_import as _
+
+import numpy as np
+from coremltools.converters.mil.mil.types.symbolic import (
+    is_symbolic,
+    any_variadic,
+    num_symbolic,
+)
+from coremltools.converters.mil.mil import Builder as mb
+from coremltools.converters.mil.mil.passes.pass_registry import register_pass
+import logging
+
+
+def remove_symbolic_reshape_block(block):
+    num_changes = 0
+    for op in list(block.operations):
+        for b in op.blocks:
+            num_changes += remove_symbolic_reshape_block(b)
+        if op.op_type != "reshape":
+            continue
+        if op.shape.val is not None:
+            # shape does not contain symbols.
+            continue
+        if op.shape.sym_val is None:
+            # shape is runtime determined.
+            continue
+        # Use output shape as `shape`
+        shape = op.outputs[0].shape
+        if any_variadic(shape):
+            msg = (
+                "Cannot reshape to a variadic shape from a compile time "
+                + "shape argument. A variadic shape can only be achieved "
+                + "via a runtime shape argument. op: {}"
+            )
+            raise ValueError(msg.format(op))
+        num_symbols = num_symbolic(shape)
+        if num_symbols > 1:
+            continue
+        # Convert the single symbol to -1
+        integer_shape = [-1 if is_symbolic(i) else i for i in shape]
+        with block:
+            shape_const = mb.const(
+                val=integer_shape,
+                mode="immediate_value",
+                name=op.shape.name + "x",
+                before_op=op,
+            )
+            reshaped = mb.reshape(x=op.x, shape=shape_const, name=op.name, before_op=op)
+            op.enclosing_block.replace_uses_of_var_after_op(
+                anchor_op=op, old_var=op.outputs[0], new_var=reshaped
+            )
+            # Remove both ops at once
+            block.remove_ops([op, op.shape.op])
+        num_changes += 1
+    return num_changes
+
+
+@register_pass(namespace="common")
+def remove_symbolic_reshape(prog):
+    """
+    Convert symbolic shapes in `reshape` to integers.
+
+    Given:
+
+        main(%x: (s0, 4, fp32)) {
+            block0() {
+                %reshape_0_shape_0: (3,i32)^ = const(val=(s0, s1, 2))
+                %reshape_0: (s0, 2, 2, fp32) = reshape(x=%x, shape=%reshape_0_shape_0)
+            } -> (%reshape_0)
+        }
+
+    Result:
+
+        main(%x: (s0, 4, fp32)) {
+            block0() {
+                %reshape_0_shape_0x: (3,i32)* = const(val=[-1, 2, 2])
+                %reshape_0: (-1, 2, 2, fp32) = reshape(x=%x, shape=%reshape_0_shape_0x)
+            } -> (%reshape_0)
+        }
+
+    Comment: Currently this pass performs no real optimization; it simply
+    replaces each symbol with a positive integer, if it can be solved from the
+    volumetric constraint, or with -1 otherwise. The pass therefore fails if
+    more than one symbol needs to resolve to -1.
+
+    TODO (rdar://59165842): Use expand_dims, squeeze etc. to use 0 instead
+    of dynamic reshape with -1.
+
+    Inputs:
+
+        prog: Program
+    """
+    for f_name, f in prog.functions.items():
+        num_changes = remove_symbolic_reshape_block(f)
+        msg = "remove_symbolic_reshape: changed {} reshapes."
+        logging.info(msg.format(num_changes))
diff --git a/coremltools/converters/mil/mil/passes/test_elementwise_fusions.py b/coremltools/converters/mil/mil/passes/test_elementwise_fusions.py
new file mode 100644
index 000000000..3ff3faa84
--- /dev/null
+++ b/coremltools/converters/mil/mil/passes/test_elementwise_fusions.py
@@ -0,0 +1,202 @@
+# Copyright (c) 2020, Apple Inc. All rights reserved.
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.testing_utils import ( + assert_op_count_match, + assert_model_is_valid, + get_op_types_in_program, + apply_pass_and_basic_check, +) + +import pytest +import numpy as np +import itertools + +np.random.seed(1984) + + +class TestElementwiseOptimizationPasses: + """ + Input graph: + Const + | + V + input -----> convolution -----> add/sub ----> relu ---> out + + Output graph: + input -----> convolution -----> relu ----> out + """ + + @pytest.mark.parametrize( + "conv_dim, \ + flip_add_input_order, \ + add_batch_dim_to_const, \ + use_sub_instead, \ + prebuilt_bias, \ + scalar_elementwise, \ + use_conv_transpose", + itertools.product( + [ + 2, + 3, + ], # 1D conv conversion broken even without the pass: rdar://problem/62960720 + [True, False], # flip_add_input_order + [True, False], # add_batch_dim_to_const + [True, False], # use_sub_instead + [True, False], # prebuilt_bias + [True, False], # scalar_elementwise + [True, False], # use_conv_transpose + ), + ) + def test_fuse_bias_conv( + self, + conv_dim, + flip_add_input_order, + add_batch_dim_to_const, + use_sub_instead, + prebuilt_bias, + scalar_elementwise, + use_conv_transpose, + ): + + if flip_add_input_order and use_sub_instead: + return + + if use_conv_transpose and conv_dim != 2: + return + + input_shape = None + W = None + Cout = 8 + Cin = 3 + D = 10 + const = ( + np.random.rand(Cout) if add_batch_dim_to_const else np.random.rand(1, Cout) + ) + const = np.expand_dims(const, axis=-1) + + if conv_dim == 1: + input_shape = (1, Cin, D) + W = np.random.rand(Cout, Cin, 1) + elif conv_dim == 2: + input_shape = (1, Cin, D, D) + W = np.random.rand(Cout, Cin, 1, 1) + const = np.expand_dims(const, axis=-1) + elif conv_dim == 3: + input_shape = (1, Cin, D, D, D) + W = np.random.rand(Cout, Cin, 1, 1, 1) + const = np.expand_dims(const, axis=-1) + const = np.expand_dims(const, axis=-1) + + output_shape = list(input_shape) + output_shape[1] = Cout + + if scalar_elementwise: + const = np.random.uniform(0) + + @mb.program(input_specs=[mb.TensorSpec(shape=input_shape)]) + def prog(x): + kwargs = { + "x": x, + "weight": W, + "pad_type": "valid", + "dilations": [1] * conv_dim, + "strides": [1] * conv_dim, + } + if prebuilt_bias: + kwargs["bias"] = np.random.rand(Cout) + + x = mb.conv_transpose(**kwargs) if use_conv_transpose else mb.conv(**kwargs) + + if use_sub_instead: + x = mb.sub(x=x, y=const) + else: + x = mb.add( + x=const if flip_add_input_order else x, + y=x if flip_add_input_order else const, + ) + x = mb.relu(x=x) + return x + + element_op = "sub" if use_sub_instead else "add" + conv_op = "conv" if not use_conv_transpose else "conv_transpose" + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::fuse_bias_conv" + ) + assert get_op_types_in_program(prev_prog) == [conv_op, element_op, "relu"] + assert get_op_types_in_program(prog) == [conv_op, "relu"] + + old_bias = prev_block.find_ops(op_type=conv_op)[0].inputs.get("bias", None) + old_bias_val = 0 if old_bias is None else old_bias.val + assert old_bias_val is not None + assert block.find_ops(op_type=conv_op)[0].inputs["bias"] is not None + new_bias_val = block.find_ops(op_type=conv_op)[0].inputs["bias"].val + assert new_bias_val is not None + if use_sub_instead: + np.testing.assert_almost_equal( + old_bias_val - 
np.squeeze(const), new_bias_val + ) + else: + np.testing.assert_almost_equal( + old_bias_val + np.squeeze(const), new_bias_val + ) + + assert_model_is_valid( + prog, + {"x": input_shape}, + expected_output_shapes={block.outputs[0].name: tuple(output_shape)}, + ) + + """ + Input graph: + Const Const + | | + V V + input -----> transpose -----> mul ----> add ---> out + + Output graph: + input -----> transpose -----> batchnorm ----> out + """ + + @pytest.mark.parametrize( + "flip_mul_input_order, flip_add_input_order, rank_3_const_input", + itertools.product([False, True], [False, True], [False, True]), + ) + def test_mul_add_fusion_to_batchnorm( + self, flip_mul_input_order, flip_add_input_order, rank_3_const_input + ): + + C = 3 + gamma = np.random.rand(1, C, 1, 1) + beta = np.random.rand(1, C, 1, 1) + if rank_3_const_input: + gamma = np.squeeze(gamma, axis=0) + beta = np.squeeze(beta, axis=0) + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 10, 10, C))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + if flip_mul_input_order: + x = mb.mul(x=gamma, y=x) + else: + x = mb.mul(x=x, y=gamma) + if flip_add_input_order: + x = mb.add(x=beta, y=x) + else: + x = mb.add(x=x, y=beta) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::fuse_elementwise_to_batchnorm" + ) + assert get_op_types_in_program(prev_prog) == ["transpose", "mul", "add"] + assert get_op_types_in_program(prog) == ["transpose", "batch_norm"] + assert_model_is_valid( + prog, + {"x": (1, 10, 10, C)}, + expected_output_shapes={block.outputs[0].name: (1, C, 10, 10)}, + ) diff --git a/coremltools/converters/mil/mil/passes/test_passes.py b/coremltools/converters/mil/mil/passes/test_passes.py new file mode 100644 index 000000000..9d5290336 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/test_passes.py @@ -0,0 +1,425 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.testing_utils import ( + assert_op_count_match, + assert_model_is_valid, + assert_same_output_names, + get_op_types_in_program, + apply_pass_and_basic_check, +) +from coremltools.converters.mil.mil import Symbol, types +from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY +import copy +import pytest + +import numpy as np + +np.random.seed(1984) +validate_model = True + + +# TODO: rdar://58993652 (Add recursive block test cases for graph pass tests) + + +def test_const_elimination(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + a = np.random.rand(2, 4).astype(np.float32) + double_a = mb.add(x=a, y=a) + return mb.add(x=x, y=double_a) + + assert_op_count_match(prog, expect=2, op="const") + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::const_elimination"](prog) + assert_same_output_names(prev_prog, prog) + assert_op_count_match(prog, expect=3, op="const") + + if validate_model: + assert_model_is_valid(prog, {"x": (2, 4)}) + + +def test_divide_to_multiply(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + div_val = np.random.rand(2, 4).astype(np.float32) + div_const = mb.const(val=div_val, mode="immediate_value") + + div_val_1 = np.random.rand(2, 4).astype(np.float32) + div_const_1 = mb.const(val=div_val_1, mode="immediate_value") + + real_div = mb.real_div(x=x, y=div_const) + + return mb.real_div(x=real_div, y=div_const_1) + + assert_op_count_match(prog, expect=2, op="real_div") + assert_op_count_match(prog, expect=0, op="mul") + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::divide_to_multiply"](prog) + assert_same_output_names(prev_prog, prog) + assert_op_count_match(prog, expect=0, op="real_div") + assert_op_count_match(prog, expect=2, op="mul") + + if validate_model: + assert_model_is_valid(prog, {"x": (2, 4)}) + + +def test_fuse_matmul_weight_bias(): + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x): + weights_val = np.random.rand(2, 4).T.astype(np.float32) + weights = mb.const(val=weights_val, mode="immediate_value") + bias_val = np.random.rand(2).astype(np.float32) + bias = mb.const(val=bias_val, mode="immediate_value") + + matmul = mb.matmul(x=x, y=weights) + return mb.add(x=matmul, y=bias) + + assert_op_count_match(prog, expect=1, op="matmul") + assert_op_count_match(prog, expect=0, op="linear") + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::fuse_matmul_weight_bias"](prog) + assert_same_output_names(prev_prog, prog) + assert_op_count_match(prog, expect=0, op="matmul") + assert_op_count_match(prog, expect=1, op="linear") + + if validate_model: + assert_model_is_valid(prog, {"x": (2, 4)}) + + +def test_dead_code_elimination(): + @mb.program( + input_specs=[mb.TensorSpec(shape=(2, 4)), mb.TensorSpec(shape=(2, 4)),] + ) + def program0(x, y): + # following three unused op should be eliminated + a = mb.const(val=np.zeros(shape=(1,)), mode="immediate_value") + b = mb.const(val=np.zeros(shape=(1,)), mode="immediate_value") + _ = mb.add(x=a, y=b) + return mb.add(x=x, y=y) + + assert_op_count_match(program0, expect=4) + prev_prog = copy.deepcopy(program0) + PASS_REGISTRY["common::dead_code_elimination"](program0) + assert_same_output_names(prev_prog, program0) + assert_op_count_match(program0, expect=1) + + if 
validate_model: + assert_model_is_valid(program0, {"x": (2, 4), "y": (2, 4)}) + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def program1(x): + weights_val = np.random.rand(2, 4).T.astype(np.float32) + weights = mb.const(val=weights_val, mode="immediate_value") + bias_val = np.random.rand(4).astype(np.float32) + bias = mb.const(val=bias_val, mode="immediate_value") + + # unused op and its inputs should be eliminated + mb.matmul(x=x, y=weights) + + return mb.linear(x=x, weight=weights, bias=bias) + + assert_op_count_match(program1, expect=6) + prev_prog = copy.deepcopy(program1) + PASS_REGISTRY["common::dead_code_elimination"](program1) + assert_same_output_names(prev_prog, program1) + assert_op_count_match(program1, expect=3) + + if validate_model: + assert_model_is_valid(program1, {"x": (2, 4)}) + + +def test_remove_symbolic_reshape(): + sym_b = Symbol("s0") + original_shape = (sym_b, Symbol("s1"), 2) + reshape_name = "reshape" + + @mb.program(input_specs=[mb.TensorSpec(shape=(sym_b, 4))]) + def prog(x): + # const cannot represent symbolic values. Use _const_symbolic + shape = mb._const_symbolic(val=original_shape) + return mb.reshape(x=x, shape=shape, name=reshape_name) + + reshape_op = prog.find_ops( + prefix=reshape_name, op_type="reshape", exactly_one=True + )[0] + shape_var = reshape_op.shape + reshaped_var = reshape_op.outputs[0] + assert np.all(shape_var.sym_val == original_shape) + assert np.all(reshaped_var.shape == (sym_b, 2, 2)) + + # Note: we cannot deepcopy prog with symbol. + prev_outputs = [o.name for o in prog["main"].outputs] + PASS_REGISTRY["common::remove_symbolic_reshape"](prog) + curr_outputs = [o.name for o in prog["main"].outputs] + assert curr_outputs == prev_outputs + + reshape_op = prog.find_ops( + prefix=reshape_name, op_type="reshape", exactly_one=True + )[0] + shape_var = reshape_op.shape + reshaped_var = reshape_op.outputs[0] + # shape param cannot be symbolic after the pass + assert np.all(shape_var.sym_val == (-1, 2, 2)) + # output shape is still symbolic + assert np.all(reshaped_var.shape == (sym_b, 2, 2)) + + if validate_model: + assert_model_is_valid(prog, {"x": (3, 4)}) + + +def test_loop_invariant_elimination1(): + """ + Invariant pattern: Block input vars are returned as block output vars. 
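+    e.g. with loop_vars=(a, b) and body(a, b) = (add(x=a, y=b), b), the
+    second loop variable is returned by the body unchanged, so the pass can
+    drop it and read `b` directly from the enclosing scope.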
+ """ + + def body(a, b): + return mb.add(x=a, y=b), b + + def cond(a, b): + a_mean = mb.reduce_mean(x=a, axes=[0, 1]) + b_mean = mb.reduce_mean(x=b, axes=[0, 1]) + return mb.less(x=a_mean, y=b_mean) + + @mb.program( + input_specs=[mb.TensorSpec(shape=(1, 2)), mb.TensorSpec(shape=(1, 2)),] + ) + def prog(a, b): + # b is loop invariant + return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 2 + assert len(while_op.outputs) == 2 + assert len(while_op.loop_vars) == 2 + assert while_op.blocks[0].inputs[0].name == "a.x" + assert while_op.blocks[0].inputs[1].name == "b.x" + + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::loop_invariant_elimination"](prog) + assert_same_output_names(prev_prog, prog) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 1 + assert len(while_op.outputs) == 1 + assert len(while_op.loop_vars) == 1 + assert while_op.blocks[0].inputs[0].name == "a.x" + + if validate_model: + assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)}) + + +def test_loop_invariant_elimination2(): + """ + Invariant pattern: Block outputs var from outside of the block + """ + + @mb.program( + input_specs=[mb.TensorSpec(shape=(1, 2)), mb.TensorSpec(shape=(1, 2)),] + ) + def prog(a, b): + def body(a, bx): + return mb.add(x=a, y=b), b + + def cond(a, bx): + a_mean = mb.reduce_mean(x=a, axes=[0, 1]) + b_mean = mb.reduce_mean(x=bx, axes=[0, 1]) + return mb.less(x=a_mean, y=b_mean) + + # b is loop invariant + return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 2 + assert len(while_op.outputs) == 2 + assert len(while_op.loop_vars) == 2 + assert while_op.blocks[0].inputs[0].name == "a.x" + assert while_op.blocks[0].inputs[1].name == "b.x" + + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY["common::loop_invariant_elimination"](prog) + assert_same_output_names(prev_prog, prog) + + while_op = prog.find_ops(op_type="while_loop", exactly_one=True)[0] + assert len(while_op.blocks[0].inputs) == 1 + assert len(while_op.outputs) == 1 + assert len(while_op.loop_vars) == 1 + assert while_op.blocks[0].inputs[0].name == "a.x" + + if validate_model: + assert_model_is_valid(prog, {"a": (1, 2), "b": (1, 2)}) + + +def test_gelu_tanh_approximation(): + """ + Detect gelu tanh approx pattern, found in the TF bert model. + y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(3, 5, 6))]) + def prog(x): + x1 = mb.pow(x=x, y=3) + x1 = mb.mul(x=0.044715, y=x1) + x1 = mb.add(x=x1, y=x) + x1 = mb.mul(x=x1, y=np.sqrt(2 / np.pi)) + x1 = mb.tanh(x=x1) + x1 = mb.add(x=1, y=x1) + x1 = mb.mul(x=0.5, y=x1) + x1 = mb.mul(x=x, y=x1) + return x1 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::fuse_gelu_tanh_approximation" + ) + assert get_op_types_in_program(prev_prog) == [ + "pow", + "mul", + "add", + "mul", + "tanh", + "add", + "mul", + "mul", + ] + assert get_op_types_in_program(prog) == ["gelu"] + assert_model_is_valid( + prog, + {"x": (3, 5, 6)}, + expected_output_shapes={block.outputs[0].name: (3, 5, 6)}, + ) + + +@pytest.mark.parametrize("axes_size", [1, 2, 3]) +def test_layernorm_fusion(axes_size): + """ + Detect layer norm pattern, found in the TF bert model. 
+ y = x * [gamma * rsqrt(variance + eps)] + (beta - mean * [gamma * rsqrt(variance + eps)]) + + where mean and variance are computed along axes [-1] or [-1,-2] and so on + and gamma and beta are constants with rank equal to the length of the axes parameter. + """ + shape = (3, 5, 6) + rank = len(shape) + axes = list(range(rank - axes_size, rank)) + + @mb.program(input_specs=[mb.TensorSpec(shape=shape)]) + def prog(x): + x1 = mb.reduce_mean(x=x, axes=axes, keep_dims=True) + x2 = mb.sub(x=x, y=x1) + x2 = mb.square(x=x2) + x2 = mb.reduce_mean(x=x2, axes=axes, keep_dims=True) + x2 = mb.add(x=x2, y=1e-5) + x2 = mb.rsqrt(x=x2) + x3 = mb.mul(x=np.random.rand(*shape[-len(axes) :]), y=x2) + x4 = mb.mul(x=x3, y=x1) + x5 = mb.mul(x=x, y=x3) + x4 = mb.sub(x=np.random.rand(*shape[-len(axes) :]), y=x4) + y = mb.add(x=x4, y=x5) + return y + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::fuse_layernorm_or_instancenorm" + ) + assert get_op_types_in_program(prev_prog) == [ + "reduce_mean", + "sub", + "square", + "reduce_mean", + "add", + "rsqrt", + "mul", + "mul", + "mul", + "sub", + "add", + ] + assert get_op_types_in_program(prog) == ["layer_norm"] + assert_model_is_valid( + prog, {"x": shape}, expected_output_shapes={block.outputs[0].name: shape} + ) + + +def test_instancenorm_fusion(): + """ + Detect instance norm pattern + y = x * [gamma * rsqrt(variance + eps)] + (beta - mean * [gamma * rsqrt(variance + eps)]) + + where input is rank 4, (N,C,H,W), axis=[2, 3], along which reduction happens, + and gamma and beta are of shape (1,C,1,1) + """ + shape = (3, 5, 6, 7) + + @mb.program(input_specs=[mb.TensorSpec(shape=shape)]) + def prog(x): + x1 = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=True) + x2 = mb.sub(x=x, y=x1) + x2 = mb.square(x=x2) + x2 = mb.reduce_mean(x=x2, axes=[2, 3], keep_dims=True) + x2 = mb.add(x=x2, y=1e-5) + x2 = mb.rsqrt(x=x2) + x3 = mb.mul(x=np.random.rand(1, shape[1], 1, 1), y=x2) + x4 = mb.mul(x=x3, y=x1) + x5 = mb.mul(x=x, y=x3) + x4 = mb.sub(x=np.random.rand(1, shape[1], 1, 1), y=x4) + y = mb.add(x=x4, y=x5) + return y + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::fuse_layernorm_or_instancenorm" + ) + assert get_op_types_in_program(prev_prog) == [ + "reduce_mean", + "sub", + "square", + "reduce_mean", + "add", + "rsqrt", + "mul", + "mul", + "mul", + "sub", + "add", + ] + assert get_op_types_in_program(prog) == ["instance_norm"] + assert_model_is_valid( + prog, {"x": shape}, expected_output_shapes={block.outputs[0].name: shape} + ) + + +@pytest.mark.parametrize("rank", [1, 2, 3, 4]) +def test_onehot_matmul_to_gather_fusion(rank): + """ + Input: + %2 = one_hot(%1, on_value=1, off_value=0, axis=-1) + %3 = const() # rank 2 + %4 = matmul(%2, %3) + + Output: + %4 = gather(%3, %2, axis=0) + """ + rank4_shape = (10, 3, 6, 7) + input_shape = rank4_shape[-rank:] + vocab_size = 15 + embedding_size = 12 + + @mb.program(input_specs=[mb.TensorSpec(shape=input_shape, dtype=types.int32)]) + def prog(x): + x = mb.one_hot( + indices=x, on_value=1, off_value=0, axis=-1, one_hot_vector_size=vocab_size + ) + x = mb.matmul(x=x, y=np.random.rand(vocab_size, embedding_size)) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::fuse_onehot_matmul_to_gather" + ) + assert get_op_types_in_program(prev_prog) == ["one_hot", "matmul"] + assert get_op_types_in_program(prog) == ["gather"] + assert_model_is_valid( + prog, + {"x": input_shape}, + expected_output_shapes={block.outputs[0].name: input_shape + 
(embedding_size,)}, + ) diff --git a/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py b/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py new file mode 100644 index 000000000..8e689cb96 --- /dev/null +++ b/coremltools/converters/mil/mil/passes/test_reduce_transposes_pass.py @@ -0,0 +1,1407 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.testing_utils import ( + assert_op_count_match, + assert_model_is_valid, + get_op_types_in_program, + apply_pass_and_basic_check, +) +import unittest +import pytest + +import numpy as np + +np.random.seed(1984) + + +class TransposeOptimizationPass(unittest.TestCase): + """""" + + """ + Input graph: + input -----> transpose(axis=[1,0]) -----> transpose(axis=[1,0]) ----> relu ---> out + + Output graph: + input -----> relu -----> out + """ + + def test_simple_consecutive_ops_fusion(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20))]) + def prog(x): + x = mb.transpose(x=x, perm=[1, 0]) + x = mb.transpose(x=x, perm=[1, 0]) + x = mb.relu(x=x) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "transpose", "relu"] + ) + self.assertEqual(get_op_types_in_program(prog), ["relu"]) + assert_model_is_valid( + prog, + {"x": (10, 20)}, + expected_output_shapes={block.outputs[0].name: (10, 20)}, + ) + + """ + Input graph: + input---->transpose(axis=[0,3,1,2])---->relu---->log--->transpose(axis=[0,2,3,1])--->relu--->out + + Output graph: + input----->relu----->log----->relu--->out + """ + + def test_linear_graph_two_op_fusion(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x = mb.relu(x=x) + x = mb.log(x=x) + x = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x = mb.relu(x=x) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "log", "transpose", "relu"], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "log", "relu"]) + assert_model_is_valid( + prog, + {"x": (1, 2, 3, 4)}, + expected_output_shapes={block.outputs[0].name: (1, 2, 3, 4)}, + ) + + """ + Input graph: + input(shape=1,2,3,4)---->transpose(axis=[0,3,1,2])---->relu---->log--->transpose(axis=[0,2,3,1])--->relu--->out1(shape=1,2,3,4) + | + v + out2(shape=1,4,2,3) + + Output graph: + input(shape=1,2,3,4)---->relu---->log--->relu--->out1(shape=1,2,3,4) + | + |----->transpose(axis=[0,3,1,2])----->out2(shape=1,4,2,3) + """ + + def test_fusion_with_output_edge_inbetween(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x1 = mb.relu(x=x) + x2 = mb.log(x=x1) + x3 = mb.transpose(x=x2, perm=[0, 2, 3, 1]) + x4 = mb.relu(x=x3) + return x4, x1 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "log", "transpose", "relu"], + ) + self.assertEqual( + get_op_types_in_program(prog), ["relu", "log", "relu", "transpose"] + ) + assert_model_is_valid( + 
prog, + {"x": (1, 2, 3, 4)}, + expected_output_shapes={ + block.outputs[0].name: (1, 2, 3, 4), + block.outputs[1].name: (1, 4, 2, 3), + }, + ) + + """ + Input graph: + input---->transpose(axis=[0,3,1,2])---->relu---->transpose(axis=[0,2,3,1])--->out + + Output graph: + input----->relu----->out + """ + + def test_linear_graph_two_op_fusion_with_last_op_removal(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 3, 4))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x = mb.relu(x=x) + x = mb.transpose(x=x, perm=[0, 2, 3, 1]) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "relu", "transpose"] + ) + self.assertEqual(get_op_types_in_program(prog), ["relu"]) + assert_model_is_valid( + prog, + {"x": (1, 2, 3, 4)}, + expected_output_shapes={block.outputs[0].name: (1, 2, 3, 4)}, + ) + + """ + Input graph: + input(shape=10,2,3)--->transpose(axis=[0,2,1])----->relu---->transpose(axis=[0,2,1])---->out1 + | + | + --->relu----->log---->transpose(axis=[0,2,1])---->out2 + + Output graph: + input(shape=10,2,3)----->relu---->out1 + | + | + --->relu----->log---->out2 + """ + + def test_multiple_fusions(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 2, 1]) + x1 = mb.relu(x=x) + x2 = mb.relu(x=x) + y1 = mb.transpose(x=x1, perm=[0, 2, 1]) + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 2, 1]) + return y1, y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "relu", "transpose", "log", "transpose"], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "relu", "log"]) + + assert ( + prev_block.inputs["x"] + == prev_block.find_ops(op_type="transpose")[0].inputs["x"] + ) + assert block.find_ops(op_type="log")[0].outputs[0] in block.outputs + assert_model_is_valid( + prog, + {"x": (10, 2, 3)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3), + block.outputs[1].name: (10, 2, 3), + }, + ) + + """ + Input graph: + input(shape=10,2,3,5)--->transpose(axis=[0,2,3,1])----->relu---->pool----->out1 + | + | + --->relu----->log---->transpose(axis=[0,3,1,2])---->out2 + + + Output graph: + input(shape=10,2,3,5)----->relu---->transpose(axis=[0,2,3,1])---->pool----->out1 + | + | + --->relu----->log---->out2 + """ + + def test_partial_fusion_0(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3, 5))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x1 = mb.relu(x=x) + x2 = mb.relu(x=x) + y1 = mb.avg_pool( + x=x1, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + return y1, y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "relu", "avg_pool", "log", "transpose"], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "relu", "transpose", "avg_pool", "log"], + ) + + assert ( + prev_block.inputs["x"] + == prev_block.find_ops(op_type="transpose")[0].inputs["x"] + ) + assert block.find_ops(op_type="log")[0].outputs[0] == block.outputs[1] + assert ( + block.find_ops(op_type="transpose")[0].outputs[0] + == block.find_ops(op_type="avg_pool")[0].inputs["x"] + ) + assert 
list(block.find_ops(op_type="transpose")[0].perm.val) == [0, 2, 3, 1] + assert_model_is_valid( + prog, + {"x": (10, 2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (10, 3, 5, 2), + block.outputs[1].name: (10, 2, 3, 5), + }, + ) + + """ + Input graph: + input(shape=10,2,3,5)--->transpose(axis=[0,2,1,3])----->relu---->transpose(axis=[0,2,1,3])---->out1 + | + | + --->pool--->log---->transpose(axis=[0,2,1,3])---->out2 + + Output graph: + input(shape=10,2,3,5)----->relu---->out1 + | + | + --->transpose(axis=[0,2,1,3])---->pool----->log---->transpose(axis=[0,2,1,3])---->out2 + """ + + def test_partial_fusion_1(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3, 5))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + x1 = mb.relu(x=x) + x2 = mb.avg_pool(x=x, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid") + y1 = mb.transpose(x=x1, perm=[0, 2, 1, 3]) + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 2, 1, 3]) + return y1, y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "avg_pool", "transpose", "log", "transpose"], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "transpose", "avg_pool", "log", "transpose"], + ) + + assert block.inputs["x"] == block.find_ops(op_type="relu")[0].inputs["x"] + assert block.outputs[0] == block.find_ops(op_type="relu")[0].outputs[0] + assert_model_is_valid( + prog, + {"x": (10, 2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3, 5), + block.outputs[1].name: (10, 2, 3, 5), + }, + ) + + """ + Input graph: + + |-------> transpose(axis=[0,2,1,3]) ---->out1(shape=10,2,3,5) + | + input(shape=10,2,3,5)-->relu-->transpose(axis=[0,2,1,3])--->relu--->transpose(axis=[0,2,1,3]) ---->out2(shape=10,2,3,5) + | + |----->pool--------------->out3(shape=10,3,2,5) + | + |----->pool--------------->out4(shape=10.3.2.5) + + + Output graph: + + |---->out1(shape=10,2,3,5) + | + input---->relu---------->relu------->out2(shape=10,2,3,5) + | + |----->transpose(axis=[0,2,1,3])--->pool---->out3(shape=10,3,2,5) + | + |----->transpose(axis=[0,2,1,3])---->pool--->out4(shape=10.3.2.5) + """ + + def test_partial_fusion_2(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3, 5))]) + def prog(x): + x = mb.relu(x=x) + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + y1 = mb.transpose(x=x, perm=[0, 2, 1, 3]) + x1 = mb.relu(x=x) + y2 = mb.transpose(x=x1, perm=[0, 2, 1, 3]) + y3 = mb.avg_pool( + x=x1, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + y4 = mb.avg_pool( + x=x1, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + return y1, y2, y3, y4 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + [ + "relu", + "transpose", + "transpose", + "relu", + "transpose", + "avg_pool", + "avg_pool", + ], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "relu", "transpose", "avg_pool", "transpose", "avg_pool"], + ) + + assert block.outputs[0] == block.find_ops(op_type="relu")[0].outputs[0] + assert block.outputs[1] == block.find_ops(op_type="relu")[1].outputs[0] + assert_model_is_valid( + prog, + {"x": (10, 2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3, 5), + block.outputs[1].name: (10, 2, 3, 5), + block.outputs[2].name: (10, 3, 2, 5), + block.outputs[3].name: (10, 3, 2, 5), + }, + ) + + """ + Input graph: + + 
input(shape=10,2,3,5)-->relu--->transpose(axis=[0,2,1,3])----->transpose(axis=[0,2,1,3])---->out1(shape=10,2,3,5) + | + ---->relu------>out2(shape=10,3,2,5) + + Output graph: + + input(shape=10,2,3,5)-->relu---->out1(shape=10,2,3,5) + | + ---->relu--->transpose(axis=[0,2,1,3])------>out2(shape=10,3,2,5) + """ + + def test_partial_fusion_3(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3, 5))]) + def prog(x): + x = mb.relu(x=x) + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + x1 = mb.transpose(x=x, perm=[0, 2, 1, 3]) + x2 = mb.relu(x=x) + return x1, x2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["relu", "transpose", "transpose", "relu"], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "relu", "transpose"]) + + assert block.outputs[0] == block.find_ops(op_type="relu")[0].outputs[0] + assert_model_is_valid( + prog, + {"x": (10, 2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3, 5), + block.outputs[1].name: (10, 3, 2, 5), + }, + ) + + """ + Input graph: + + input(shape=10,2,3,5)-->relu--->transpose(axis=[0,2,1,3])----->transpose(axis=[0,2,1,3])---->out1(shape=10,2,3,5) + | + ------>out2(shape=10,3,2,5) + + Output graph: + same as input graph as one of the optimizing transpose is connected to model output + """ + + def test_partial_fusion_4(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3, 5))]) + def prog(x): + x = mb.relu(x=x) + out2 = mb.transpose(x=x, perm=[0, 2, 1, 3]) + out1 = mb.transpose(x=out2, perm=[0, 2, 1, 3]) + return out1, out2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["relu", "transpose", "transpose"] + ) + self.assertEqual( + get_op_types_in_program(prog), ["relu", "transpose", "transpose"] + ) + + assert block.outputs[1] == block.find_ops(op_type="transpose")[0].outputs[0] + assert_model_is_valid( + prog, + {"x": (10, 2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3, 5), + block.outputs[1].name: (10, 3, 2, 5), + }, + ) + + """ + Input graph: + input(shape=10,2,3,5)-->relu-->transpose(axis=[0,2,1,3])--->relu--->transpose(axis=[0,2,1,3]) ---->out1(shape=10,2,3,5) + | + |--->relu-->pool--------------->out2(shape=10,3,2,5) + | + |----->pool--------------->out3(shape=10.3.2.5) + + + Output graph: + same as the input graph as materialization ops are greater than cancel ops + """ + + def test_no_fusion_more_materialization_ops(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3, 5))]) + def prog(x): + x = mb.relu(x=x) + x = mb.transpose(x=x, perm=[0, 2, 1, 3]) + x1 = mb.relu(x=x) + y2 = mb.transpose(x=x1, perm=[0, 2, 1, 3]) + x2 = mb.relu(x=x1) + y3 = mb.avg_pool( + x=x2, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + y4 = mb.avg_pool( + x=x1, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + return y2, y3, y4 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["relu", "transpose", "relu", "transpose", "relu", "avg_pool", "avg_pool"], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "transpose", "relu", "transpose", "relu", "avg_pool", "avg_pool"], + ) + + assert_model_is_valid( + prog, + {"x": (10, 2, 3, 5)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3, 5), + block.outputs[1].name: 
(10, 3, 2, 5), + block.outputs[2].name: (10, 3, 2, 5), + }, + ) + + """ + Input graph: + input(shape=10,2,3)--->transpose(axis=[0,2,1])----->relu---->transpose(axis=[0,2,1])---->out1 + | + | + --->reduce(axis=2)----->log---->transpose(axis=[0,2,1])---->out2 + + Output graph: + input(shape=10,2,3)----->relu---->out1 + | + | + --->reduce(axis=1)----->log---->out2 + """ + + def test_fusion_with_axis_op(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 2, 3))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 2, 1]) + x1 = mb.relu(x=x) + x2 = mb.reduce_mean(x=x, axes=[2], keep_dims=True) + y1 = mb.transpose(x=x1, perm=[0, 2, 1]) + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 2, 1]) + return y1, y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "reduce_mean", "transpose", "log", "transpose"], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "reduce_mean", "log"]) + + assert list(block.find_ops(op_type="reduce_mean")[0].inputs["axes"].val) == [1] + assert_model_is_valid( + prog, + {"x": (10, 2, 3)}, + expected_output_shapes={ + block.outputs[0].name: (10, 2, 3), + block.outputs[1].name: (10, 1, 3), + }, + ) + + """ + Input graph: + input(shape=11,2,3,6)--->transpose(axis=[0,3,1,2])--- + | + | + --->pad(pad=[0,0,0,0,1,2,3,4]) + | + |-->log--->transpose(axis=[0,2,3,1])-->out1(shape=11,5,10,6) + + Output graph: + same as input graph, as transpose cannot be pushed through the pad op since "reflect" mode is only supported + along the last two axis + """ + + def test_fusion_with_pad_reflective_op_0(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(11, 2, 3, 6))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x2 = mb.pad(x=x, pad=[0, 0, 0, 0, 1, 2, 3, 4], mode="reflect") + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 2, 3, 1]) + return y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "pad", "log", "transpose"] + ) + self.assertEqual( + get_op_types_in_program(prog), ["transpose", "pad", "log", "transpose"] + ) + + assert list(block.find_ops(op_type="pad")[0].inputs["pad"].val.flatten()) == [ + 0, + 0, + 0, + 0, + 1, + 2, + 3, + 4, + ] + assert_model_is_valid( + prog, + {"x": (11, 2, 3, 6)}, + expected_output_shapes={block.outputs[0].name: (11, 5, 10, 6)}, + ) + + """ + Input graph: + input(shape=11,2,3,6)--->transpose(axis=[0,1,3,2])--- + | + | + --->pad(pad=[0,0,0,0,1,2,3,4]) + | + |-->log--->transpose(axis=[0,1,3,2])-->out1(shape=11,2,10,9) + + Output graph: + input(shape=11,2,3,6)--->pad(pad=[0,0,0,0,3,4,1,2])-->log-->out1(shape=11,2,10,9) + """ + + def test_fusion_with_pad_reflective_op_1(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(11, 2, 3, 6))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 1, 3, 2]) + x2 = mb.pad(x=x, pad=[0, 0, 0, 0, 1, 2, 3, 4], mode="reflect") + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 1, 3, 2]) + return y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "pad", "log", "transpose"] + ) + self.assertEqual(get_op_types_in_program(prog), ["pad", "log"]) + + assert list(block.find_ops(op_type="pad")[0].inputs["pad"].val.flatten()) == [ + 0, + 0, + 0, + 0, + 3, + 4, + 1, + 2, + ] + assert_model_is_valid( + prog, + 
{"x": (11, 2, 3, 6)}, + expected_output_shapes={block.outputs[0].name: (11, 2, 10, 9)}, + ) + + """ + Input graph: + input(shape=11,2,3,6)--->transpose(axis=[0,3,1,2])--- + | + | + --->pad(pad=[0,0,0,0,1,2,3,4]) + | + |-->log--->transpose(axis=[0,2,3,1])-->out1(shape=11,5,10,6) + + Output graph: + input(shape=11,2,3,6)--->pad(pad=[0,0,1,2,3,4,0,0])-->log-->out1(shape=11,5,10,6) + """ + + def test_fusion_with_pad_constant_op(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(11, 2, 3, 6))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x2 = mb.pad( + x=x, pad=[0, 0, 0, 0, 1, 2, 3, 4], mode="constant", constant_val=3.0 + ) + x3 = mb.log(x=x2) + y2 = mb.transpose(x=x3, perm=[0, 2, 3, 1]) + return y2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "pad", "log", "transpose"] + ) + self.assertEqual(get_op_types_in_program(prog), ["pad", "log"]) + + assert list(block.find_ops(op_type="pad")[0].inputs["pad"].val.flatten()) == [ + 0, + 0, + 1, + 2, + 3, + 4, + 0, + 0, + ] + assert_model_is_valid( + prog, + {"x": (11, 2, 3, 6)}, + expected_output_shapes={block.outputs[0].name: (11, 5, 10, 6)}, + ) + + """ + Input graph: + const(shape=2) + | + V + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])--->add---->transpose(axis=[0,3,1,2])--->out(shape=1,2,5,5) + + Output graph: + const(shape=1,2,1,1) + | + V + input(shape=1,2,5,5)--->add--->out(shape=1,2,5,5) + """ + + def test_fusion_with_add_constant_op(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x = mb.add(x=x, y=np.array([10, 100])) + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "add", "transpose"] + ) + self.assertEqual(get_op_types_in_program(prog), ["add"]) + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 2, 5, 5)}, + ) + + """ + Input graph: + const(scalar) + | + V + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])--->add---->transpose(axis=[0,3,1,2])--->out(shape=1,2,5,5) + + Output graph: + const(scalar) + | + V + input(shape=1,2,5,5)--->add--->out(shape=1,2,5,5) + """ + + def test_fusion_with_add_scalar_constant_op(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x = mb.add(x=5, y=x) + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + return x + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "add", "transpose"] + ) + self.assertEqual(get_op_types_in_program(prog), ["add"]) + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 2, 5, 5)}, + ) + + """ + Input graph: + input(shape=1,2,5,5)----->transpose(axis=[0,2,3,1])--->add---->transpose(axis=[0,3,1,2])--->out(shape=1,2,5,5) + | ^ + | | + |---->relu---->transpose(axis=[0,2,3,1]) + + Output graph: + input(shape=1,2,5,5)----->add--->out(shape=1,2,5,5) + | ^ + | | + |------>relu + """ + + def test_fusion_with_add_broadcastable_0(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.relu(x=x) + x2 
= mb.transpose(x=x2, perm=[0, 2, 3, 1]) + x3 = mb.add(x=x1, y=x2) + y = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + return y + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "relu", "transpose", "add", "transpose"], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "add"]) + + assert block.find_ops(op_type="relu")[0].inputs["x"] == block.inputs["x"] + assert block.find_ops(op_type="add")[0].inputs["x"] == block.inputs["x"] + assert ( + block.find_ops(op_type="add")[0].inputs["y"] + == block.find_ops(op_type="relu")[0].outputs[0] + ) + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 2, 5, 5)}, + ) + + """ + Input graph: + input(shape=1,2,5,5)----->transpose(axis=[0,2,3,1])--->add---->transpose(axis=[0,3,1,2])--->out(shape=1,2,5,5) + | ^ + | | + |----------------------->transpose(axis=[0,2,3,1]) + + Output graph: + input(shape=1,2,5,5)----->add--->out(shape=1,2,5,5) + | ^ + | | + |--------- + """ + + def test_fusion_with_add_broadcastable_1(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x3 = mb.add(x=x1, y=x2) + y = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + return y + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "transpose", "add", "transpose"], + ) + self.assertEqual(get_op_types_in_program(prog), ["add"]) + + assert block.find_ops(op_type="add")[0].inputs["x"] == block.inputs["x"] + assert block.find_ops(op_type="add")[0].inputs["y"] == block.inputs["x"] + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 2, 5, 5)}, + ) + + """ + Input graph: + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])---> relu---->concat(axis=3)----->transpose(axis=[0,3,1,2])----->out1(shape=1,4,5,5) + | ^ + | | + |->transpose(axis=[0,2,3,1])--->relu------------ + + Output graph: + input(shape=1,2,5,5)------> relu---->concat(axis=1)--->out1(shape=1,4,5,5) + | ^ + | | + |---->relu------------ + """ + + def test_concat_pattern_0(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x1 = mb.relu(x=x1) + x2 = mb.relu(x=x2) + x3 = mb.concat(values=[x1, x2], axis=3) + x4 = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + return x4 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "transpose", "relu", "relu", "concat", "transpose"], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "relu", "concat"]) + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={block.outputs[0].name: (1, 4, 5, 5)}, + ) + + """ + Input graph: + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])---> relu---->concat(axis=3)----->transpose(axis=[0,3,1,2])----->out1(shape=1,4,5,5) + | ^ + | | + |->transpose(axis=[0,2,3,1])------->relu-------- + | + V + pool--->out2(shape=1,5,5,2) + + + + Output graph: + input(shape=1,2,5,5)------> relu---->concat(axis=1)--->out1(shape=1,4,5,5) + | ^ + | | + |---->relu------------ + | + 
|--->transpose(axis=[0,2,3,1])---->pool--->out2(shape=1,5,5,2) + """ + + def test_concat_pattern_1(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x1 = mb.relu(x=x1) + x2 = mb.relu(x=x2) + x3 = mb.concat(values=[x1, x2], axis=3) + x4 = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + x5 = mb.avg_pool( + x=x2, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + return x4, x5 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + [ + "transpose", + "transpose", + "relu", + "relu", + "concat", + "transpose", + "avg_pool", + ], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "relu", "concat", "transpose", "avg_pool"], + ) + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={ + block.outputs[0].name: (1, 4, 5, 5), + block.outputs[1].name: (1, 5, 5, 2), + }, + ) + + """ + Input graph: + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])---> relu---->concat(axis=3)----->transpose(axis=[0,3,1,2])----->out1(shape=1,4,5,5) + | ^ + | | + |->transpose(axis=[0,2,3,1])------->relu-------- + | + V + relu--->out2(shape=1,5,5,2) + + + + Output graph: + input(shape=1,2,5,5)------> relu---->concat(axis=1)--->out1(shape=1,4,5,5) + | ^ + | | + |---->relu------------ + | + |--->relu---->transpose(axis=[0,2,3,1])---->out2(shape=1,5,5,2) + """ + + def test_concat_pattern_2(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x1 = mb.relu(x=x1) + x2 = mb.relu(x=x2) + x3 = mb.concat(values=[x1, x2], axis=3) + x4 = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + x5 = mb.relu(x=x2) + return x4, x5 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "transpose", "relu", "relu", "concat", "transpose", "relu"], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "relu", "concat", "relu", "transpose"], + ) + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={ + block.outputs[0].name: (1, 4, 5, 5), + block.outputs[1].name: (1, 5, 5, 2), + }, + ) + + """ + Input graph: + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])---> relu---->concat(axis=3)----->transpose(axis=[0,3,1,2])----->out1(shape=1,4,5,5) + | ^ + | | + |->transpose(axis=[0,2,3,1])------->relu-------- + | + V + out2(shape=1,5,5,2) + + + + Output graph: + input(shape=1,2,5,5)------> relu---->concat(axis=1)--->out1(shape=1,4,5,5) + | ^ + | | + |---->relu------------ + | + |--->transpose(axis=[0,2,3,1])---->out2(shape=1,5,5,2) + """ + + def test_concat_pattern_3(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x1 = mb.relu(x=x1) + x2 = mb.relu(x=x2) + x3 = mb.concat(values=[x1, x2], axis=3) + x4 = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + return x4, x2 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + ["transpose", "transpose", "relu", "relu", "concat", "transpose"], + ) + self.assertEqual( + get_op_types_in_program(prog), ["relu", "relu", "concat", "transpose"] + ) + + 
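+        # x2 feeds both a cancel op (the concat) and the block output, so the
+        # pass keeps exactly one materialized transpose, placed just before out2
+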
assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={ + block.outputs[0].name: (1, 4, 5, 5), + block.outputs[1].name: (1, 5, 5, 2), + }, + ) + + """ + Input graph: + input(shape=1,2,5,5)--->transpose(axis=[0,2,3,1])---> relu---->concat(axis=3)----->transpose(axis=[0,3,1,2])----->out1(shape=1,4,5,5) + | ^ + | | + |->transpose(axis=[0,2,3,1])------->relu-------- + | + V + transpose(axis=[0,3,1,2]) -----> out2(shape=1,2,5,5) + + Output graph: + input(shape=1,2,5,5)---> relu---->concat(axis=1)----->out1(shape=1,4,5,5) + | ^ + | | + |------------------->relu-------->out2(shape=1,2,5,5) + """ + + def test_concat_pattern_4(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 5, 5))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x1 = mb.relu(x=x1) + x2 = mb.relu(x=x2) + x3 = mb.concat(values=[x1, x2], axis=3) + x4 = mb.transpose(x=x3, perm=[0, 3, 1, 2]) + x5 = mb.transpose(x=x2, perm=[0, 3, 1, 2]) + return x4, x5 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + [ + "transpose", + "transpose", + "relu", + "relu", + "concat", + "transpose", + "transpose", + ], + ) + self.assertEqual(get_op_types_in_program(prog), ["relu", "relu", "concat"]) + + assert_model_is_valid( + prog, + {"x": (1, 2, 5, 5)}, + expected_output_shapes={ + block.outputs[0].name: (1, 4, 5, 5), + block.outputs[1].name: (1, 2, 5, 5), + }, + ) + + """ + Input graph: + constant(shape=[30,10,5]) + | + V + input(shape=10,20,30)--->transpose(axis=[2,0,1])--->concat(axis=2)----->transpose(axis=[1,2,0])----->out1(shape=10,25,30) + + Output graph: + constant(shape=[10,5,30]) + | + V + input(shape=10,20,30)--->concat(axis=1)----->out1(shape=10,25,30) + """ + + def test_concat_pattern_5(self): + const = np.random.rand(30, 10, 5) + + @mb.program(input_specs=[mb.TensorSpec(shape=(10, 20, 30))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[2, 0, 1]) + c = mb.const(val=const) + x2 = mb.concat(values=[x1, c], axis=2) + x3 = mb.transpose(x=x2, perm=[1, 2, 0]) + return x3 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), ["transpose", "concat", "transpose"] + ) + self.assertEqual(get_op_types_in_program(prog), ["concat"]) + + assert_model_is_valid( + prog, + {"x": (10, 20, 30)}, + expected_output_shapes={block.outputs[0].name: (10, 25, 30)}, + ) + + """ + Input graph: + input2(shape=30,10,20)-----| + | + input(shape=10,20,30)--->transpose(axis=[2,0,1])----->relu-----|----->concat(axis=2)------>out1(shape=90,10,20) + | | + |-->relu-----| + | + |-->relu---->transpose(axis=[1,2,0])---->out2(shape=10,20,30) + | + |-->relu---->transpose(axis=[1,2,0])---->out3(shape=10,20,30) + | + |-->relu---->transpose(axis=[1,2,0])---->out4(shape=10,20,30) + + Output graph: + + input2(shape=30,10,20)-----| + | + input(shape=10,20,30)----->relu--->transpose(axis=[2,0,1])-----|----->concat(axis=2)------>out1(shape=90,10,20) + | | + |-->relu--->transpose(axis=[2,0,1])-----| + | + |-->relu---->out2(shape=10,20,30) + | + |-->relu---->out3(shape=10,20,30) + | + |-->relu---->out4(shape=10,20,30) + + Output graph: + """ + + def test_concat_pattern_6(self): + @mb.program( + input_specs=[ + mb.TensorSpec(shape=(10, 20, 30)), + mb.TensorSpec(shape=(30, 10, 20)), + ] + ) + def prog(x, y): + x1 = mb.transpose(x=x, perm=[2, 0, 1]) + r1 = mb.relu(x=x1) + r2 = 
mb.relu(x=x1) + r3 = mb.relu(x=x1) + r4 = mb.relu(x=x1) + r5 = mb.relu(x=x1) + + x2 = mb.concat(values=[r1, r2, y], axis=0) + x3 = mb.transpose(x=r3, perm=[1, 2, 0]) + x4 = mb.transpose(x=r4, perm=[1, 2, 0]) + x5 = mb.transpose(x=r5, perm=[1, 2, 0]) + return x2, x3, x4, x5 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + [ + "transpose", + "relu", + "relu", + "relu", + "relu", + "relu", + "concat", + "transpose", + "transpose", + "transpose", + ], + ) + self.assertEqual( + get_op_types_in_program(prog), + [ + "relu", + "relu", + "relu", + "relu", + "relu", + "transpose", + "transpose", + "concat", + ], + ) + + assert_model_is_valid( + prog, + {"x": (10, 20, 30), "y": (30, 10, 20)}, + expected_output_shapes={ + block.outputs[0].name: (90, 10, 20), + block.outputs[1].name: (10, 20, 30), + block.outputs[2].name: (10, 20, 30), + block.outputs[3].name: (10, 20, 30), + }, + ) + + """ + Input graph: + input(shape=1,5,5,3)----->transpose(axis=[0,3,1,2]) + | + ---->relu-------------->transpose(axis=[0,2,3,1]) + | | + | V + | relu + | | + | V + | transpose(axis=[0,3,1,2]) + | | + | V + ----------------> add --------> relu---->pool---->out(shape=1,3,5,5) + + + Output graph: + + + input(shape=1,5,5,3)---->relu------------------------> relu + | | + | V + ----------------> add + | + V + relu + | + V + transpose(axis=[0,3,1,2])-->pool---->out(shape=1,3,5,5) + + """ + + def test_skip_connection_pattern_0(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 5, 5, 3))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x = mb.relu(x=x) + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.relu(x=x1) + x3 = mb.transpose(x=x2, perm=[0, 3, 1, 2]) + x4 = mb.add(x=x, y=x3) + x5 = mb.relu(x=x4) + x6 = mb.avg_pool( + x=x5, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + return x6 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + get_op_types_in_program(prev_prog), + [ + "transpose", + "relu", + "transpose", + "relu", + "transpose", + "add", + "relu", + "avg_pool", + ], + ) + self.assertEqual( + get_op_types_in_program(prog), + ["relu", "relu", "add", "relu", "transpose", "avg_pool"], + ) + assert_model_is_valid( + prog, + {"x": (1, 5, 5, 3)}, + expected_output_shapes={block.outputs[0].name: (1, 3, 5, 5)}, + ) + + """ + Input graph: + input(shape=1,5,5,3)----->transpose(axis=[0,3,1,2]) + | + ---->relu-------------->transpose(axis=[0,2,3,1]) + | | + | V + | relu + | | + | V + | transpose(axis=[0,3,1,2]) + | | + | V + ----------------> add -->transpose(axis=[0,2,3,1]) + | + V + relu---->pool---->out(shape=1,5,5,3) + + + Output graph: + + + input(shape=1,5,5,3)---->relu------------------------> relu + | | + | V + ----------------> add + | + V + relu + | + V + pool---->out(shape=1,5,5,3) + + """ + + def test_skip_connection_pattern_1(self): + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 5, 5, 3))]) + def prog(x): + x = mb.transpose(x=x, perm=[0, 3, 1, 2]) + x = mb.relu(x=x) + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.relu(x=x1) + x3 = mb.transpose(x=x2, perm=[0, 3, 1, 2]) + x4 = mb.add(x=x, y=x3) + x4 = mb.transpose(x=x4, perm=[0, 2, 3, 1]) + x5 = mb.relu(x=x4) + x6 = mb.avg_pool( + x=x5, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid" + ) + return x6 + + prev_prog, prev_block, block = apply_pass_and_basic_check( + prog, "common::reduce_transposes" + ) + self.assertEqual( + 
get_op_types_in_program(prev_prog), + [ + "transpose", + "relu", + "transpose", + "relu", + "transpose", + "add", + "transpose", + "relu", + "avg_pool", + ], + ) + self.assertEqual( + get_op_types_in_program(prog), ["relu", "relu", "add", "relu", "avg_pool"] + ) + assert_model_is_valid( + prog, + {"x": (1, 5, 5, 3)}, + expected_output_shapes={block.outputs[0].name: (1, 5, 5, 3)}, + ) diff --git a/coremltools/converters/mil/mil/program.py b/coremltools/converters/mil/mil/program.py new file mode 100644 index 000000000..068956685 --- /dev/null +++ b/coremltools/converters/mil/mil/program.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +import logging as _logging +import numpy as _np +import sympy as _sm +from . import types +from .block import Function +from .var import Var +from .types.symbolic import k_used_symbols, k_num_internal_syms +from coremltools.converters.mil.input_types import InputType + + +class Program(object): + def __init__(self): + self.main_input_types = {} + self.functions = {} + self.parameters = {} + + def add_function(self, name, ssa_func): + if not isinstance(ssa_func, Function): + raise ValueError("Only Function can be added to Program.") + self.functions[name] = ssa_func + + def add_parameters(self, name, ssa_val): + raise NotImplementedError() + + def set_main_input_types(self, inputs): + if not isinstance(inputs, tuple): + raise ValueError("main inputs should be tuple of TensorType or ImageType") + elif not all([isinstance(inp, InputType) for inp in inputs]): + raise ValueError("main inputs should be tuple of InputType") + self.main_input_types = inputs + + def find_ops(self, prefix=None, op_type=None, exactly_one=False): + """ + Return the list of ops whose name matches `prefix` (if specified) and + whose op type matches `op_type` (if specified). At least one of + {prefix, op_type} must be specified. + + If `exactly_one` == True, raise ValueError unless exactly one op + satisfies the criteria. + + prefix: str + + Return list[Operation]. Empty list if no op satisfies. + """ + found_ops = [] + for f_name, f in self.functions.items(): + found_ops.extend(f.find_ops(prefix=prefix, op_type=op_type)) + if exactly_one and len(found_ops) != 1: + msg = "Expected exactly one matching op. Found ops: {}" + raise ValueError(msg.format(found_ops)) + return found_ops + + def validate(self): + for f_name, f in self.functions.items(): + f.validate() + + def __getitem__(self, func_name): + if func_name not in self.functions: + msg = "Function {} not found among functions {}." + raise KeyError(msg.format(func_name, self.functions.keys())) + return self.functions[func_name] + + def __repr__(self): + return self.__str__() + + def __str__(self): + s = "" + for f_name, f in self.functions.items(): + s += f.to_str(f_name) + return s + + +class Placeholder(object): + counter = 0 + + def __init__(self, sym_shape, dtype=None, name=None): + """ + sym_shape: () or [] for scalar. list, tuple, np.ndarray for tensor. May + contain Symbol as symbolic shape (but not string). + + dtype: types.float or other scalar builtin types.
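+ + Example (a sketch of intended use; get_new_symbol is defined later in this + file): Placeholder((get_new_symbol("batch"), 3)) describes a rank-2 tensor + whose leading dimension is symbolic.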
+ """ + if not isinstance(sym_shape, (list, tuple, _np.generic, _np.ndarray)): + raise ValueError("Illegal shape for Placeholder: {}".format(sym_shape)) + self.sym_shape = sym_shape + self.dtype = dtype + if self.dtype is None: + self.dtype = types.float + sym_type = self.type_inference() + + # Globally unique var name for placeholders + name = "placeholder_" + str(self.__class__.counter) + self.__class__.counter += 1 + + # List of output vars (consistent w/ other ops) + self.outputs = [Var(name, sym_type)] + + def set_name(self, name): + self.name = name + self.outputs[0].name = name + + def type_inference(self): + if len(self.sym_shape) == 0: + return self.dtype + return types.tensor(self.dtype, self.sym_shape) + + def __str__(self): + return str(self.outputs[0]) + + +def get_new_variadic_symbol(): + global k_num_internal_syms + s = Symbol("*is" + str(k_num_internal_syms)) + k_num_internal_syms += 1 + return s + + +def get_new_symbol(name=None): + """ + Returns a new symbol, optionally named. + + name: str (optional) + Optional name that provides more readability. If the name specified is + not available, an extra integer will be appended. + """ + global k_used_symbols + global k_num_internal_syms + + if name is not None: + s = Symbol(name) + if s in k_used_symbols: + new_name = name + k_num_internal_syms + msg = 'Symbol name "{}" already occupied. Renaming to {}' + _logging.warning(msg.format(name, new_name)) + s = Symbol(new_name) + else: + s = Symbol("is" + str(k_num_internal_syms)) + k_num_internal_syms += 1 + return s + + +class Symbol(_sm.Symbol): + def __init__(self, sym_name): + """ + Essentially sympy.Symbol representing an i32 value in shape. + + sym_name: str. If first character is *, then this symbol represents + variadic rank. Otherwise the symbol name should start with a alpha + character. `sym_name` must be unique if specified, or it'd be auto + generated (to a non-variadic symbol). Furthermore, sym_name may not + start with 'is' (internal symbol) + """ + if not (sym_name[0].isalpha() or sym_name[0] == "*"): + msg = "Symbol name must start with a letter or *. Got {}" + raise ValueError(msg.format(sym_name)) + global k_used_symbols + if sym_name in k_used_symbols: + msg = "Symbol `{}` is used already." + raise ValueError(msg.format(sym_name)) + k_used_symbols.add(sym_name) + self.name = sym_name diff --git a/coremltools/converters/mil/mil/tests/test_block.py b/coremltools/converters/mil/mil/tests/test_block.py new file mode 100644 index 000000000..3033c5357 --- /dev/null +++ b/coremltools/converters/mil/mil/tests/test_block.py @@ -0,0 +1,332 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.testing_utils import ( + get_op_types_in_program, + assert_same_output_shapes, + assert_same_output_names, +) +import copy + +""" +Test manipulating variable and operations in the Block. + +In the test, we are actually testing Function, which is a child class of +Block. Technically Function should not inherit from Block, which is a +debt to be resolved in the future. + +Function has some different behaviors from Block that are irrelevant to +the core API being tested here. 
+""" + + +def test_empty_block(): + """Test an empty program + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x0): + return x0 + + block = prog.functions["main"] + assert len(block.operations) == 0 + assert len(block.inputs) == 1 + assert len(block.outputs) == 1 + assert block.inputs["x0"] == block.outputs[0] + print(prog) + + +def test_add_op(): + """Test add statement to an empty program, also change the output + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x0): + return x0 + + print("before:\n{}".format(prog)) + block = prog.functions["main"] + x0 = block.inputs["x0"] + with block: + x1 = mb.log(x=x0) + block.set_outputs([x1]) + print("after:\n{}".format(prog)) + assert block.inputs["x0"] == block.find_ops(op_type="log")[0].inputs["x"] + assert len(block.operations) == 1 + assert block.operations[0].op_type == "log" + assert block.outputs[0] == x1 + + +def test_remove_op(): + """Test remove all ops and return empty program + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x0): + x1 = mb.log(x=x0) + return x1 + + print("before:\n{}".format(prog)) + block = prog.functions["main"] + x0 = block.inputs["x0"] + ops = block.find_ops(op_type="log") + block.set_outputs([x0]) + block.remove_ops(ops) + print("after:\n{}".format(prog)) + assert len(block.operations) == 0 + assert len(block.inputs) == 1 + assert len(block.outputs) == 1 + assert block.inputs["x0"] == block.outputs[0] + + +def test_remove_op2(): + """Test remove ops with multiple identical inputs + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x0): + x1 = mb.add(x=x0, y=x0) + return x1 + + print("before:\n{}".format(prog)) + block = prog.functions["main"] + x0 = block.inputs["x0"] + ops = block.find_ops(op_type="add") + block.set_outputs([x0]) + block.remove_ops(ops) + print("after:\n{}".format(prog)) + assert len(block.operations) == 0 + assert len(block.inputs) == 1 + assert len(block.outputs) == 1 + assert block.inputs["x0"] == block.outputs[0] + + +def test_op_removal_and_insertion(): + """ + Remove a transpose pair and materialize one transpose before another op + Given: + %x1 = transpose(%x) + %x2 = relu(%x1) + %out1 = avg_pool(%x2) + %x3 = transpose(%x2) + %out2 = log(%x3) + + After removing both transposes: + %x2 = relu(%x) + %out1 = avg_pool(%x2) + %out2 = log(%x2) + + After inserting a transpose: + %x2 = relu(%x) + %x4 = transpose(%x2) + %out1 = avg_pool(%x4) + %out2 = log(%x2) + + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(1, 2, 6, 6))]) + def prog(x): + x1 = mb.transpose(x=x, perm=[0, 2, 3, 1]) + x2 = mb.relu(x=x1) + out1 = mb.avg_pool(x=x2, kernel_sizes=[1, 1], strides=[1, 1], pad_type="valid") + x3 = mb.transpose(x=x2, perm=[0, 3, 1, 2]) + out2 = mb.log(x=x3) + return out1, out2 + + prev_prog = copy.deepcopy(prog) + + print("before:\n{}".format(prog)) + assert get_op_types_in_program(prog) == [ + "transpose", + "relu", + "avg_pool", + "transpose", + "log", + ] + block = prog.functions["main"] + + def remove_transpose(block): + op = block.find_ops(op_type="transpose")[0] + block.replace_uses_of_var_after_op( + anchor_op=op.inputs["x"].op, + old_var=op.outputs[0], + new_var=op.inputs["x"], + no_check_var_types=True, + ) + block.remove_ops([op]) + + # remove 1st transpose + remove_transpose(block) + assert get_op_types_in_program(prog) == ["relu", "avg_pool", "transpose", "log"] + + # remove 2nd transpose + remove_transpose(block) + assert get_op_types_in_program(prog) == ["relu", "avg_pool", "log"] + + 
print("after transpose ops removal:\n{}".format(prog)) + + # insert transpose before pool + pool_op = block.find_ops(op_type="avg_pool")[0] + with block: + y = mb.transpose(x=pool_op.inputs["x"], perm=[0, 2, 3, 1], before_op=pool_op) + + block.replace_uses_of_var_after_op( + anchor_op=y.op, + end_op=pool_op, + old_var=pool_op.inputs["x"], + new_var=y, + no_check_var_types=True, + ) + + print("after transpose insertion:\n{}".format(prog)) + assert get_op_types_in_program(prog) == ["relu", "transpose", "avg_pool", "log"] + + for op in block.operations: + op.type_value_inference(overwrite_output=True) + + assert_same_output_names(prev_prog, prog) + assert_same_output_shapes(prev_prog, prog) + + +def test_simple_substituion(): + """Replace log(x+y) with log(x*y) + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4)), mb.TensorSpec(shape=(2, 4))]) + def prog(x0, y0): + x1 = mb.add(x=x0, y=y0) + z = mb.log(x=x1) + return z + + print("before:\n{}".format(prog)) + block = prog.functions["main"] + assert len(block.find_ops(op_type="log")) == 1 + assert len(block.find_ops(op_type="add")) == 1 + assert len(block.find_ops(op_type="mul")) == 0 + + add = block.find_ops(op_type="add")[0] + + x0 = add.inputs["x"] + y0 = add.inputs["y"] + x1 = add.outputs[0] + + with block: + # It's important to add 'mul' before 'add' (its even better to do it + # immediately after 'add' but we don't have the API) + # because we need to replace any op affected by add with 'mul' + x2 = mb.mul(x=x0, y=y0, before_op=add) + + assert len(block.find_ops(op_type="mul")) == 1 + assert len(block.find_ops(op_type="add")) == 1 + assert len(block.find_ops(op_type="log")) == 1 + + # It's important to set anchor_op = 'mul' because new_var is only visible + # after 'mul'. + block.replace_uses_of_var_after_op(anchor_op=x2.op, old_var=x1, new_var=x2) + block.remove_ops([add]) + + print("after:\n{}".format(prog)) + assert len(block.find_ops(op_type="add")) == 0 + assert len(block.find_ops(op_type="mul")) == 1 + assert len(block.find_ops(op_type="log")) == 1 + + +def test_substitute_nested_op(): + """"Replace an conditional op with nested block""" + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4)), mb.TensorSpec(shape=(2, 4))]) + def prog(x0, y0): + pred = mb.less(x=x0, y=y0) + z = mb.cond( + pred=pred, _true_fn=lambda: mb.abs(x=x0), _false_fn=lambda: mb.abs(x=y0) + ) + z1 = mb.log(x=z) + return z1 + + print("before:\n{}".format(prog)) + block = prog.functions["main"] + assert len(block.find_ops(op_type="less")) == 1 + assert len(block.find_ops(op_type="abs")) == 2 + assert len(block.find_ops(op_type="cond")) == 1 + assert len(block.find_ops(op_type="log")) == 1 + + cond = block.find_ops(op_type="cond")[0] + x0 = block.inputs["x0"] + z = cond.outputs[0] + block.replace_uses_of_var_after_op(anchor_op=None, old_var=z, new_var=x0) + + # removing cond will also remove the abs ops within its block + block.remove_ops([cond]) + + print("after:\n{}".format(prog)) + assert len(block.find_ops(op_type="less")) == 1 + assert len(block.find_ops(op_type="log")) == 1 + assert len(block.find_ops(op_type="cond")) == 0 + assert len(block.find_ops(op_type="abs")) == 0 + + +def test_simple_transpose_squash(): + """Test eliminate consecutive transpose can be canceled + """ + + @mb.program(input_specs=[mb.TensorSpec(shape=(2, 4))]) + def prog(x0): + x1 = mb.transpose(x=x0, perm=[1, 0]) + x2 = mb.transpose(x=x1, perm=[1, 0]) + x3 = mb.log(x=x2) + x4 = mb.transpose(x=x3, perm=[1, 0]) + x5 = mb.transpose(x=x4, perm=[1, 0]) + x6 = mb.transpose(x=x5, 
print("before:\n{}".format(prog)) + block = prog.functions["main"] + assert len(block.find_ops(op_type="transpose")) == 6 + + def can_squash(trans1, trans2): + return ( + len(trans1.outputs) == 1 + and len(trans2.outputs) == 1 + and all(trans1.perm.val == trans2.perm.val) + ) + + # Find all candidate pairs of transposes. + # We ignore all consts (transpose_perm_%x) and add each adjacent pair of + # transpose ops as a candidate. This won't generalize to more complicated + # programs with other shape-invariant ops in between. + candidates = [] + non_const_ops = [op for op in block.operations if op.op_type != "const"] + for i in range(len(non_const_ops) - 1): + op = non_const_ops[i] + if len(candidates) and op == candidates[-1][1]: + # op is already a squash candidate + continue + next_op = non_const_ops[i + 1] + if ( + op.op_type == "transpose" + and next_op.op_type == "transpose" + and can_squash(op, next_op) + ): + candidates.append((op, next_op)) + + # Remove each candidate pair + for (trans1, trans2) in candidates: + before = trans1.inputs["x"] + after = trans2.outputs[0] + block.replace_uses_of_var_after_op( + anchor_op=trans2, old_var=after, new_var=before + ) + block.remove_ops([trans1, trans2]) + + print("after:\n{}".format(prog)) + assert len(block.find_ops(op_type="transpose")) == 0 diff --git a/coremltools/converters/mil/mil/tests/test_programs.py b/coremltools/converters/mil/mil/tests/test_programs.py new file mode 100644 index 000000000..4b192ef8a --- /dev/null +++ b/coremltools/converters/mil/mil/tests/test_programs.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import pytest +from coremltools import models +import numpy as np
from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil import converter +import logging + +np.random.seed(0) + + +def test_single_layer_example(): + batch_size, input_dim, output_dim = 2, 4, 2 + + @mb.program( + input_specs=[mb.TensorSpec(shape=(batch_size, input_dim)),] + ) + def prog(x): + # Weight + W_val = ( + np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) + .reshape(input_dim, output_dim) + .T.astype(np.float32) + ) + W = mb.const(val=W_val, mode="file_value", name="const_W") + + # bias + b_val = np.array([-0.5, 0.5]).astype(np.float32) + b = mb.const(val=b_val, mode="file_value", name="const_b") + + return mb.linear(x=x, weight=W, bias=b, name="lin") + + logging.info("prog:\n{}".format(prog)) + + proto = converter._convert(prog, convert_from="mil", convert_to="nn_proto") + + feed_dict = { + "x": np.random.rand(batch_size, input_dim).astype(np.float32), + } + model = models.MLModel(proto) + assert model is not None + prediction = model.predict(feed_dict) + assert len(prediction) == 1 + + +def test_conv_example(): + batch, C_in, C_out, H, W = 2, 2, 3, 7, 10 + kH, kW = 3, 5 + img_shape, seq_shape = (batch, C_in, H, W), (batch, C_in, H) + + @mb.program( + input_specs=[mb.TensorSpec(shape=img_shape), mb.TensorSpec(shape=seq_shape),] + ) + def prog(img, seq): + ## 2D convolution + # Weight + W_2d = np.random.rand(C_out, C_in, kH, kW).astype(np.float32) + W_2d = mb.const(val=W_2d, mode="file_value", name="const_W") + + # Test 1: provide only required arguments.
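+ # Assuming the default stride of 1, valid padding with kernel (kH, kW) = + # (3, 5) shrinks the spatial dims to (H - kH + 1, W - kW + 1) = (5, 6). +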
conv1 = mb.conv(x=img, weight=W_2d, pad_type="valid") + logging.info("conv1 shape: {}".format(conv1.shape)) + + # Test 2: stride > 1 + conv2 = mb.conv(x=img, weight=W_2d, pad_type="valid", strides=[2, 3]) + logging.info("conv2 shape: {}".format(conv2.shape)) + + # Test 3: same padding + conv3 = mb.conv(x=img, weight=W_2d, pad_type="same", strides=[2, 3]) + logging.info("conv3 shape: {}".format(conv3.shape)) + + # Test max_pool with valid padding + pool1 = mb.max_pool( + x=img, kernel_sizes=[kH, kW], pad_type="valid", strides=[2, 3] + ) + logging.info("pool1 shape: {}".format(pool1.shape)) + + # Test max_pool with same padding + pool2 = mb.max_pool( + x=img, kernel_sizes=[kH, kW], pad_type="same", strides=[2, 3] + ) + logging.info("pool2 shape: {}".format(pool2.shape)) + + ## 1D convolution + W_1d = np.random.rand(C_out, C_in, kH).astype(np.float32) + W_1d = mb.const(val=W_1d, mode="file_value", name="const_W_1d") + logging.info("W_1d val: {}".format(W_1d.val)) + + # Test 4: provide only required arguments for 1D. + conv4 = mb.conv(x=seq, weight=W_1d, pad_type="valid") + + logging.info("conv4 shape: {}".format(conv4.shape)) + + return conv1, conv2, conv3, pool1, pool2, conv4 + + proto = converter._convert(prog, convert_from="mil", convert_to="nn_proto") + + feed_dict = { + "img": np.random.rand(*img_shape).astype(np.float32), + "seq": np.random.rand(*seq_shape).astype(np.float32), + } + model = models.MLModel(proto) + assert model is not None + prediction = model.predict(feed_dict) + assert len(prediction) == 6 + + +def test_while_example(): + def body(a, b): + return mb.add(x=a, y=b), b + + def cond(a, b): + a_mean = mb.reduce_mean(x=a, axes=[0, 1]) + b_mean = mb.reduce_mean(x=b, axes=[0, 1]) + return mb.less(x=a_mean, y=b_mean) + + @mb.program( + input_specs=[mb.TensorSpec(shape=(1, 2)), mb.TensorSpec(shape=(1, 2)),] + ) + def prog(a, b): + return mb.while_loop(_cond=cond, _body=body, loop_vars=(a, b)) + + logging.info("prog:\n{}".format(prog)) + + proto = converter._convert(prog, convert_from="mil", convert_to="nn_proto") + + feed_dict = { + "a": np.random.rand(1, 2).astype(np.float32), + "b": np.random.rand(1, 2).astype(np.float32), + } + model = models.MLModel(proto) + assert model is not None + prediction = model.predict(feed_dict) + assert len(prediction) == 2 diff --git a/coremltools/converters/mil/mil/types/__init__.py b/coremltools/converters/mil/mil/types/__init__.py new file mode 100644 index 000000000..6a9b45755 --- /dev/null +++ b/coremltools/converters/mil/mil/types/__init__.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved.
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +# pylint: disable=wildcard-import +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +from .type_double import fp16, fp32, fp64, float, double, is_float +from .type_int import ( + int8, + int16, + int32, + int64, + int, + uint8, + uint16, + uint32, + uint64, + uint, + is_int, +) +from .type_str import str +from .type_bool import bool, is_bool +from .type_list import list, empty_list, is_list +from .type_tensor import ( + tensor, + is_tensor_and_is_compatible, + is_tensor_and_is_compatible_general_shape, + tensor_has_complete_shape, +) +from .type_dict import dict, empty_dict +from .type_void import void +from .type_globals_pseudo_type import globals_pseudo_type +from .type_unknown import unknown +from .type_tuple import tuple +from .type_mapping import ( + is_primitive, + is_scalar, + is_tensor, + is_tuple, + is_str, + is_builtin, + promote_types, + numpy_val_to_builtin_val, + builtin_to_string, + numpy_type_to_builtin_type, + type_to_builtin_type, + is_subtype, + string_to_builtin, + nptype_from_builtin, +) +from .annotate import annotate +from .annotate import class_annotate +from .annotate import apply_delayed_types +from .annotate import delay_type +from .type_spec import * +from .get_type_info import * +from .operator_names import * +from .global_methods import global_remap +from math import log, exp + +apply_delayed_types() diff --git a/coremltools/converters/nnssa/commons/builtins/annotate.py b/coremltools/converters/mil/mil/types/annotate.py similarity index 81% rename from coremltools/converters/nnssa/commons/builtins/annotate.py rename to coremltools/converters/mil/mil/types/annotate.py index b03327d31..562defe73 100644 --- a/coremltools/converters/nnssa/commons/builtins/annotate.py +++ b/coremltools/converters/mil/mil/types/annotate.py @@ -1,7 +1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ +from six import string_types as _string_types class delay_type_cls: @@ -43,7 +50,7 @@ def annotate(return_type=_invalid_placeholder_type, **kwargs): - captured variables - function arguments - other variables within the function - + Ex: @annotate(compyler.double, a=compyler.double, b=compyler.double) @@ -63,8 +70,8 @@ def __add__(self, other): class double: @annotate(delay_type.double, other=delay_type.double) def __add__(self, other): - - After which apply_delayed_types() must be called to fill in the delayed + + After which apply_delayed_types() must be called to fill in the delayed type. """ global annotated_function_list @@ -94,17 +101,22 @@ def decorator(cls): return decorator -def apply_delayed_types(type_map=annotated_class_list, fnlist=annotated_function_list): +def apply_delayed_types( + type_map=annotated_class_list, fnlist=annotated_function_list +): # pylint: disable=dangerous-default-value """ Apply all delayed types. 
See annotate() """ + # pylint: disable=no-member # type name is a dict from str to type for func in fnlist: - if hasattr(func, 'return_type') and \ - isinstance(func.return_type, str) and \ - func.return_type in type_map: + if ( + hasattr(func, "return_type") + and isinstance(func.return_type, _string_types) + and func.return_type in type_map + ): func.return_type = type_map[func.return_type] - if hasattr(func, 'type_annotations'): + if hasattr(func, "type_annotations"): for key in func.type_annotations: if func.type_annotations[key] in type_map: func.type_annotations[key] = type_map[func.type_annotations[key]] diff --git a/coremltools/converters/nnssa/commons/builtins/get_type_info.py b/coremltools/converters/mil/mil/types/get_type_info.py similarity index 64% rename from coremltools/converters/nnssa/commons/builtins/get_type_info.py rename to coremltools/converters/mil/mil/types/get_type_info.py index 3a440d9a3..31b7fcce0 100644 --- a/coremltools/converters/nnssa/commons/builtins/get_type_info.py +++ b/coremltools/converters/mil/mil/types/get_type_info.py @@ -1,9 +1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -import types -from .type_spec import * +from .type_spec import * # pylint: disable=wildcard-import from .type_void import void @@ -12,12 +17,14 @@ def get_python_method_type(py_function): function_inputs = [] function_output = get_type_info(void) annotations = {} - if hasattr(py_function, 'type_annotations'): - annotations = {k: get_type_info(v) for k, v in py_function.type_annotations.items()} - if hasattr(py_function, 'return_type'): + if hasattr(py_function, "type_annotations"): + annotations = { + k: get_type_info(v) for k, v in py_function.type_annotations.items() + } + if hasattr(py_function, "return_type"): function_output = get_type_info(py_function.return_type) try: - if hasattr(py_function, '__func__'): + if hasattr(py_function, "__func__"): argcount = py_function.__func__.__code__.co_argcount argnames = py_function.__func__.__code__.co_varnames[:argcount] else: @@ -26,52 +33,59 @@ def get_python_method_type(py_function): except: raise TypeError( "Unable to derive type information from method %s. " - "You might have a misspecified type. Ex: use compyler.int and not int" % py_function) + "You might have a misspecified type. Ex: use compyler.int and not int" + % py_function + ) for arg in argnames: if arg in annotations: function_inputs.append(annotations[arg]) - elif arg != 'self': + elif arg != "self": raise TypeError( - "Function " + str(py_function) + " insufficient annotations. " + arg + - " needs a type") + "Function " + + str(py_function) + + " insufficient annotations. 
" + + arg + + " needs a type" + ) typeinfo = FunctionType(function_inputs, function_output, py_function) return typeinfo def get_type_info(t): - if hasattr(t, '__type_info__'): + if hasattr(t, "__type_info__"): ret = t.__type_info__() - assert (ret.python_class is not None) + assert ret.python_class is not None return ret elif isinstance(t, type): return Type(t.__name__, python_class=t) - elif hasattr(t, '__call__'): + elif hasattr(t, "__call__"): return get_python_method_type(t) - else: - raise TypeError("Unsupported type %s" % t) + raise TypeError("Unsupported type %s" % t) def get_python_class_methods(cls): ret = {} for key, value in cls.__dict__.items(): - if hasattr(value, '__call__'): + if hasattr(value, "__call__"): ret[key] = value return ret def get_python_class_slots(class_type): - if hasattr(class_type, '__slots__'): + if hasattr(class_type, "__slots__"): if len(class_type.__slots__) != len(class_type.__slot_types__): raise RuntimeError( - "__slots__ and __slot_types__ length mismatch in class %s" % (str(class_type))) + "__slots__ and __slot_types__ length mismatch in class %s" + % (str(class_type)) + ) return class_type.__slots__ else: return [] def get_python_class_slot_types(class_type): - if hasattr(class_type, '__slots__'): + if hasattr(class_type, "__slots__"): if len(class_type.__slots__) != len(class_type.__slot_types__): raise RuntimeError("__slots__ and __slot_types__ length mismatch") return [get_type_info(x) for x in class_type.__slot_types__] diff --git a/coremltools/converters/nnssa/commons/builtins/global_methods.py b/coremltools/converters/mil/mil/types/global_methods.py similarity index 57% rename from coremltools/converters/nnssa/commons/builtins/global_methods.py rename to coremltools/converters/mil/mil/types/global_methods.py index 859968dd2..e2d0dbd37 100644 --- a/coremltools/converters/nnssa/commons/builtins/global_methods.py +++ b/coremltools/converters/mil/mil/types/global_methods.py @@ -1,7 +1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ + """ This defines a list of all the "global methods" like len. Or type cast operators like int, list, double, etc. @@ -9,9 +16,9 @@ The difficulty with some of these methods is that they don't have fixed types. For instance len(x) allows x to be list or a dictionary. -However we don't support function overloading based on types, and we don't +However we don't support function overloading based on types, and we don't intend to. (It is complicated, requires the parser to be far more intelligent -and do good type inference; will either require genre to support overloading +and do good type inference; will either require genre to support overloading or do name mangling. 
The final quirk is that we probably should not call these functions "len" @@ -22,26 +29,26 @@ """ global_remap = { - 'len': '__len__', - 'str': '__str__', - 'int': '__int__', - 'double': '__double__', - 'float': '__double__', - 'bool': '__bool__', - 'log': '__log__', - 'exp': '__exp__', - 'max': '__max__', - 'min': '__min__' + "len": "__len__", + "str": "__str__", + "int": "__int__", + "double": "__double__", + "float": "__double__", + "bool": "__bool__", + "log": "__log__", + "exp": "__exp__", + "max": "__max__", + "min": "__min__", } global_invremap = { - '__len__': 'len', - '__str__': 'str', - '__int__': 'int', - '__double__': 'float', - '__bool__': 'bool', - '__log__': 'math.log', - '__exp__': 'math.exp', - '__max__': 'max', - '__min__': 'min' + "__len__": "len", + "__str__": "str", + "__int__": "int", + "__double__": "float", + "__bool__": "bool", + "__log__": "math.log", + "__exp__": "math.exp", + "__max__": "max", + "__min__": "min", } diff --git a/coremltools/converters/mil/mil/types/operator_names.py b/coremltools/converters/mil/mil/types/operator_names.py new file mode 100644 index 000000000..2ffd2ef29 --- /dev/null +++ b/coremltools/converters/mil/mil/types/operator_names.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +__bin_operator_to_python_name = {} +__bin_operator_to_python_name["+"] = "__add__" +__bin_operator_to_python_name["-"] = "__sub__" +__bin_operator_to_python_name["*"] = "__mul__" +__bin_operator_to_python_name["/"] = "__div__" +__bin_operator_to_python_name["%"] = "__mod__" +__bin_operator_to_python_name["<"] = "__lt__" +__bin_operator_to_python_name["<="] = "__le__" +__bin_operator_to_python_name[">"] = "__gt__" +__bin_operator_to_python_name[">="] = "__ge__" +__bin_operator_to_python_name["=="] = "__eq__" +__bin_operator_to_python_name["!="] = "__ne__" +__bin_operator_to_python_name["in"] = "__contains__" +__bin_operator_to_python_name["getitem"] = "__getitem__" +__bin_operator_to_python_name["setitem"] = "__setitem__" + +__unary_operator_to_python_name = {} +__unary_operator_to_python_name["-"] = "__neg__" +__unary_operator_to_python_name["!"] = "__not__" + + +def bin_operator_to_python_name(op): + return __bin_operator_to_python_name.get(op, None) + + +def unary_operator_to_python_name(op): + return __unary_operator_to_python_name.get(op, None) diff --git a/coremltools/converters/mil/mil/types/symbolic.py b/coremltools/converters/mil/mil/types/symbolic.py new file mode 100644 index 000000000..4b6473cb0 --- /dev/null +++ b/coremltools/converters/mil/mil/types/symbolic.py @@ -0,0 +1,82 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import sympy as sm +import numpy as np +import six + +k_used_symbols = set() +k_num_internal_syms = 0 + + +def is_compatible_symbolic_vector(val_a, val_b): + """ + Compare two vectors and check whether they are compatible. + ([is0, 4], [9, 4]) and ([is0, 1], [is1, is2]) are two compatible examples.
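+ ([is0, 4], [9, 5]) is not compatible, since 4 != 5 in the last entry.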
+ """ + val_a = tuple(val_a) + val_b = tuple(val_b) + + if len(val_a) != len(val_b): + return False + + for a, b in zip(val_a, val_b): + if not is_symbolic(a) and not is_symbolic(b): + if a != b: + return False + return True + + +def is_symbolic(val): + return issubclass(type(val), sm.Basic) # pylint: disable=consider-using-ternary + + +def is_variadic(val): + return ( + issubclass(type(val), sm.Symbol) and val.name[0] == "*" + ) # pylint: disable=consider-using-ternary + + +def num_symbolic(val): + """ + Return the number of symbols in val + """ + if is_symbolic(val): + return 1 + elif isinstance(val, np.ndarray) and np.issctype(val.dtype): + return 0 + elif hasattr(val, "__iter__"): + return sum(any_symbolic(i) for i in val) + return 0 + + +def any_symbolic(val): + if is_symbolic(val): + return True + if isinstance(val, np.ndarray) and val.ndim == 0: + return is_symbolic(val[()]) + elif isinstance(val, np.ndarray) and np.issctype(val.dtype): + return False + elif isinstance(val, six.string_types): # string is iterable + return False + elif hasattr(val, "__iter__"): + return any(any_symbolic(i) for i in val) + return False + + +def any_variadic(val): + if is_variadic(val): + return True + elif isinstance(val, np.ndarray) and np.issctype(val.dtype): + return False + elif isinstance(val, six.string_types): # string is iterable + return False + elif hasattr(val, "__iter__"): + return any(any_variadic(i) for i in val) + return False + + +def isscalar(val): + return np.isscalar(val) or issubclass(type(val), sm.Basic) diff --git a/coremltools/converters/nnssa/commons/builtins/type_bool.py b/coremltools/converters/mil/mil/types/type_bool.py similarity index 76% rename from coremltools/converters/nnssa/commons/builtins/type_bool.py rename to coremltools/converters/mil/mil/types/type_bool.py index c8887a444..8add1347f 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_bool.py +++ b/coremltools/converters/mil/mil/types/type_bool.py @@ -1,9 +1,15 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ from .annotate import class_annotate, annotate, delay_type -from .type_spec import * +from .type_spec import Type @class_annotate() @@ -24,7 +30,7 @@ def __ne__(self, other): return bool(self.val != other.val) @annotate(delay_type.bool) - def __not__(self): + def __not__(self, other): return bool(not other.val) @annotate(delay_type.bool) @@ -43,5 +49,6 @@ def __double__(self): def __str__(self): return str(self.val) + def is_bool(t): - return t is bool or isinstance(t,bool) \ No newline at end of file + return t is bool or isinstance(t, bool) diff --git a/coremltools/converters/nnssa/commons/builtins/type_dict.py b/coremltools/converters/mil/mil/types/type_dict.py similarity index 69% rename from coremltools/converters/nnssa/commons/builtins/type_dict.py rename to coremltools/converters/mil/mil/types/type_dict.py index ea2015a8b..89ac64fb2 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_dict.py +++ b/coremltools/converters/mil/mil/types/type_dict.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ from .annotate import annotate -from .type_spec import * -from .type_list import * -from .type_bool import * -from .type_int import * +from .type_spec import Type +from . import type_bool +from . import type_int from .type_void import void from .get_type_info import get_type_info @@ -42,20 +47,20 @@ def __type_info__(cls): @annotate(T[1], key=T[0]) def __getitem__(self, key): - assert (isinstance(key, self.T[0])) + assert isinstance(key, self.T[0]) return self.val[key] @annotate(void, key=T[0], newval=T[1]) def __setitem__(self, key, newval): - assert (isinstance(key, self.T[0])) - assert (isinstance(newval, self.T[1])) + assert isinstance(key, self.T[0]) + assert isinstance(newval, self.T[1]) self.val[key] = newval - @annotate(int) + @annotate(type_int.int) def __len__(self): - return int(len(self.val)) + return type_int.int(len(self.val)) - @annotate(bool, key=T[0]) + @annotate(type_bool.bool, key=T[0]) def __contains__(self, key): return key in self.val[key] diff --git a/coremltools/converters/nnssa/commons/builtins/type_double.py b/coremltools/converters/mil/mil/types/type_double.py similarity index 58% rename from coremltools/converters/nnssa/commons/builtins/type_double.py rename to coremltools/converters/mil/mil/types/type_double.py index 8ff71c011..34f10d6be 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_double.py +++ b/coremltools/converters/mil/mil/types/type_double.py @@ -1,12 +1,21 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ + +import numpy as np +import math +import logging + from .annotate import class_annotate, annotate, delay_type from .type_bool import bool -from .get_type_info import get_type_info -from .type_spec import * -import math +from .type_spec import Type def make_float(width): @@ -17,7 +26,45 @@ class double: _width = width def __init__(self, v=0.0): - self.val = v + self._val = v + + @property + def val(self): + return self._val + + @val.setter + def val(self, v): + from .type_mapping import ( + nptype_from_builtin, + numpy_type_to_builtin_type, + builtin_to_string, + ) + + if not isinstance(v, np.generic): + raise ValueError( + "types should have value of numpy type, got {} instead".format( + type(v) + ) + ) + + if isinstance(v, np.floating): + v_type = numpy_type_to_builtin_type(v.dtype) + if v_type.get_bitwidth() <= self.get_bitwidth(): + self._val = v + else: + self._val = v.astype(nptype_from_builtin(self.__class__)) + logging.warning( + "Saving value type of {} into a builtin type of {}, might lose precision!".format( + v.dtype, builtin_to_string(self.__class__) + ) + ) + else: + self._val = v.astype(nptype_from_builtin(self.__class__)) + logging.warning( + "Saving value type of {} into a builtin type of {}, might be incompatible or loses precision!".format( + v.dtype, builtin_to_string(self.__class__) + ) + ) @classmethod def __type_info__(cls): @@ -29,27 +76,27 @@ def get_bitwidth(cls): @annotate(delay_type_float, other=delay_type_float) def __add__(self, other): - assert (isinstance(other, double)) + assert isinstance(other, double) return double(self.val + other.val) @annotate(delay_type_float, other=delay_type_float) def __sub__(self, other): - assert (isinstance(other, double)) + assert isinstance(other, double) return double(self.val - other.val) @annotate(delay_type_float, other=delay_type_float) def __mul__(self, other): - assert (isinstance(other, double)) + assert isinstance(other, double) return double(self.val * other.val) @annotate(delay_type_float, other=delay_type_float) def __div__(self, other): - assert (isinstance(other, double)) + assert isinstance(other, double) return double(self.val / other.val) @annotate(delay_type_float, other=delay_type_float) def __mod__(self, other): - assert (isinstance(other, double)) + assert isinstance(other, double) return double(self.val % other.val) @annotate(delay_type.bool, other=delay_type_float) @@ -104,6 +151,7 @@ def __exp__(self): def __neg__(self): return double(-self.val) + double.__name__ = "fp%d" % double.get_bitwidth() return double @@ -115,4 +163,4 @@ def __neg__(self): def is_float(t): - return any(t is i or isinstance(t,i) for i in [fp16, fp32, fp64]) + return any(t is i or isinstance(t, i) for i in [fp16, fp32, fp64]) diff --git a/coremltools/converters/nnssa/commons/builtins/type_globals_pseudo_type.py b/coremltools/converters/mil/mil/types/type_globals_pseudo_type.py similarity index 52% rename from coremltools/converters/nnssa/commons/builtins/type_globals_pseudo_type.py rename to coremltools/converters/mil/mil/types/type_globals_pseudo_type.py index 2643ba08b..de46a8b2f 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_globals_pseudo_type.py +++ b/coremltools/converters/mil/mil/types/type_globals_pseudo_type.py @@ -1,9 
+1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -from .annotate import class_annotate, annotate, delay_type -from .type_spec import * +from .type_spec import Type class globals_pseudo_type: diff --git a/coremltools/converters/nnssa/commons/builtins/type_int.py b/coremltools/converters/mil/mil/types/type_int.py similarity index 54% rename from coremltools/converters/nnssa/commons/builtins/type_int.py rename to coremltools/converters/mil/mil/types/type_int.py index 7981534b2..e50589068 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_int.py +++ b/coremltools/converters/mil/mil/types/type_int.py @@ -1,12 +1,22 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ + +import numpy as np +import sympy as sm +import math +import logging + from .annotate import class_annotate, annotate, delay_type -from .get_type_info import get_type_info from .type_bool import bool -from .type_spec import * -import math +from .type_spec import Type def make_int(width, unsigned): @@ -19,7 +29,49 @@ class int: @annotate(v=delay_type_int) def __init__(self, v=0): - self.val = v + self._val = v + + @property + def val(self): + return self._val + + @val.setter + def val(self, v): + from .type_mapping import ( + nptype_from_builtin, + builtin_to_string, + numpy_type_to_builtin_type, + ) + + if not isinstance(v, (np.generic, sm.Basic)): + raise ValueError( + "types should have value of numpy type or Symbols, got {} instead".format( + type(v) + ) + ) + + if isinstance(v, sm.Basic): + self._val = v + elif isinstance(v, np.integer): + v_type = numpy_type_to_builtin_type(v.dtype) + if v_type.get_bitwidth() <= self.get_bitwidth() and ( + v >= 0 or v < 0 and not self.is_unsigned() + ): + self._val = v + else: + self._val = v.astype(nptype_from_builtin(self.__class__)) + logging.warning( + "Saving value type of {} into a builtin type of {}, might overflow or loses precision!".format( + v.dtype, builtin_to_string(self.__class__) + ) + ) + else: + self._val = v.astype(nptype_from_builtin(self.__class__)) + logging.warning( + "Saving value type of {} into a builtin type of {}, might be incompatible or loses precision!".format( + v.dtype, builtin_to_string(self.__class__) + ) + ) @classmethod def __type_info__(cls): @@ -31,31 +83,31 @@ def get_bitwidth(cls): @classmethod def is_unsigned(cls): - return cls._unsigned == 'u' + return cls._unsigned == "u" @annotate(delay_type_int, other=delay_type_int) def __add__(self, other): - assert (isinstance(other, int)) + assert isinstance(other, int) return int(self.val + other.val) @annotate(delay_type_int, other=delay_type_int) def __sub__(self, other): - assert (isinstance(other, int)) + assert isinstance(other, int) return int(self.val - other.val) @annotate(delay_type_int, other=delay_type_int) def __mul__(self, other): - assert (isinstance(other, int)) + assert isinstance(other, int) return int(self.val * other.val) 
@annotate(delay_type_int, other=delay_type_int) def __div__(self, other): - assert (isinstance(other, int)) + assert isinstance(other, int) return int(self.val // other.val) @annotate(delay_type_int, other=delay_type_int) def __mod__(self, other): - assert (isinstance(other, int)) + assert isinstance(other, int) return int(self.val % other.val) @annotate(delay_type.bool, other=delay_type_int) @@ -113,18 +165,21 @@ def __neg__(self): return int -int8 = make_int(8, '') -int16 = make_int(16, '') -int32 = make_int(32, '') -int64 = make_int(64, '') +int8 = make_int(8, "") +int16 = make_int(16, "") +int32 = make_int(32, "") +int64 = make_int(64, "") int = int64 -uint8 = make_int(8, 'u') -uint16 = make_int(16, 'u') -uint32 = make_int(32, 'u') -uint64 = make_int(64, 'u') +uint8 = make_int(8, "u") +uint16 = make_int(16, "u") +uint32 = make_int(32, "u") +uint64 = make_int(64, "u") uint = uint64 def is_int(t): - return any(t is i or isinstance(t,i) for i in [int8, int16, int32, int64, uint8, uint16, uint32, uint64]) \ No newline at end of file + return any( + t is i or isinstance(t, i) + for i in [int8, int16, int32, int64, uint8, uint16, uint32, uint64] + ) diff --git a/coremltools/converters/nnssa/commons/builtins/type_list.py b/coremltools/converters/mil/mil/types/type_list.py similarity index 51% rename from coremltools/converters/nnssa/commons/builtins/type_list.py rename to coremltools/converters/mil/mil/types/type_list.py index 57ea40a20..7a0475517 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_list.py +++ b/coremltools/converters/mil/mil/types/type_list.py @@ -1,21 +1,27 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ from .annotate import annotate from .type_void import void -from .type_int import * -from .type_spec import * +from . 
import type_int +from .type_spec import Type from .get_type_info import get_type_info def memoize(f): memo = {} - def helper(x): + def helper(x, init_length=None, dynamic_length=True): if x not in memo: - memo[x] = f(x) - return memo[x] + memo[(x, init_length, dynamic_length)] = f(x, init_length, dynamic_length) + return memo[(x, init_length, dynamic_length)] return helper @@ -27,9 +33,9 @@ def __type_info__(cls): @memoize -def list(arg): +def list(arg, init_length=None, dynamic_length=True): class list: - T = [arg] + T = [arg, init_length, dynamic_length] def __init__(self): self.val = [] @@ -40,23 +46,23 @@ def __type_info__(cls): @annotate(void, other=T[0]) def append(self, other): - assert (isinstance(other, self.T[0])) + assert isinstance(other, self.T[0]) self.val.append(other) - @annotate(T[0], index=int) + @annotate(T[0], index=type_int.int) def __getitem__(self, index): - assert (isinstance(index, int)) + assert isinstance(index, type_int.int) return self.val[index.val] - @annotate(void, index=int, newval=T[0]) + @annotate(void, index=type_int.int, newval=T[0]) def __setitem__(self, index, newval): - assert (isinstance(index, int)) - assert (isinstance(newval, self.T[0])) + assert isinstance(index, type_int.int) + assert isinstance(newval, self.T[0]) self.val[index.val] = newval - @annotate(int) + @annotate(type_int.int) def __len__(self): - return int(len(self.val)) + return type_int.int(len(self.val)) if T[1] is None else T[1] list.__template_name__ = "list[" + arg.__name__ + "]" return list @@ -65,4 +71,4 @@ def __len__(self): def is_list(t): if t is None: return False - return get_type_info(t).name == 'list' + return get_type_info(t).name == "list" diff --git a/coremltools/converters/mil/mil/types/type_mapping.py b/coremltools/converters/mil/mil/types/type_mapping.py new file mode 100644 index 000000000..a619f12eb --- /dev/null +++ b/coremltools/converters/mil/mil/types/type_mapping.py @@ -0,0 +1,258 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from .type_bool import bool as types_bool +from .type_double import ( + is_float, + fp16 as types_fp16, + fp32 as types_fp32, + fp64 as types_fp64, +) +from .type_list import is_list +from .type_int import ( + is_int, + int8 as types_int8, + int16 as types_int16, + int32 as types_int32, + int64 as types_int64, + uint8 as types_uint8, + uint16 as types_uint16, + uint32 as types_uint32, + uint64 as types_uint64, +) +from .type_str import str as types_str +from .type_unknown import unknown +import numpy as np +import six +from .get_type_info import get_type_info + +_types_TO_NPTYPES = { + types_bool: np.bool_, + types_int8: np.int8, + types_int16: np.int16, + types_int32: np.int32, + types_int64: np.int64, + types_uint8: np.uint8, + types_uint16: np.uint16, + types_uint32: np.uint32, + types_uint64: np.uint64, + types_fp16: np.float16, + types_fp32: np.float32, + types_fp64: np.float64, + types_str: np.str_, +} + +_types_TO_STRINGS = { + types_bool: "bool", + types_int8: "i8", + types_int16: "i16", + types_int32: "i32", + types_int64: "i64", + types_uint8: "u8", + types_uint16: "u16", + types_uint32: "u32", + types_uint64: "u64", + types_fp16: "fp16", + types_fp32: "fp32", + types_fp64: "fp64", + types_str: "str", +} + +_STRINGS_TO_types = {v: k for k, v in _types_TO_STRINGS.items()} + + +def string_to_builtin(s): + """ + Given a str, return its corresponding builtin type. + """ + return _STRINGS_TO_types.get(s, None) + + +def builtin_to_string(builtin_type): + """ + Given a builtin type, return its corresponding string representation. + """ + return _types_TO_STRINGS.get(builtin_type, None) + + +def nptype_from_builtin(btype): + """ + Given a builtin type, return its corresponding Numpy dtype. + """ + return _types_TO_NPTYPES.get(btype, None) + + +def promote_types(dtype1, dtype2): + """ + Get the smallest type to which the given scalar types can be cast. + + Args: + dtype1 (builtin): + dtype2 (builtin): + + Returns: + A builtin datatype or None. + """ + nptype1 = nptype_from_builtin(dtype1) + nptype2 = nptype_from_builtin(dtype2) + # Circumvent the undesirable np type promotion: + # >> np.promote_types(np.float32, np.int) + # dtype('float64') + if np.issubdtype(nptype1, np.floating) and np.issubdtype(nptype2, np.signedinteger): + nppromoted = nptype1 + elif np.issubdtype(nptype2, np.floating) and np.issubdtype( + nptype1, np.signedinteger + ): + nppromoted = nptype2 + else: + nppromoted = np.promote_types(nptype1, nptype2) + return numpy_type_to_builtin_type(nppromoted) + + +def is_primitive(btype): + """ + Is the indicated builtin type a primitive? + """ + return btype is types_bool or btype is types_str or is_float(btype) or is_int(btype) + + +def is_scalar(btype): + """ + Is the given builtin type a scalar integer, float, or boolean? 
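+ Tensors, strings, lists, and tuples are not scalars.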
+ """ + return btype is types_bool or is_int(btype) or is_float(btype) + + +def is_tensor(tensor_type): + if tensor_type is None: + return False + try: + type_info = get_type_info(tensor_type).name + except TypeError: + return False + return type_info == "tensor" + + +def is_str(t): + if t is None: + return False + try: + type_info = get_type_info(t).name + except TypeError: + return False + return type_info == "str" + + +def is_tuple(t): + if t is None: + return False + try: + type_info = get_type_info(t).name + except TypeError: + return False + return type_info == "tuple" + + +def is_builtin(t): + return is_scalar(t) or is_tensor(t) or is_str(t) or is_tuple(t) + + +# Converts a numpy type to its types equivalent. +# Supports both dtypes and numpy primitive types. +def numpy_type_to_builtin_type(nptype): + if type(nptype) == np.dtype: + nptype = nptype.type + + if np.issubclass_(nptype, np.bool) or np.issubclass_(nptype, np.bool_): + # numpy as 2 bool types it looks like. what is the difference? + return types_bool + elif np.issubclass_(nptype, np.int8): + return types_int8 + elif np.issubclass_(nptype, np.int16): + return types_int16 + elif np.issubclass_(nptype, np.int32): + return types_int32 + elif np.issubclass_(nptype, np.int64): + return types_int64 + elif np.issubclass_(nptype, np.uint8): + return types_int8 + elif np.issubclass_(nptype, np.uint16): + return types_int16 + elif np.issubclass_(nptype, np.uint32): + return types_int32 + elif np.issubclass_(nptype, np.uint64): + return types_int64 + elif np.issubclass_(nptype, np.int): + # Catch all int + return types_int32 + elif np.issubclass_(nptype, np.object_): + # symbolic shape is considered int32 + return types_int32 + elif np.issubclass_(nptype, np.float16): + return types_fp16 + elif np.issubclass_(nptype, np.float32) or np.issubclass_(nptype, np.single): + return types_fp32 + elif np.issubclass_(nptype, np.float64) or np.issubclass_(nptype, np.double): + return types_fp64 + elif ( + np.issubclass_(nptype, six.string_types) + or np.issubclass_(nptype, np.string_) + or np.issubclass_(nptype, np.str_) + ): + return types_str + else: + raise TypeError("Unsupported numpy type: %s" % (nptype)) + + +# Tries to get the equivalent builtin type of a +# numpy or python type. +def type_to_builtin_type(type): + # Infer from numpy type if it is one + if type.__module__ == np.__name__: + return numpy_type_to_builtin_type(type) + + # Otherwise, try to infer from a few generic python types + if np.issubclass_(type, bool): + return types_bool + elif np.issubclass_(type, six.integer_types): + return types_int32 + elif np.issubclass_(type, six.string_types): + return types_str + elif np.issubclass_(type, float): + return types_fp32 + else: + raise TypeError("Could not determine builtin type for " + str(type)) + + +def numpy_val_to_builtin_val(npval): + if np.isscalar(npval): + ret_type = type_to_builtin_type(type(npval)) + ret = ret_type() + ret.val = npval + return ret, ret_type + else: + builtintype = numpy_type_to_builtin_type(npval.dtype) + from . import tensor as types_tensor + + ret_type = types_tensor(builtintype, npval.shape) + ret = ret_type() + ret.val = npval + return ret, ret_type + + +def is_subtype(type1, type2): + """ + Return True if type1 is a subtype of type2. False otherwise. + """ + if type2 == unknown: + return True # any class is a subclass of unknown (None) type. + if is_list(type2): + return is_list(type1) and is_subtype(type1.T[0], type2.T[0]) + + # simplistic handling of types is sufficient for now. 
Handling compatible + # tensor shape requires using types.is_tensor_and_is_compatible + return type1 == type2 diff --git a/coremltools/converters/nnssa/commons/builtins/type_spec.py b/coremltools/converters/mil/mil/types/type_spec.py similarity index 61% rename from coremltools/converters/nnssa/commons/builtins/type_spec.py rename to coremltools/converters/mil/mil/types/type_spec.py index 419634f28..061a3b7f2 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_spec.py +++ b/coremltools/converters/mil/mil/types/type_spec.py @@ -1,23 +1,33 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ +from six import string_types as _string_types class Type: """ - Type.name : A string with the name of the object - - Type.tparam : For classes with template parameters, (list, dict), this + - Type.tparam : For classes with template parameters, (list, dict), this contains a list of Type objects of the template parameters - - Type.python_class : The original python class implementing this type. + - Type.python_class : The original python class implementing this type. Two Type objects compare equal only on name and tparam and not python_class """ - __slots__ = ['name', 'tparam', 'python_class'] - def __init__(self, name, tparam=[], python_class=None): - assert (isinstance(name, str)) - assert (isinstance(tparam, list)) + __slots__ = ["name", "tparam", "python_class"] + + def __init__(self, name, tparam=None, python_class=None): + if tparam is None: + tparam = [] + assert isinstance(name, _string_types) + assert isinstance(tparam, list) self.name = name self.tparam = tparam self.python_class = python_class @@ -34,7 +44,7 @@ def __ne__(self, other): def __repr__(self): ret = self.name if len(self.tparam) > 0: - ret += '[' + ','.join(repr(x) for x in self.tparam) + ']' + ret += "[" + ",".join(repr(x) for x in self.tparam) + "]" return ret def __str__(self): @@ -45,24 +55,25 @@ def sexp(self): return self.name else: ret = [self.name] - ret.append([a.sexp() if hasattr(a, 'sexp') else a for a in self.tparam]) + ret.append([a.sexp() if hasattr(a, "sexp") else a for a in self.tparam]) return ret class FunctionType: - r""" + """ - FunctionType.inputs : A list of Type objects defining the types of the input - FunctionType.output: A Type object defining the type of the output - - FunctionType.python\_function : The original python function implementing - this type. Two FunctionType objects compare - equal only on inputs and output and not - python_function + - FunctionType.python_function : The original python function implementing + this type. 
Two FunctionType objects compare + equal only on inputs and output and not + python_function """ - __slots__ = ['inputs', 'output', 'python_function'] + + __slots__ = ["inputs", "output", "python_function"] def __init__(self, inputs, output, python_function=None): - assert (isinstance(inputs, list)) - assert (isinstance(output, (FunctionType, Type))) + assert isinstance(inputs, list) + assert isinstance(output, (FunctionType, Type)) self.inputs = inputs self.output = output self.python_function = python_function @@ -74,7 +85,7 @@ def __eq__(self, other): return self.inputs == other.inputs and self.output == other.output def __repr__(self): - return '(' + ','.join(repr(x) for x in self.inputs) + ')->' + repr(self.output) + return "(" + ",".join(repr(x) for x in self.inputs) + ")->" + repr(self.output) def __str__(self): return self.__repr__() diff --git a/coremltools/converters/nnssa/commons/builtins/type_str.py b/coremltools/converters/mil/mil/types/type_str.py similarity index 59% rename from coremltools/converters/nnssa/commons/builtins/type_str.py rename to coremltools/converters/mil/mil/types/type_str.py index 499116ae3..1056bc5f8 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_str.py +++ b/coremltools/converters/mil/mil/types/type_str.py @@ -1,9 +1,16 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ from .annotate import class_annotate, annotate, delay_type -from .type_spec import * +from .type_spec import Type +from six import string_types as _string_types @class_annotate() @@ -17,5 +24,5 @@ def __type_info__(cls): @annotate(delay_type.str, other=delay_type.str) def __add__(self, other): - assert (isinstance(other, str)) + assert isinstance(other, _string_types) return str(self.val + other.val) diff --git a/coremltools/converters/mil/mil/types/type_tensor.py b/coremltools/converters/mil/mil/types/type_tensor.py new file mode 100644 index 000000000..d88cfcc62 --- /dev/null +++ b/coremltools/converters/mil/mil/types/type_tensor.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ + +import numpy as np +import sympy as sm +import logging + +from .type_spec import Type +from .get_type_info import get_type_info +from .type_mapping import ( + promote_types, + is_tensor, + nptype_from_builtin, + builtin_to_string, + numpy_type_to_builtin_type, +) + + +def memoize(f): + memo = {} + + def helper(x, y): + y = tuple(y) + if (x, y,) not in memo: + memo[(x, y,)] = f(x, y,) + return memo[(x, y,)] + + return helper + + +def canonical_shape(shape): + """ Return shape as tuple of int or Symbol. + + This utility function ensures the shape tuple + using a single integer type (to its best effort). + + Args: + shape: tuple(int|long|np.int*|Symbol|SymbolExpr...) + """ + + def try_cast(x): + try: + # In python2.7, long and int are different types. 
+            # If we cast a long int whose value is out of the range of int,
+            # the result is still long, avoiding overflow:
+            #
+            # `type(2<<64) == long  # true`
+            # `type(int(2<<64)) == long  # true`
+            x = int(x)
+        except TypeError:
+            # ignore symbolic value (sm.Symbol or sm.Expr)
+            pass
+        return x
+
+    return tuple(try_cast(x) for x in shape)
+
+
+@memoize
+def tensor(primitive, shape):
+    shape = canonical_shape(shape)
+
+    class tensor:
+        T = [primitive, shape]
+
+        def __init__(self):
+            self._val = []
+
+        @classmethod
+        def __type_info__(cls):
+            return Type(
+                "tensor", [get_type_info(primitive)] + list(shape), python_class=cls
+            )
+
+        @classmethod
+        def get_primitive(cls):
+            return primitive
+
+        @classmethod
+        def get_shape(cls):
+            return shape
+
+        @property
+        def val(self):
+            return self._val
+
+        @val.setter
+        def val(self, v):
+            if not isinstance(v, np.ndarray):
+                raise ValueError(
+                    "tensor should have value of type ndarray, got {} instead".format(
+                        type(v)
+                    )
+                )
+
+            v_type = numpy_type_to_builtin_type(v.dtype)
+            promoted_type = promote_types(v_type, primitive)
+            if v_type == primitive or v.dtype == np.dtype("O"):
+                # np.array of symbolic has object type. Don't cast type.
+                self._val = v
+            elif promoted_type == primitive:
+                self._val = v.astype(nptype_from_builtin(primitive))
+            else:
+                logging.warning(
+                    "Saving value of type {} into a builtin of type {} may lose precision!".format(
+                        v.dtype, builtin_to_string(primitive)
+                    )
+                )
+                self._val = v.astype(nptype_from_builtin(primitive))
+
+    tensor.__template_name__ = (
+        "tensor[" + primitive.__name__ + "," + ",".join(str(s) for s in shape) + "]"
+    )
+    tensor.__name__ = (
+        "tensor[" + ",".join(str(s) for s in shape) + "," + primitive.__name__ + "]"
+    )
+    return tensor
+
+
+def is_tensor_and_is_compatible(tensor_type1, tensor_type2, allow_promotion=False):
+    """
+    Try to find a tensor type compatible with both input types.
+
+    Compatible means that the tensors have the same rank and matching or unspecified
+    dimensions. For example, (10, -1) is compatible with (-1, 20) with the compatible
+    shape (10, 20).
+
+    Args:
+        tensor_type1 (types.tensor)
+        tensor_type2 (types.tensor)
+        allow_promotion (bool): If True, allow primitive types to be promoted.
+
+    Returns:
+        A pair of (bool, type). If the given types are not tensor types with
+        (1) compatible shapes and (2) either identical primitive types or
+        allow_promotion=True, the return value is (False, None). Otherwise,
+        the return value is True and the compatible type. Note that the
+        returned shape may not be the same as either input.
For example, + + is_tensor_and_is_compatible( + tensor[fp32,[10,-1]], + tensor[fp32,[-1,20]]) --> tensor[fp32, [10,20]] + """ + + if not is_tensor(tensor_type1) or not is_tensor(tensor_type2): + return False, None + shape1 = tensor_type1.get_shape() + shape2 = tensor_type2.get_shape() + + primitive_type = tensor_type1.get_primitive() + if primitive_type != tensor_type2.get_primitive(): + promoted_type = promote_types(primitive_type, tensor_type2.get_primitive()) + if allow_promotion: + primitive_type = promoted_type + else: + return False, promoted_type + + if len(shape1) == 0: + return True, tensor_type2 + if len(shape2) == 0: + return True, tensor_type1 + + if len(shape1) != len(shape2): + return False, None + + most_specific_shape = [] + for i in range(len(shape1)): + if shape1[i] == -1 or issubclass(type(shape1[i]), sm.Basic): + most_specific_shape.append(shape2[i]) + elif shape2[i] == -1 or issubclass(type(shape2[i]), sm.Basic): + most_specific_shape.append(shape1[i]) + elif shape1[i] == shape2[i]: + most_specific_shape.append(shape1[i]) + elif shape1[i] != shape2[i]: + return False, None + + return True, tensor(primitive_type, most_specific_shape) + + +def is_tensor_and_is_compatible_general_shape(tensor_type1, tensor_type2): + # returns a pair of (bool, type) + # If Both are tensors, and have compatible shape, the first return is true + # The return will be the most general version of the tensor type. + # Note that this may not be either tensor types. i.e. + # + # is_tensor_and_is_compatible(tensor[fp32,[10,-1]] ,tensor[fp32,[-1,20]]) + # will return True, tensor[fp32, [-1,-1]] + + if not is_tensor(tensor_type1) or not is_tensor(tensor_type2): + return False, None + shape1 = tensor_type1.get_shape() + shape2 = tensor_type2.get_shape() + + if tensor_type1.get_primitive() != tensor_type2.get_primitive(): + return False, None + + if len(shape1) == 0: + return True, tensor_type2 + if len(shape2) == 0: + return True, tensor_type1 + + if len(shape1) != len(shape2): + return False, None + + most_general_shape = [] + for i in range(len(shape1)): + if shape1[i] == -1 or issubclass(type(shape1[i]), sm.Basic): + most_general_shape.append(shape1[i]) + elif shape2[i] == -1 or issubclass(type(shape2[i]), sm.Basic): + most_general_shape.append(shape2[i]) + elif shape1[i] == shape2[i]: + most_general_shape.append(shape1[i]) + elif shape1[i] != shape2[i]: + return False, None + + return True, tensor(tensor_type1.get_primitive(), most_general_shape) + + +def tensor_has_complete_shape(tensor_type): + if not is_tensor(tensor_type): + return True + s = tensor_type.get_shape() + if -1 in s: + return False + elif len(s) == 0: + return False + else: + return True diff --git a/coremltools/converters/nnssa/commons/builtins/type_tuple.py b/coremltools/converters/mil/mil/types/type_tuple.py similarity index 68% rename from coremltools/converters/nnssa/commons/builtins/type_tuple.py rename to coremltools/converters/mil/mil/types/type_tuple.py index 114063696..93f14e166 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_tuple.py +++ b/coremltools/converters/mil/mil/types/type_tuple.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ from .annotate import annotate -from .type_void import void -from .type_int import * -from .type_unknown import * -from .type_spec import * +from . import type_int +from . import type_unknown +from .type_spec import Type from .get_type_info import get_type_info _global_tuple = tuple @@ -44,15 +49,11 @@ def __init__(self): def __type_info__(cls): return Type("tuple", [get_type_info(arg) for arg in args], python_class=cls) - @annotate(int) + @annotate(type_int.int) def __len__(self): return len(args) - tuple.__template_name__ = "tuple[" + ",".join([get_type_info(arg).name for arg in args]) + "]" + tuple.__template_name__ = ( + "tuple[" + ",".join([get_type_info(arg).name for arg in args]) + "]" + ) return tuple - - -def is_tuple(t): - if t is None: - return False - return get_type_info(t).name == 'tuple' diff --git a/coremltools/converters/mil/mil/types/type_unknown.py b/coremltools/converters/mil/mil/types/type_unknown.py new file mode 100644 index 000000000..0fbe7246c --- /dev/null +++ b/coremltools/converters/mil/mil/types/type_unknown.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +from .type_spec import Type + + +class unknown: + """ + unknown is basically Any type. + """ + + @classmethod + def __type_info__(cls): + return Type("unknown", python_class=cls) + + def __init__(self, val=None): + self.val = val diff --git a/coremltools/converters/nnssa/commons/builtins/type_void.py b/coremltools/converters/mil/mil/types/type_void.py similarity index 50% rename from coremltools/converters/nnssa/commons/builtins/type_void.py rename to coremltools/converters/mil/mil/types/type_void.py index bc8269f75..fb780d302 100644 --- a/coremltools/converters/nnssa/commons/builtins/type_void.py +++ b/coremltools/converters/mil/mil/types/type_void.py @@ -1,9 +1,14 @@ # -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + from __future__ import print_function as _ from __future__ import division as _ from __future__ import absolute_import as _ -from .annotate import class_annotate, annotate, delay_type -from .type_spec import * +from .type_spec import Type class void: diff --git a/coremltools/converters/mil/mil/var.py b/coremltools/converters/mil/mil/var.py new file mode 100644 index 000000000..c7d9e5406 --- /dev/null +++ b/coremltools/converters/mil/mil/var.py @@ -0,0 +1,246 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+from coremltools.converters.mil.mil import types
+from coremltools.converters.mil.mil.types.symbolic import (
+    is_symbolic,
+    any_symbolic,
+)
+
+
+class Var(object):
+    """
+    Var represents the outputs of an Operation. Most Vars are derived from an
+    Operation (including const), and all Vars must have `sym_type`.
+
+    Example Usage:
+
+    from coremltools.converters.mil.mil import Builder as mb
+    from coremltools.converters.mil.mil import Function
+    from coremltools.converters.mil.mil import types
+
+    func_inputs = {"a": mb.placeholder(shape=(1,2)),
+                   "b": mb.placeholder(shape=(1,2))}
+    with Function(func_inputs) as ssa_func:
+        a, b = ssa_func.inputs["a"], ssa_func.inputs["b"]
+        res = mb.add(x=a, y=b)  # res is Var
+        assert types.is_tensor(res.sym_type)
+        assert res.rank == 2
+        assert res.dtype == types.float  # since a, b are by default float
+
+        # value is not available at compile time in this case. If
+        # materializable, res.val would be a numpy / primitive value
+        assert res.val is None
+
+
+    Comment: Except for InternalVar and Vars created in while_loop and by
+    placeholder, all Vars should only be constructed by Operation to represent
+    outputs.
+
+    Comment: Var hides the details of sym_type vs sym_val vs materialized
+    value, which were represented by two objects prior to refactoring.
+
+
+    # Properties:
+
+    name: (str)
+        name in MIL proto NamedValueType. Name is assigned by the parent
+        Operation.
+
+    sym_type [_sym_type]: (builtin type class)
+        All Vars must have a (possibly symbolic) type, usually derived from
+        type inference of upstream ops or from default values in _Input.
+
+    sym_val [_sym_val]: (builtin type instance)
+        Possibly symbolic value.
+
+    val [_sym_val]: (np.ndarray or python primitive scalar)
+        Numpy (scalar / tensor) value. `val` is not None iff `sym_val` is
+        not None and does not contain symbols. Read-only.
+
+    op [_op]: (Operation)
+        The Operation this Var is derived from. May not be None except
+        for InternalVar. Read-only.
+
+    op_output_idx: (int)
+        Index of this Var among the outputs of the parent Operation. May be
+        None.
+
+    child_ops [_child_ops]: list[Operation]
+        Ops that take this Var as an input.
+    """
+
+    __slots__ = [
+        "name",
+        "_sym_type",
+        "_sym_val",
+        "_op",
+        "op_output_idx",
+        "_child_ops",
+        "consuming_blocks",
+    ]
+
+    def __init__(self, name, sym_type, sym_val=None, op=None, op_output_idx=None):
+        """
+        sym_type (builtin type)
+        sym_val (builtin value)
+        op (Operation)
+        op_output_idx (int)
+        """
+        self.name = name
+        self._sym_type = sym_type
+        self._sym_val = sym_val
+        self._op = op
+        self.op_output_idx = op_output_idx
+        # An op can appear twice if it consumes a var twice (e.g.,
+        # add(%1, %1) or while_loop(loop_vars=(%1, %1))).
+        self._child_ops = list()
+
+        # A variable may not be consumed by any op (i.e. len(self._child_ops)
+        # == 0) but is still used as block output.
A var can be output of
+        # multiple blocks (e.g., both current block and nested blocks)
+        self.consuming_blocks = list()
+
+    @property
+    def sym_type(self):
+        return self._sym_type
+
+    @property
+    def shape(self):
+        if types.is_tensor(self._sym_type):
+            return self._sym_type.get_shape()
+        return tuple()
+
+    @property
+    def rank(self):
+        return len(self.shape)
+
+    @property
+    def dtype(self):
+        if types.is_tensor(self._sym_type):
+            return self._sym_type.get_primitive()
+        return self._sym_type
+
+    @property
+    def sym_val(self):
+        if self._sym_val is None:
+            return None
+        return self._sym_val.val
+
+    @property
+    def val(self):
+        if self._sym_val is None or any_symbolic(self._sym_val.val):
+            return None
+        return self._sym_val.val
+
+    @property
+    def op(self):
+        return self._op
+
+    @property
+    def child_ops(self):
+        return self._child_ops
+
+    def add_child_op(self, new_op):
+        self._child_ops.append(new_op)
+
+    def remove_child_op(self, target_op, no_check=False):
+        if target_op not in self._child_ops:
+            if no_check:
+                return  # no-op
+            msg = "Op {} does not take Var {} as input"
+            raise ValueError(msg.format(target_op.name, self.name))
+        self._child_ops.remove(target_op)
+
+    def shape_str(self):
+        annotation = ""
+        if self.val is not None:
+            annotation = "*"
+        elif self.sym_val is not None:
+            annotation = "^"
+        shape_str = str(self.shape)[:-1]  # trim the ")"
+        if self.rank > 1:
+            shape_str += ", "
+        shape_str += types.builtin_to_string(self.dtype) + ")" + annotation
+        return shape_str
+
+    def __str__(self):
+        return "%" + self.name + ": " + self.shape_str()
+
+
+class ListVar(Var):
+    __slots__ = ["_elem_type", "init_length", "dynamic_length"]
+
+    def __init__(
+        self, name, elem_type=None, init_length=None, dynamic_length=True, **kwargs
+    ):
+        """
+        elem_type (builtin.tensor)
+
+        init_length (int): initial length
+
+        dynamic_length (bool): True to allow the list to grow. False uses
+            init_length as the fixed size (init_length is then the runtime
+            length).
+        """
+        super(ListVar, self).__init__(
+            name=name,
+            sym_type=types.list(elem_type, init_length, dynamic_length),
+            sym_val=None,
+            **kwargs
+        )
+        self._elem_type = elem_type
+        self.init_length = init_length
+        self.dynamic_length = dynamic_length
+
+    @property
+    def shape(self):
+        raise ValueError("shape not applicable to ListVar '{}'.".format(self.name))
+
+    @property
+    def rank(self):
+        raise ValueError("rank not applicable to ListVar '{}'".format(self.name))
+
+    @property
+    def dtype(self):
+        raise ValueError("dtype not applicable to ListVar '{}'".format(self.name))
+
+    @property
+    def elem_type(self):
+        return self._elem_type
+
+    @property
+    def elem_shape(self):
+        if self._elem_type == types.unknown:
+            return None
+        return self._elem_type.get_shape()
+
+    def shape_str(self):
+        length = "?"
+        if not self.dynamic_length:
+            length = str(self.init_length)
+        if self._elem_type == types.unknown:
+            return "List[{}, unknown]".format(length)
+        elem_shape = self._elem_type.get_shape()
+        elem_dtype = self._elem_type.get_primitive()
+        shape_str = str(elem_shape)[:-1]  # trim the ")"
+        if len(elem_shape) > 1:
+            shape_str += ", "
+        shape_str += types.builtin_to_string(elem_dtype) + ")"
+        return "List[{}, {}]".format(length, shape_str)
+
+
+class InternalVar(Var):
+    """
+    An internal Var (its name carries a '__' prefix and it does not appear in
+    SSA) will ALWAYS have a `sym_val` of builtin type `unknown`. InternalVars
+    are constructed by the builder only.
+
+    Comment: Internal Var can be used to represent diverse types such as enum
+    type `DataType.FLOAT32`.
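+
+    A minimal illustrative sketch (the value and name below are arbitrary,
+    not part of any real builder flow):
+
+        v = InternalVar("FLOAT32", name="__dtype")
+        assert v.sym_type is types.unknown
+        assert v.sym_val == "FLOAT32"  # unwrapped by the Var.sym_val property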
+ """ + + def __init__(self, val, name=None): + super(InternalVar, self).__init__( + name=name, sym_type=types.unknown, sym_val=types.unknown(val) + ) diff --git a/coremltools/converters/mil/mil/visitors/__init__.py b/coremltools/converters/mil/mil/visitors/__init__.py new file mode 100644 index 000000000..61aafff42 --- /dev/null +++ b/coremltools/converters/mil/mil/visitors/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause diff --git a/coremltools/converters/mil/mil/visitors/dot_visitor.py b/coremltools/converters/mil/mil/visitors/dot_visitor.py new file mode 100644 index 000000000..89c548791 --- /dev/null +++ b/coremltools/converters/mil/mil/visitors/dot_visitor.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +from __future__ import print_function as _ +from __future__ import division as _ +from __future__ import absolute_import as _ +from ..var import Var + + +def _get_input_vars(op, only_nonconst_vars=False): + """ + Return type : List[Var] + """ + input_vars = [] + for name, val in op.inputs.items(): + if isinstance(val, Var): + if only_nonconst_vars: + if val.op and val.op.op_type == "const": + continue + input_vars.append(val) + elif isinstance(val, (list, tuple)): + for var in val: + if not isinstance(var, Var): + msg = "unrecognized input type of op='{}', input='{}'" + raise ValueError(msg.format(op.name, name)) + if only_nonconst_vars: + if var.op and var.op.op_type == "const": + continue + input_vars.append(var) + else: + msg = "unrecognized input type of op='{}', input='{}'" + raise ValueError(msg.format(op.name, name)) + return input_vars + + +class DotVisitor(object): + """ + Generates a dot description of a ssa block + """ + + def __init__(self, annotation=True): + self.result = [] + self.visited_memo = {} + self.highlights = {} + self.alternate_labeller = lambda o: o.op_type + ": " + o.name + self.annotation = annotation + + def labeller(self, labeller): + self.alternate_labeller = labeller + return self + + def highlight_nodes(self, nodeset, color="yellow"): + for i in nodeset: + self.highlights[i] = color + return self + + def visit(self, block, op, nodename_prefix=""): + """ + Append edges connecting parents of op to the op + """ + + if op in self.visited_memo: + return self + + label = self.alternate_labeller(op) + self.visited_memo[op] = 1 + + if op.name in self.highlights and op.name not in [ + o.name for o in block.outputs + ]: + self.result.append( + '"' + + nodename_prefix + + "op: " + + op.name + + '"' + + '[label="' + + label + + '",fillcolor=%s,style=filled,fontcolor=%s]' + % (self.highlights[op.name], "violetred") + ) + else: + self.result.append( + '"' + + nodename_prefix + + "op: " + + op.name + + '"' + + '[label="' + + label + + '",fontcolor=%s]' % ("violetred") + ) + + for input_var in _get_input_vars(op, only_nonconst_vars=True): + if input_var.op is not None: + input_name = "op: " + input_var.op.name + else: + input_name = input_var.name + + edge = ( + '"' + + nodename_prefix + + input_name + + '"' + + " -> " + + '"' + + nodename_prefix + + "op: " + + op.name + + '"' + ) + self.result.append(edge) + if input_var.op is not None: + 
self.visit(block, input_var.op, nodename_prefix) + else: + self.visit_input_var(input_var, nodename_prefix) + + return self + + def visit_input_var(self, var, nodename_prefix=""): + label = "input: " + var.name + + if var.name in self.highlights: + self.result.append( + '"' + + nodename_prefix + + var.name + + '"' + + '[label="' + + label + + '",fillcolor=%s,style=filled,fontcolor=%s]' + % (self.highlights[var.name], "violetred") + ) + else: + self.result.append( + '"' + + nodename_prefix + + var.name + + '"' + + '[label="' + + label + + '",fontcolor=%s]' % ("violetred") + ) + + def visit_output_vars(self, block, var, nodename_prefix=""): + + label = "output: " + var.name + if var.name in self.highlights: + self.result.append( + '"' + + nodename_prefix + + var.name + + '"' + + '[label="' + + label + + '",fillcolor=%s,style=filled,fontcolor=%s]' + % (self.highlights[var.name], "violetred") + ) + else: + self.result.append( + '"' + + nodename_prefix + + var.name + + '"' + + '[label="' + + label + + '",fontcolor=%s]' % ("violetred") + ) + + parent_op = var.op + edge = ( + '"' + + nodename_prefix + + "op: " + + parent_op.name + + '"' + + " -> " + + '"' + + nodename_prefix + + var.name + + '"' + ) + self.result.append(edge) + self.visit(block, parent_op, nodename_prefix=nodename_prefix) + + def visit_all(self, block, nodename_prefix=""): + for out_var in block.outputs: + self.visit_output_vars(block, out_var, nodename_prefix=nodename_prefix) + for op in block.operations: + if op.op_type != "const": + self.visit(block, op, nodename_prefix=nodename_prefix) + return self + + def get_result(self, graphtype="digraph", graph_name="g"): + return ( + graphtype + + " " + + graph_name + + " {\n\t" + + "\n\t".join(str(i) for i in self.result) + + ';\n\tlabel="' + + graph_name[8:] + + '";\n\tfontsize=96;\n}' + ) + + def __str__(self): + return self.get_result() diff --git a/coremltools/converters/mil/testing_reqs.py b/coremltools/converters/mil/testing_reqs.py new file mode 100644 index 000000000..cc84639a3 --- /dev/null +++ b/coremltools/converters/mil/testing_reqs.py @@ -0,0 +1,34 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. +# +# Use of this source code is governed by a BSD-3-clause license that can be +# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + +import os +import itertools +import numpy as np +import pytest + +from coremltools.converters.mil.mil import Builder as mb +from coremltools.converters.mil.mil.ops.registry import SSAOpRegistry + +from coremltools.converters.mil.mil import types +from coremltools._deps import ( + _HAS_TF_1, + _HAS_TF_2, + _HAS_TORCH, + MSG_TF1_NOT_FOUND, + MSG_TF2_NOT_FOUND, +) +from .testing_utils import ssa_fn, is_close, random_gen, converter, _converter + +backends = _converter.ConverterRegistry.backends.keys() + +np.random.seed(1984) + +if _HAS_TF_1 or _HAS_TF_2: + import tensorflow as tf + + tf.compat.v1.set_random_seed(1234) if _HAS_TF_1 else tf.random.set_seed(1234) + +if _HAS_TORCH: + import torch diff --git a/coremltools/converters/mil/testing_utils.py b/coremltools/converters/mil/testing_utils.py new file mode 100644 index 000000000..280143478 --- /dev/null +++ b/coremltools/converters/mil/testing_utils.py @@ -0,0 +1,288 @@ +# Copyright (c) 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
+
+import logging
+import numpy as np
+import copy
+
+import coremltools
+from coremltools import converters as converter
+from coremltools.converters.mil import converter as _converter
+from coremltools.converters.mil.mil import Program, Function
+from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
+from coremltools._deps import _IS_MACOS
+
+# Re-exported so other test modules can do
+# `from .testing_utils import converter, _converter` (see testing_reqs.py).
+converter = converter
+_converter = _converter
+
+
+def assert_op_count_match(program, expect, op=None, verbose=False):
+    """
+    Assert that the number of ops matches the expected number. If op is not
+    specified, count the total number of ops and match it against expect.
+    """
+    if verbose:
+        print(program)
+
+    count = 0
+    for _, func in program.functions.items():
+        for o in func.operations:
+            if not op:
+                count += 1
+            elif o.op_type.lower() == op.lower():
+                count += 1
+    np.testing.assert_equal(count, expect)
+
+
+def assert_model_is_valid(
+    program, inputs, backend="nn_proto", verbose=True, expected_output_shapes=None
+):
+    """
+    Assert Core ML model is valid.
+
+    Inputs:
+
+    - inputs: dict of str -> shape tuple. Every program input name must
+      appear as a key; each shape tuple may contain only positive integers.
+    """
+    input_dict = dict()
+    for name, shape in inputs.items():
+        input_dict[name] = np.random.rand(*shape)
+    proto = _converter._convert(program, convert_from="mil", convert_to=backend)
+    if verbose:
+        from coremltools.models.neural_network.printer import print_network_spec
+
+        print_network_spec(proto, style="coding")
+
+    model = coremltools.models.MLModel(proto)
+    assert model is not None
+    if _IS_MACOS:
+        prediction = model.predict(input_dict, useCPUOnly=True)
+        assert prediction is not None
+        if expected_output_shapes is not None:
+            for out_name, out_shape in expected_output_shapes.items():
+                assert out_name in prediction
+                assert out_shape == prediction[out_name].shape
+
+
+def assert_same_output_names(prog1, prog2, func_name="main"):
+    prog1_outputs = [o.name for o in prog1[func_name].outputs]
+    prog2_outputs = [o.name for o in prog2[func_name].outputs]
+    assert prog1_outputs == prog2_outputs
+
+
+def assert_same_output_shapes(prog1, prog2, func_name="main"):
+    prog1_output_shapes = [o.shape for o in prog1[func_name].outputs]
+    prog2_output_shapes = [o.shape for o in prog2[func_name].outputs]
+    assert prog1_output_shapes == prog2_output_shapes
+
+
+def get_op_types_in_program(prog, func_name="main", skip_const_ops=True):
+    """
+    Return the operation types in prog[func_name],
+    in the same order as they are stored (topological)
+    """
+    op_types_in_program = []
+    for op in prog[func_name].operations:
+        if skip_const_ops:
+            if op.op_type == "const":
+                continue
+        op_types_in_program.append(op.op_type)
+    return op_types_in_program
+
+
+def random_gen(
+    shape,
+    rand_min=0.0,
+    rand_max=1.0,
+    eps_from_int=0.0,
+    allow_duplicate=True,
+    dtype=np.float32,
+):
+    """
+    This helper function generates a random array of shape `shape`.
+    Generated numbers lie in the range [rand_min, rand_max).
+    Each generated value will be more than `eps_from_int` away from the
+    nearest integer.
+    If allow_duplicate is set to False, the generated values are guaranteed
+    to all be distinct.
+    The default data type is np.float32.
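+
+    Illustrative call (the shape and bounds below are arbitrary):
+
+        x = random_gen((2, 3), rand_min=-1.0, rand_max=1.0, eps_from_int=0.1)
+        # x is a (2, 3) np.float32 array; every entry lies in [-1.0, 1.0)
+        # and is more than 0.1 away from the nearest integer.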
+ """ + + elem = np.prod(shape) + ret = [] + for _ in range(elem): + while True: + r = dtype((rand_max - rand_min) * np.random.random() + rand_min) + if not allow_duplicate and r in ret: + continue + if np.fabs(np.round(r) - r) > eps_from_int: + ret.append(r) + break + ret = np.array(ret).reshape(shape) + return ret.astype(dtype) + + +def ssa_fn(func): + """ + Deprecated: use @mb.program() + """ + + def wrapper(*args, **kwargs): + prog = Program() + with Function({}) as ssa_func: + func(*args, **kwargs) + + return wrapper + + +def to_tuple(v): + if not isinstance(v, (list, tuple)): + return tuple([v]) + return tuple(v) + + +def is_close(expected, actual, atol=1e-04, rtol=1e-05): + """ + expected, actual: np.array or python primitive (scalar) + rtol: relative tolerance. See numpy.isclose. + """ + + close = np.isclose(expected, actual, atol=atol, rtol=rtol) + if not np.all(close): + diff = expected - actual + num_not_close = np.sum(~close) + msg = "Values differ by L1 norm: {}. Num entries not close: {}/{}" + logging.error(msg.format(np.sum(np.abs(diff)), num_not_close, expected.size)) + if num_not_close < 30: + logging.error("Differing entries:") + logging.error("Expected: {}".format(expected[~close])) + logging.error("Actual: {}".format(actual[~close])) + logging.error("Delta: {}".format(diff[~close])) + return False + return True + + +def run_core_ml_predict(proto, input_key_values, use_cpu_only=False): + model = coremltools.models.MLModel(proto, useCPUOnly=use_cpu_only) + input_key_values = dict( + [ + ( + k, + v.astype(np.float32) + if not np.isscalar(v) and not v.shape == () + else np.array([v], dtype=np.float32), + ) + for k, v in input_key_values.items() + ] + ) + return model.predict(input_key_values, useCPUOnly=use_cpu_only) + + +def compare_backend( + proto, + input_key_values, + expected_outputs, + use_cpu_only=False, + atol=1e-04, + rtol=1e-05, + also_compare_shapes=True, +): + """ + Inputs: + - proto: MLModel proto. + + - input_key_values: str -> np.array. Keys must match those in + input_placeholders. + + - expected_outputs: dict[str, np.array]. Required iff + frontend_only == False + + - use_cpu_only: True/False. + """ + if _IS_MACOS: + pred = run_core_ml_predict(proto, input_key_values, use_cpu_only=use_cpu_only) + if also_compare_shapes: + compare_shapes( + proto, + input_key_values, + expected_outputs, + use_cpu_only=use_cpu_only, + pred=pred, + ) + if not use_cpu_only: + atol = min(atol * 100.0, 1e-1) + rtol = min(rtol * 100.0, 1e-2) + for o, expected in expected_outputs.items(): + msg = ( + "Output {} differs. useCPUOnly={}.\nInput={}, " + + "Expected={}, Output={}\n" + ) + assert is_close(expected, pred[o], atol, rtol), msg.format( + o, use_cpu_only, input_key_values, expected, pred[o] + ) + + +def compare_shapes( + proto, input_key_values, expected_outputs, use_cpu_only=False, pred=None +): + """ + Inputs: + - proto: MLModel proto. + + - input_key_values: str -> np.array. Keys must match those in + input_placeholders. + + - expected_outputs: dict[str, np.array]. + + - use_cpu_only: True/False. + + - pred: Prediction to use, if it has already been computed. + """ + + if _IS_MACOS: + if not pred: + pred = run_core_ml_predict(proto, input_key_values, use_cpu_only) + for o, expected in expected_outputs.items(): + msg = "Output: {}. 
expected shape {} != actual shape {}".format( + o, expected.shape, pred[o].shape + ) + # Core ML does not support scalar as output + # remove this special case when support is added + if expected.shape == () and pred[o].shape == (1,): + continue + assert pred[o].shape == expected.shape, msg + + +def get_core_ml_prediction( + build, input_placeholders, input_values, use_cpu_only=False, backend="nn_proto" +): + """ + Return predictions of the given model. + """ + program = Program() + with Function(input_placeholders) as ssa_func: + output_vars = build(**ssa_func.inputs) + if isinstance(output_vars, tuple): + output_vars = list(output_vars) + elif not isinstance(output_vars, list): + output_vars = [output_vars] + ssa_func.set_outputs(output_vars) + program.add_function("main", ssa_func) + + proto = _converter._convert(program, convert_from="mil", convert_to=backend) + model = coremltools.models.MLModel(proto, use_cpu_only) + return model.predict(input_values, useCPUOnly=use_cpu_only) + + +def apply_pass_and_basic_check(prog, pass_name): + """ + Apply pass to the program + """ + prev_prog = copy.deepcopy(prog) + PASS_REGISTRY[pass_name](prog) + block = prog.functions["main"] + prev_block = prev_prog.functions["main"] + assert_same_output_names(prev_prog, prog) + assert_same_output_shapes(prev_prog, prog) + return prev_prog, prev_block, block diff --git a/coremltools/converters/nnssa/builder.py b/coremltools/converters/nnssa/builder.py deleted file mode 100644 index db97b7a28..000000000 --- a/coremltools/converters/nnssa/builder.py +++ /dev/null @@ -1,1665 +0,0 @@ -""" -Helper class for - -1. build a SSA network, -2. plug in SSA nodes into SSA graph. - -""" - -import sys -import numpy as np - -from .nnssa import * -from .commons.parse import numpy_val_to_builtin_val - - -class SSABuilder(object): - """ - A helper class to build SSA network from scratch. - - Would be useful to run with GraphBuilder together. - - Examples - -------- - .. sourcecode:: python - - # SSABuilder initiation with some functions injected - >>> sb = SSABuilder(functions=functions) - - # GraphBuilder builds some nodes into graph. - >>> gb = GraphBuilder() - >>> _ = gb.add_elementwise(...) - >>> _ = gb.add_elementwise(...) - - # Add the graph built from GraphBuilder into SSABuilder - >>> sb.add_graph(gb.get_graph()) - - # Return a NNSSA - >>> sb.get_ssa() - """ - - def __init__(self, functions=None, variables=None, global_resource=None): - """ - Construct a SSABuilder object - - Parameters - ---------- - functions: {str: SSAFunction} - Functions that users pre-defined to plug in NetworkEnsembles. - - variables: {str: NNSSA.builtins} - Global variables used by nodes in the network. - Reference to set_global / get_global. - - global_resource: dict - Global resource used across the network. - """ - self.ssa = NetworkEnsemble() - if functions is None: - self.ssa.functions = dict() - else: - self.ssa.functions = functions - if variables is None: - self.ssa.variables = dict() - else: - self.ssa.variables = variables - if global_resource is None: - self.ssa.global_resource = dict() - else: - self.ssa.global_resource = global_resource - - def get_ssa(self): - """ - Obtain the NNSSA that can be used to do graph surgery/convert to backends. - """ - return copy.deepcopy(self.ssa) - - def add_graph(self, graph={}, name="main"): - """ - Add a function into NNSSA by a constructed graph - - Parameters - ---------- - graph: {str: ParsedNode} - A graph that is derived by GraphBuilder or from raw construction. 
- - name: str - The name of the function that would be added. - """ - if name in self.ssa.functions: - print("Failed adding graph! Name already exist in the NNSSA network!") - else: - self.ssa.add_function(name, SSAFunction(graph)) - - def add_function(self, function=None, name="main"): - """ - Add a function into NNSSA by SSAFunction - - Parameters - ---------- - function: SSAFunction - A SSAFunction - - name: str - The name of the function that would be added. - """ - if not isinstance(function, SSAFunction): - print("Failed adding function! The input is not a SSAFunction.") - elif name in self.ssa.functions: - print("Failed adding function! Name already exist in the NNSSA network!") - else: - self.ssa.add_function(name, function) - - -class GraphBuilder(object): - """ - A helper class to plug in SSA nodes into SSA graph. - - This GraphBuilder is a helper for users to construct SSA graph from node - level specifications. - GraphBuilder would be helpful if you don't know the specific input order or - required attributes for some operation nodes. - - The GraphBuilder only guarantees to provide attributes that are needed for - the NEspresso backend. - - Examples - -------- - .. sourcecode:: python - - >>> builder = GraphBuilder(ssa, nodename_prefix, ParsedNode) - - >>> one = np.int(1) - >>> two = np.int(2) - - # returns name of the node added in ssa graph - >>> const_1 = builder.add_const(one, name='one') - >>> const_2 = builder.add_const(two, name='two') - - # Add a "Add" node with input "const_1" and "const_2" - >>> const_3 = builder.add_elementwise('Add', const_1, const_2) - - """ - - def __init__(self, graph=None, prefix="", node_class=ParsedNode): - """ - Construct a GraphBuilder object - - Parameters - ---------- - graph: dict - The dictionary that represents the graph in NetworkEnsemble.SSAFunction.graph - - prefix: str - A string that would be the prefix of the node's name constructed. - - node_class: ParsedNode or sub-class of ParsedNode - The node class that would be plugged into graph. - - """ - if graph is None: - self.graph = dict() - else: - self.graph = graph - self.prefix = prefix - self.node_class = node_class - - def _find_free_name(self, name): - if name not in self.graph: - return name - i = 0 - while (True): - candidate = name + '_' + str(i) - if candidate not in self.graph: - return candidate - i += 1 - - def _build_op(self, op, inputs, **kwargs): - name = kwargs.get("name", None) - value = kwargs.get("value", None) - attr = kwargs.get("attr", {}) - datatype = kwargs.get("datatype", None) - - inserted_op = self.node_class() - inserted_op.op = op - inserted_op.inputs = copy.copy(inputs) - inserted_op.attr = copy.copy(attr) - inserted_op.value = value - if datatype is not None: - inserted_op.attr['dtype'] = datatype - inserted_op.datatype = datatype - if name is None: - name = op - name = self._find_free_name(self.prefix + name) - inserted_op.name = name - self.graph[name] = inserted_op - for i in inputs: - self.graph[i].outputs.append(name) - return name - - def get_graph(self): - """ - Obtain the graph that can be used for further surgery or initiate a SSAFunction. - - Examples - -------- - .. sourcecode:: python - - # GraphBuilder builds some nodes into graph. - >>> _ = builder.add_split(...) - >>> _ = builder.add_get_tuple(...) - >>> _ = builder.add_elementwise(...) - >>> _ = builder.add_elementwise(...) - >>> _ = builder.add_elementwise(...) 
- - # Merge two graphs together - >>> gdict = builder.get_graph() - >>> gdict1 = some_other_builder.get_graph() - >>> merge_graph = {**gdict, **gdict1} - - # or to build a SSAFunction - >>> f = SSAFunction(gdict) - - """ - return copy.deepcopy(self.graph) - - def get_function(self): - """ - Obtain the SSAFunction that is derived from graph - - Examples - -------- - .. sourcecode:: python - - # GraphBuilder builds some nodes into graph. - >>> _ = builder.add_split(...) - >>> _ = builder.add_get_tuple(...) - >>> _ = builder.add_elementwise(...) - >>> _ = builder.add_elementwise(...) - >>> _ = builder.add_elementwise(...) - - # get_function returns a SSAFunction - >>> f = builder.get_function() - """ - return SSAFunction(self.get_graph()) - - def get_ssa(self, name='main'): - """ - Obtain the NNSSA that is derived from graph and can be used for - graph surgery or backend conversions. - - Parameters - ---------- - name: str - The SSAFunction's name that would contain the graph generated by builder. - - """ - - ssa = NetworkEnsemble() - ssa.add_function(name, self.get_function()) - return ssa - - def add_placeholder(self, init_from=None, datatype=None, name=None): - """ - Add an 'placeholder' node to the SSAFunction. - - SSANode form: - op: 'Placeholder' - datatype: NNSSA.builtins type - - Examples - -------- - .. sourcecode:: python - - # Add a placeholder of float32 with tensor shape [-1, 512] - >>> ph1 = builder.add_placeholder(builtins.tensor(builtins.fp32, [-1, 512]), name="ph1") - - # Add a placeholder of integer - >>> ph2 = builder.add_placeholder(builtins.int32, name="ph2") - - Parameters - ---------- - datatype: NNSSA.builtins type - The type of placeholder - - init_from: numpy - Can be used to derive the corresponding NNSSA builtins type. Value - will be ignored. - - name: str - The name of the placeholder - """ - - if init_from is None and datatype is None: - raise ValueError("Datatype not set!") - if init_from is not None: - _, datatype = numpy_val_to_builtin_val(init_from) - return self._build_op('Placeholder', [], datatype=datatype, name=name) - - def add_identity(self, input_name, name=None): - """ - Add an 'identity' node to the SSAFunction. - 'make_tuple' is used when we need to merge multiple nodes into one for - feeding it into a function. - - SSANode form: - op: 'identity' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The input for this node. - """ - return self._build_op('Identity', [input_name], name=name) - - def add_make_tuple(self, input_names, name=None): - """ - Add an 'make_tuple' node to the SSAFunction. - 'make_tuple' is used when we need to merge multiple nodes into one for - feeding it into a function. - - SSANode form: - op: 'make_tuple' - inputs: input_names - - Examples - -------- - .. sourcecode:: python - - # split some_input into 5 slices. - >>> outputs = builder.add_split(some_input, num_split=5) - # obtain the first slice from outputs. - >>> output = builder.add_get_tuple(outputs, index=0) - - Parameters - ---------- - input_names: str - The inputs for this node. - """ - return self._build_op('make_tuple', input_names, name=name) - - def add_get_tuple(self, input_name, index, name=None): - """ - Add an 'get_tuple' node to the SSAFunction. - 'get_tuple' is used to obtain output from a node with multiple outputs. - - i.e. A = outputs[x] translates to builder.add_get_tuple(A, index=x) - - SSANode form: - op: 'get_tuple' - inputs: [input_name] - attr: 'index' - - Examples - -------- - .. 
sourcecode:: python - - # split some_input into 5 slices. - >>> outputs = builder.add_split(some_input, num_split=5) - # obtain the first slice from outputs. - >>> output = builder.add_get_tuple(outputs, index=0) - - Parameters - ---------- - input_name: str - The input for this node. - index: int - The index to read from. - """ - return self._build_op('get_tuple', [input_name], name=name, attr={'index': index}) - - def add_activation(self, op, input_name, name=None, attr={}): - """ - Add an activation node to the SSAFunction. - - SSANode form: - op: op - inputs: [input_name] - attr: 'alpha', 'beta' - - Parameters - ---------- - op: str - The activation function for this node. - - It can be one of the following: - - - 'Sigmoid': sigmoid function. - - 'Tanh': tanh function. - - 'Relu': Rectified Linear Unit (ReLU) function. - - - 'HardSigmoid': hard sigmoid function, defined as: - - `f(x) = min(max(alpha * x + beta, -1), 1)` - - where alpha and beta are constant scalars. - [default: alpha = 1.6733, beta = 1.0507] - - 'Elu': Exponential linear unit function, defined as: - - `f(x) = (x >= 0) * x + (x < 0) * (alpha * exp(x) - 1)` - - where alpha is a constant scalar. - [default: alpha = 1.0] - - 'Selu': Exponential linear unit function, defined as: - - `f(x) = beta * ((x >= 0) * x + (x < 0) * (alpha * (exp(x) - 1)))` - - where alpha and beta are constant scalars. - [default: alpha = 1.6733, beta = 1.0507] - Parameter of Selu is ignored in backend implementation. - - 'ThresoldedRelu': Thresholded ReLU function, defined as: - - `f(x) = (x >= alpha) * x` - - where alpha is a constant scalar. - [default: alpha = 1.0] - - 'LeakyRelu': leaky relu function, defined as: - - `f(x) = (x >= 0) * x + (x < 0) * alpha * x` - - where alpha is a constant scalar. - [default: alpha = 1.0] - - 'Linear': linear function. - - `f(x) = alpha * x + beta` - - where alpha and beta are constant scalars. - [default: alpha = 1.0, beta=1.0] - - input_name: str - The name of the input_node - - name: str - The name of this node - - attr: dict - Parameters for the activation, depending on activation function chosen. - - - When activation is one of ['Relu', 'Sigmoid', 'Tanh'], attr is ignored. - - When activation is one of ['Selu', 'HardSigmoid', 'Linear'], 'alpha' and 'beta' would be read from attr - - When activation is one of ['Elu', 'LeakyRelu', 'ThresholdedRelu'], 'alpha' would be read from attr - - """ - attr['alpha'] = 1.0 - attr['beta'] = 1.0 - if 'op' == 'Selu': - attr['alpha'] = 1.6732632423543772848170429916717 - attr['beta'] = 1.0507009873554804934193349852946 - - return self._build_op(op, [input_name], name=name, attr=attr) - - def add_elementwise(self, op, inputs, name=None): - """ - Add simple operation node that take 1 or 2 inputs. - - SSANode form: - op: op - inputs: inputs - - Parameters - ---------- - op: str - The operation performed on this node. - Elementwise operations support broadcasting. - Assume inputs = [A] for unary operations. - Assume inputs = [A, B] for binary operations. 
- - It can be one of the following: - - Unary operations: - - - 'Cos': `f(A) = Cos(A)` - - - 'Sin': `f(A) = Sin(A)` - - - 'Square': `f(A) = A * A` - - - 'Sqrt': `f(A) = sqrt(A)` - - - 'Rsqrt': `f(A) = 1.0 / sqrt(A)` - - - 'Log': `f(A) = log(A)` - - - 'Neg': `f(A) = -A` - - - 'Floor': `f(A) = int(A)` - - - 'LogicalNot': `f(A) = !A` - - Binary operations: - - - 'Add': `f(A, B) = A + B` - - - 'Sub': `f(A, B) = A - B` - - - 'Mul': `f(A, B) = A * B` - - - 'RealDiv': `f(A, B) = A / B` - - - 'FloorDiv': `f(A, B) = floor(A / B)` - - - 'Pow': `f(A, B) = A ^ B` - - - 'Maximum': `f(A, B) = (A >= B) ? A : B` - - - 'Minimum': `f(A, B) = (A < B) ? A : B` - - - 'LogicalAnd': `f(A, B) = (A && B)` - - - 'LogicalOr': `f(A, B) = (A || B)` - - - 'Equal': `f(A, B) = (A == B)` - - - 'NotEqual': `f(A, B) = (A != B)` - - - 'Less': `f(A, B) = (A < B)` - - - 'LessEqual': `f(A, B) = (A <= B)` - - - 'Greater': `f(A, B) = (A > B)` - - - 'GreaterEqual': `f(A, B) = (A >= B)` - - - input_names: [str or np.array of np.float32] - len(inputs) must be less or equal than 2. - - name: str - The name of this node - - """ - input_names = [self._maybe_add_const(input, "elementwise_input") \ - for input in inputs] - return self._build_op(op, input_names, name=name) - - def add_reshape(self, input_name, shape, name=None, attr={}): - """ - Add a reshape blob that reshapes input_name to shape. - - SSANode form: - op: 'Reshape' - inputs: [input_name, shape] - - Parameters - ---------- - input_name: str - Name of the input blob that would be reshaped. - - shape: str - Name of the input blob that indicates the output shape. - - name: str - The name of this node - - """ - return self._build_op('Reshape', [input_name, shape], name=name) - - def add_matmul(self, input_names, name=None, attr={}): - """ - Add matrix multiplication node. - Take input_names [A, B], and rank(A) == rank(B). - If rank(A) > 2, it's a batch MatMul (supports broadcasting). - - SSANode form: - op: 'MatMul' - inputs: input_names - attr: 'transpose_a', 'transpose_b' - - Parameters - ---------- - input_names: [str] - len(input_names) must equal to 2. - - name: str - The name of this node - - attr: dict - - 'transpose_a': Transpose input_names[0] - - 'transpose_b': Transpose input_names[1] - """ - return self._build_op('MatMul', input_names, name=name, attr=attr) - - def add_tile(self, input_name, multiples, name=None): - """ - Add Tile node. - This operation creates a new tensor by replicating input multiples times. - The output tensor's i'th dimension has input.dims(i) * multiples[i] elements, - and the values of input are replicated multiples[i] times along the 'i'th dimension. - For example, tiling [a b c d] by [2] produces [a b c d a b c d]. - - SSANode form: - op: 'Tile' - inputs: [input_name, multiples] - - Parameters - ---------- - input_name: str - Name of the input that will be tiled. - - multiples: str - Name of the node that indicates how many time will input be tiled. - - name: str - The name of this node - """ - return self._build_op('Tile', [input_name, multiples], name=name) - - def add_topk(self, input_name, k, name=None): - """ - Add a topK node. - Obtain the top-k along last axis. - - SSANode form: - op: 'TopKV2' - inputs: [input_name, k] - - Parameters - ---------- - input_name: str - Name of the input blob. - - k: str - Name of the node that give top-k to extract. - - name: str - The name of this node - - Examples - -------- - .. 
sourcecode:: python - - # The tensor we are going to obtain topK - >>> value = builder.add_const(np.random.random(size=(20,10))) - - # The value K for top-K. - >>> k = builder.add_const(np.int32(3)) - - # split_size we want. - >>> topK = builder.add_topk(value, k) - - # returns of topK - # The top-K values are in the 0-th index - >>> topk_values = builder.add_get_tuple(topK, 0) - # The top-K indices are in the 1-st index - >>> topk_indices = builder.add_get_tuple(topK, 1) - """ - return self._build_op('TopKV2', [input_name, k], name=name) - - def add_concat(self, input_names, axis, name=None): - """ - Add Concat node. - Take list of node inputs and a node representing the axis: a, b, axis. - Equivalent to numpy.concatenate(input_names, axis=axis) - - SSANode form: - op: 'ConcatV2' - inputs: input_names + [axis] - - Parameters - ---------- - input_names: [str or list[int] or np.ndarray] - List of nodes that will be concatenated. - - axis: str or int - Name of the node indicating the axis. - - name: str - The name of this node - """ - axis = self._maybe_add_const(axis, "concat_axis") - input_names = [self._maybe_add_const(input, "concat_input") for input in input_names] - return self._build_op('ConcatV2', input_names + [axis], name=name) - - def add_const(self, value, name=None): - """ - Add a constant node. - - SSANode form: - op: 'Const' - inputs: [] - value: NNSSA.builtins - datatype: NNSSA.builtins type - - Parameters - ---------- - value: NNSSA.builtins types or numpy type. - The value of the constant. - - name: str - The name of this node - """ - if isinstance(value, (np.generic, np.ndarray)): - value, valtype = numpy_val_to_builtin_val(value) - - return self._build_op('Const', [], name=name, value=value, datatype=value.__class__) - - def add_select(self, cond, b_true, b_false, name=None): - """ - Add a select node. i.e. The "if" statement. - - SSANode form: - op: 'Select' - inputs: [condition, b_true, b_false] - - Parameters - ---------- - cond: str - Condition node to determine which branch to output. - - b_true: str - If cond is true, output of node would be this true branch. - - b_false: str - Otherwise, result of node would be this false branch. - - name: str - The name of this node - """ - - return self._build_op('Select', [cond, b_true, b_false], name=name) - - def add_split(self, split_dim, value, split_size="", num_split=0, name=None): - """ - Add a split node. - If num_split != 0, split_size will be ignored. - If num_split == 0, split_size is the node name that gives how value should be split. - - Obtain result by get_tuple. - - SSANode form: - op: 'Split' - inputs: [split_dim, value] - attr: 'num_split' - - op: 'SplitV' - inputs: [value, split_size, split_dim] - - Parameters - ---------- - split_dim: str - Input node which indicates the splitting axis. - This needs to be a constant node after all constant propagations. - - split_size: str - Input node of a tensor indicating the size of each split. - - value: str - Input node which will be splitted. - - num_split: int - Number of splits (evenly splitting). - - name: str - The name of this node - - Examples - -------- - .. sourcecode:: python - - # The axis we are going to split - >>> axis = builder.add_const(np.int(0)) - - # A node we want to split. - >>> value = builder.add_const(np.random(size=[30, 1, 512])) - - # split_size we want. - >>> split_size_1 = builder.add_const(np.array([5, 10, 15])) - >>> split_size_2 = builder.add_const(np.array([5, 10, 16])) - - # Valid split. 
- >>> custom_split_1 = builder.add_split(axis, value, split_size=split_size_1) - # custom_output_1_0 has shape = [5, 1, 512] - >>> custom_output_1_0 = builder.add_get_tuple(custom_split_1, 0) - # custom_output_1_1 has shape = [10, 1, 512] - >>> custom_output_1_1 = builder.add_get_tuple(custom_split_1, 1) - # custom_output_1_2 has shape = [15, 1, 512] - >>> custom_output_1_2 = builder.add_get_tuple(custom_split_1, 2) - - # Invalid split. sum(split_size_2) != value.shape[axis] - >>> custom_split_2 = builder.add_split(axis, value, split_size=split_size_2) - - # Valid split. - >>> even_split_1 = builder.add_split(axis, value, num_split=3) - # even_output_1_0 has shape = [10, 1, 512] - >>> even_output_1_0 = builder.add_get_tuple(even_split_1, 0) - # even_output_1_1 has shape = [10, 1, 512] - >>> even_output_1_1 = builder.add_get_tuple(even_split_1, 1) - # even_output_1_2 has shape = [10, 1, 512] - >>> even_output_1_2 = builder.add_get_tuple(even_split_1, 2) - - # Invalid split. (value.shape[axis] % num_split) != 0 - >>> even_split_2 = builder.add_split(axis, value, num_split=4) - """ - if num_split > 0: - return self._build_op( - 'Split', [split_dim, value], name=name, attr={'num_split': num_split}) - else: - return self._build_op('SplitV', [value, split_size, split_dim], name=name) - - def add_slice( - self, input_name, begin=None, end=None, size=None, strides=None, squeeze=[], name=None): - """ - Add a slice node. - - SSANode form: - op: 'Slice' - inputs: [input_name, begin, size] - - op: 'StridedSlice' - inputs: [input_name, begin, end, strides] - attr: 'shrink_axis_mask' - - Parameters - ---------- - input_name: str - Input node to slice from. - - begin: str or list of int - The beginning index for slicing. - Should have same length as the shape of input node. - - end: str or list of int - The ending index for slicing. - Should have same length as the shape of input node. - - size: str or list of int - The size to slice for each axis. - Should have same length as the shape of input node. - - strides: str - The slicing stride for each axis. - Should have same length as the input node. - Need to be used with parameter 'begin' and 'end'. - - squeeze: [int] - Axes that would be squeezed out. - Need to be used with parameter 'begin' and 'end'. - - name: str - The name of this node - - Attributes - ---------- - shrink_axis_mask: int - A binary mask used for determine whether to squeeze out some axis. - Implementing it by binary masking. - Useful for such slices: A[:, 3, :] (i.e. squeeze out axis at 1) - Which would have mask value 0*2^0 + 1*2^1 + 0*2^2 = 2 - - Examples - -------- - .. 
sourcecode:: python - - # A tensor of shape (3, 2, 3) - >>> value = np.array([[[1, 1, 1], [2, 2, 2]], - >>> [[3, 3, 3], [4, 4, 4]], - >>> [[5, 5, 5], [6, 6, 6]]]) - >>> A = builder.add_const(value) - # Slicing out a fixed size - >>> begin = builder.add_const(np.array([1, 0, 0])) - >>> size1 = builder.add_const(np.array([1, 2, 3])) - >>> size2 = builder.add_const(np.array([2, 1, 3])) - # Returns [[[3, 3, 3], - # [4, 4, 4]]] - >>> ret1 = builder.add_slice(A, begin=begin, size=size1) - # Returns [[[3, 3, 3]], - # [[5, 5, 5]]] - >>> ret2 = builder.add_slice(A, begin=begin, size=size2) - - # Slicing Python-style with begin/end/strides - >>> begin = builder.add_const(np.array([1, -1, 0])) - >>> end = builder.add_const(np.array([2, -3, 3])) - >>> strides = builder.add_const(np.array([1, -1, 1])) - # Same as value[1:2, -1:-3:-1, 0:3] - # Returns [[[4, 4, 4], - # [3, 3, 3]]] - >>> ret3 = builder.add_slice(A, begin=begin, end=end, strides=strides) - # Same as value[1, -1:-3:-1, 0:3] - # Returns [[4, 4, 4], - # [3, 3, 3]] - >>> ret4 = builder.add_slice(A, begin=begin, end=end, strides=strides, squeeze=[0]) - - """ - begin = self._maybe_add_const(begin, "slice_begin") - end = self._maybe_add_const(end, "slice_end") - size = self._maybe_add_const(size, "slice_size") - strides = self._maybe_add_const(strides, "slice_strides") - if end is not None and size is not None: - raise ValueError("The end and size parameters in Slice cannot be used simultaneously.") - if strides is not None and size is not None: - raise ValueError("The strides and size parameters in Slice cannot be used simultaneously.") - - if size is not None: - return self._build_op('Slice', [input_name, begin, size], name=name) - else: - squeeze_mask = 0 - for i in squeeze: - squeeze_mask += 2 ** i - return self._build_op( - 'StridedSlice', [input_name, begin, end, strides], - attr={'shrink_axis_mask': squeeze_mask}, - name=name) - - def add_reduction(self, op, input_name, axis=None, name=None, attr={'keep_dims': False}): - """ - Add a reduction operation. - - SSANode form: - op: op - inputs: [input_name, axis] - attr: 'keep_dims' - - Parameters - ---------- - op: str - The operation performed on this node. - It can be 'Mean', 'Sum', 'Max', 'Min', 'Prod' or 'ArgMax'. - - input_name: str - The name of the input_node - - axis (optional): list of int or node name (str) - The axes along which the operation is performed. - - name: str - The name of this node - - attr: dict - Attributes for reduction node. - If 'keep_dims' is set to True, the rank does not change; otherwise, - rank(output) = rank(input)-1 - """ - axis = self._maybe_add_const(axis, "reduction_axis") - return self._build_op(op, [input_name, axis], name=name, attr=attr) - - def add_squeeze(self, input_name, squeeze_dims=[], name=None): - """ - Remove axes with size of 1. - - SSANode form: - op: 'Squeeze' - inputs: [input_name] - attr: 'squeeze_dims' - - Parameters - ---------- - input_name: str - The name of the input_node - - squeeze_dims: [int] - The axes that are going to be squeezed. If an empty list, all axes - of size 1 will be squeezed out. - - name: str - The name of this node - """ - return self._build_op( - 'Squeeze', [input_name], name=name, attr={'squeeze_dims': squeeze_dims}) - - def add_expanddims(self, input_name, expand_dim, name=None): - """ - Expand the rank by inserting a size-1 axis at expand_dim.
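The `shrink_axis_mask` attribute documented above is simply a bitmask over the `squeeze` axes. A small plain-Python/numpy sketch, mirroring the `squeeze_mask += 2 ** i` loop in `add_slice`:

```python
import numpy as np

# Encode the set of squeezed axes as the shrink_axis_mask bitmask.
def shrink_axis_mask(squeeze):
    mask = 0
    for axis in squeeze:
        mask |= 1 << axis
    return mask

assert shrink_axis_mask([1]) == 2      # A[:, 3, :] squeezes axis 1 -> mask 2
assert shrink_axis_mask([0, 2]) == 5   # squeeze axes 0 and 2 -> 1 + 4

# The ret4 example above is value[1, -1:-3:-1, 0:3]: axis 0 is sliced to
# length 1 and then squeezed out (shrink_axis_mask == 1).
value = np.arange(18).reshape(3, 2, 3)
assert value[1, -1:-3:-1, 0:3].shape == (2, 3)
```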
- - SSANode form: - op: 'ExpandDims' - inputs: [input_name, expand_dims] - - Parameters - ---------- - input_name: str - The name of the input_node - - expand_dim: str or int - The name of the node indicating which axis is going to be expanded. - This node needs to be constant after constant propagation. - - name: str - The name of this node - """ - expand_dim = self._maybe_add_const(expand_dim, "expanddim_axis") - return self._build_op('ExpandDims', [input_name, expand_dim], name=name) - - def add_softmax(self, input_name, name=None): - """ - Do softmax on the last axis (axis "-1") - If axis is not the last, transpose first. - - SSANode form: - op: 'Softmax' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The name of the input_node - - name: str - The name of this node - """ - return self._build_op('Softmax', [input_name], name=name) - - def add_logsoftmax(self, input_name, name=None): - """ - Do logsoftmax on the last axis (axis "-1") - If axis is not the last, transpose first. - - SSANode form: - op: 'LogSoftmax' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The name of the input_node - - name: str - The name of this node - """ - return self._build_op('LogSoftmax', [input_name], name=name) - - def add_shape(self, input_name, attr=None, name=None): - """ - Get shape of the input node. - - SSANode form: - op: 'Shape' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The name of the input_node - - name: str - The name of this node - """ - if attr is None: - attr = {} - return self._build_op('Shape', [input_name], attr=attr, name=name) - - def add_transpose(self, input_name, axes, name=None): - """ - Do a transpose on input_name of axes. - Equivalent of numpy.transpose(input_name, axes) - - SSANode form: - op: 'Transpose' - inputs: [input_name, axes] - - Parameters - ---------- - input_name: str - The name of the input_node - - axes: str - The name of the axes blobs. - - name: str - The name of this node - """ - return self._build_op('Transpose', [input_name, axes], name=name) - - def add_fill(self, shape, value, name=None): - """ - This node outputs a tensor with shape filled with value. - - SSANode form: - op: 'Fill' - inputs: [shape, value] - - Parameters - ---------- - shape: str - The shape of the tensor of this node's output. - - value: str - The name of the blob that consists the value being filled. - - name: str - The name of this node - """ - return self._build_op('Fill', [shape, value], name=name) - - def add_gather(self, A, indices, axis=0, name=None): - """ - Gather node. Often used as embedding layers. - Equivalent to numpy.take(A, indices, axis) - - SSANode form: - op: 'Gather' - inputs: [A, indices, axis=0] - attr: 'axis' - - Parameters - ---------- - A: str - The name of the input_node, which "take" the indices slices. - - indices: str or int or list[int] - The name of input node of indices. - - axis: str or int - If str, it would be a node that indicates the axis to take. - If int, this would just be the "Const" used for take. - - name: str - The name of this node - """ - indices = self._maybe_add_const(indices, "gather_indices") - if isinstance(axis, int): - return self._build_op('Gather', [A, indices], attr={'axis': axis}, name=name) - else: - return self._build_op('Gather', [A, indices, axis], name=name) - - def add_while(self, input_name, body_function, cond_function, name=None): - """ - Add while node into the graph. 
- - SSANode form: - op: 'while' - inputs: [input_name] - attr: 'body_function', 'cond_function' - - Parameters - ---------- - input_name: str - The name of the input node; should be of type "make_tuple" - - body_function: str - The body function's name. - - cond_function: str - The cond function's name. - - name: str - The name of this node - - Examples - -------- - .. sourcecode:: python - - # Let's build a network that performs: - # - # target = some_user_input - # i = 0 - # while (i < target): - # i += 1 - # - # print(i) - - >>> converter = coremlconverters.NNSSAConverter() - - # We should "append" a prefix when building at the graph level. - # No two nodes may have the same name in an NNSSA, but this cannot be - # checked when building bottom-up from GraphBuilder. - >>> sb = coremlconverters.SSABuilder() - # The graph builder for the main graph - >>> gb = coremlconverters.GraphBuilder(prefix="main_") - # The graph builder for the body function - >>> body_builder = coremlconverters.GraphBuilder(prefix="body_") - # The graph builder for the condition function - >>> cond_builder = coremlconverters.GraphBuilder(prefix="cond_") - - # Let's build the main graph first. - >>> i = gb.add_const(np.int32(0), name="i") - >>> target = gb.add_placeholder(init_from=np.int32(5), name="target") - # The input of the while loop needs to be a single "make_tuple" node. - # All condition variables and inputs to the body function should be packed here. - >>> mt = gb.add_make_tuple([target, i], name="make_tuple_0") - # while node takes the tuple node as input, and has the "function names" of body/cond as attributes. - # The input "mt" here will pass through to both body and cond function_entry. - >>> loop = gb.add_while(mt, "body_function_0", "cond_function_0", name="loop") - # The output of the while loop is a tuple; fetch its elements with get_tuple. - >>> out = gb.add_get_tuple(loop, index=1, name="out") - - # We need a function_entry for every function created. - >>> b_entry = body_builder.add_function_entry(name="body_function_0") - >>> add_one = body_builder.add_const(np.int32(1), name="one") - # The function_entry passed in (which is mt above) is a tuple. - >>> to_add = body_builder.add_get_tuple(b_entry, index=1) - >>> target = body_builder.add_get_tuple(b_entry, index=0) - >>> added = body_builder.add_elementwise("Add", [to_add, add_one]) - # We also need to pack the returns into a tuple. - # Note that the "return" value here will be passed to body and cond's function_entry every iteration. - >>> ret = body_builder.add_make_tuple([target, added]) - >>> body_builder.add_return(ret) - - >>> c_entry = cond_builder.add_function_entry(name="cond_function_0") - >>> now = cond_builder.add_get_tuple(c_entry, index=1) - >>> target = cond_builder.add_get_tuple(c_entry, index=0) - >>> stop = cond_builder.add_elementwise("Less", [now, target], name="cond") - # Unlike the return of body function, which will be passed back to function_entry, cond returns a bool. - >>> cond = cond_builder.add_return(stop) - - # Let's add everything up. - >>> sb.add_graph(gb.get_graph()) - >>> sb.add_function(body_builder.get_function(), name="body_function_0") - >>> sb.add_function(cond_builder.get_function(), name="cond_function_0") - - # This is the NNSSA graph for a while loop.
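To make the control flow concrete, here is the same loop traced in plain Python. The function names mirror the example above and are purely illustrative, not a real API:

```python
# state is the tuple built by make_tuple: (target, i)
def cond_function(state):
    target, i = state
    return i < target          # cond returns a bool (the 'Less' node above)

def body_function(state):
    target, i = state
    return (target, i + 1)     # body returns the next state tuple

state = (5, 0)                 # from the "target" placeholder and "i" const
while cond_function(state):
    state = body_function(state)
assert state == (5, 5)         # 'out' == get_tuple(loop, index=1) == 5
```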
- >>> ssa = sb.get_ssa() - - """ - return self._build_op( - 'while', [input_name], - name=name, - attr={ - 'body_function': body_function, - 'cond_function': cond_function - }) - - def add_select(self, cond, true_branch, false_branch, name=None): - """ - A select node. Behaves like an "if statement". - - SSANode form: - op: 'Select' - inputs: [cond, true_branch, false_branch] - - Parameters - ---------- - cond: str - The name of the condition blob. - - true_branch: str - The name of the blob that the node would output when cond is true. - - false_branch: str - The name of the blob that the node would output when cond is false. - - name: str - The name of this node - """ - return self._build_op('Select', [cond, true_branch, false_branch], name=name) - - def add_range(self, start=None, stop=None, step=None, name=None): - """ - Construct a range tensor. - Equivalent to numpy.arange - - SSANode form: - op: 'Range' - inputs: [stop, step] - - op: 'Range' - inputs: [start, stop, step] - - Parameters - ---------- - start: str or int - (Optional) The name of the node that provides the starting index. - Default is 0. - - stop: str or int - The name of the node that provides the ending index. - - step: str or int - The name of the node that provides the step size. - - name: str - The name of this node - """ - input_names = [] - start = self._maybe_add_const(start, "range_start") - stop = self._maybe_add_const(stop, "range_stop") - step = self._maybe_add_const(step, "range_step") - if start is not None: - input_names.append(start) - input_names = input_names + [stop, step] - - return self._build_op('Range', input_names, name=name) - - def add_shape(self, input_name, name=None): - """ - Output the shape of the input blob. - - SSANode form: - op: 'Shape' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The name of the input blob. - - name: str - The name of this node - """ - return self._build_op('Shape', [input_name], name=name) - - def add_rank(self, input_name, name=None): - """ - Output the rank of the input blob. - - SSANode form: - op: 'Rank' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The name of the input blob. - - name: str - The name of this node - """ - return self._build_op('Rank', [input_name], name=name) - - def add_padding(self, input_name, paddings, constant_values=0.0, name=None): - """ - Pads a tensor. - - SSANode form: - op: 'Pad' - inputs: [input_name, paddings] - attr: 'constant_values' - - Parameters - ---------- - input_name: str - The name of the input_node - - paddings: str - The name of the paddings spec. - This should be a constant. - - constant_values: float - Constant value for padding. - - name: str - The name of this node - """ - return self._build_op( - 'Pad', [input_name, paddings], name=name, attr={'constant_values': constant_values}) - - def add_pooling( - self, - input_name, - ksize, - strides, - pooling_type, - padding="SAME", - data_format="NHWC", - name=None): - """ - Add pooling node for SSAfunctions. - - SSANode form: - op: 'MaxPool' - inputs: [input_name] - attr: 'ksize', 'strides', 'padding', 'data_format' - - op: 'AvgPool' - inputs: [input_name] - attr: 'ksize', 'strides', 'padding', 'data_format' - - Parameters - ---------- - input_name: str - The name of the input_node - - ksize: [int] - A list of 4 integers. - The size of the window for each dimension of the input tensor. - - strides: [int] - A list of 4 integers. - The stride of the sliding window for each dimension of the input tensor. 
- - pooling_type: str - Could be "MAX" or "AVG". - - padding: str - Could be "SAME" or "VALID". - - data_format: str - Could be "NHWC" or "NCHW". - - name: str - The name of this node - """ - attr = {} - attr['ksize'] = ksize - attr['strides'] = strides - attr['padding'] = padding - attr['data_format'] = data_format - if pooling_type == 'MAX': - return self._build_op('MaxPool', [input_name], attr=attr, name=name) - elif pooling_type == 'AVG': - return self._build_op('AvgPool', [input_name], attr=attr, name=name) - else: - raise ValueError("Pooling type unsupported") - - def add_conv2D( - self, input_name, filter_name, strides, padding="SAME", data_format="NHWC", name=None): - """ - Add a conv2D node for SSA functions. - - SSANode form: - op: 'Conv2D' - inputs: [input_name, filter_name] - attr: 'strides', 'padding', 'data_format' - - Parameters - ---------- - input_name: str - The name of the input_node, should be a 4D input. - - filter_name: str - Should be a 4D input. - A 4D tensor of shape [filter_height, filter_width, in_channels, out_channels] - - strides: [int] - A list of 4 integers. - The stride of the sliding window for each dimension of the input tensor. - - padding: str - Could be "SAME" or "VALID". - - data_format: str - Could be "NHWC" or "NCHW". - - name: str - The name of this node - """ - attr = {} - attr['strides'] = strides - attr['padding'] = padding - attr['data_format'] = data_format - attr['dilations'] = [1, 1, 1, 1] - - return self._build_op('Conv2D', [input_name, filter_name], attr=attr, name=name) - - def add_function_entry(self, name=None): - """ - Add a function_entry node for SSA functions. - This is the *input*, so no input should be directed here. - The output nodes of this op should always be get_tuple. - - SSANode form: - op: 'function_entry' - inputs: [] - - Parameters - ---------- - name: str - The name of this node - """ - return self._build_op('function_entry', [], name=name) - - def add_return(self, input_name, name=None): - """ - Add a return node for SSA functions. - - SSANode form: - op: 'return' - inputs: [input_name] - - Parameters - ---------- - input_name: str - The name of the input node; should be of type "make_tuple" - - name: str - The name of this node - """ - return self._build_op('return', [input_name], name=name) - - def _maybe_add_const(self, var, var_name=None): - """ - If `var` is int, float, or list, add a const node and return the ssa - name of the added const node. If `var` is str or None, return `var` - (no-op) - - var_name (str): The SSA name of the new const node, if created, will - be `var_name` + incrementing counter.
- """ - if isinstance(var, str) or var is None: - return var - if sys.version_info < (3, 0) and isinstance(var, unicode): - return var - if not var_name: - var_name = "generated_const" - if not hasattr(self, var_name): - setattr(self, var_name, 0) - node_name = self.prefix + var_name + str(getattr(self, var_name)) - setattr(self, var_name, getattr(self, var_name) + 1) - if isinstance(var, int): - return self.add_const(np.int32(var), name=node_name) - if isinstance(var, float): - return self.add_const(np.float32(var), name=node_name) - if isinstance(var, list): - return self.add_const(np.asarray(var), name=node_name) - if isinstance(var, np.ndarray): - return self.add_const(var, name=node_name) - raise RuntimeError("Unable to create const node for " + str(var)) - - def add_LSTMBlock( - self, - input_vec, - W, - b, - prev_h=None, - prev_cs=None, - mode='cell', - forget_bias=0.0, - time_major=True, - bidirectional=False, - output_all_states=True, - name=None): - """ - Build a LSTM Block. - - LSTM Cell performs the following operation: - xh = [x, prev_h] - [i, ci, f, o] = xh * W + b - f = f + forget_bias - i = sigmoid(i) - f = sigmoid(f) - ci = tanh(ci) - cs = ci .* i + prev_cs .* f - o = sigmoid(o) - co = tanh(cs) - h = co .* o - - SSANode form: - op: 'LSTMBlock' - inputs: [input_vec, W, b, prev_h, prev_cs] - attr: 'mode', 'forget_bias', 'time_major', 'bidirectional', 'output_all_states' - - Examples - -------- - .. sourcecode:: python - - # A sample for the LSTMBlock that can be used for the encoder (static length if input given) - # Setup for the hidden size and input size - >>> hidden_size = 8 - >>> input_size = 15 - >>> ph_encoder = builder.add_placeholder(datatype=builtins.tensor(builtins.fp32, [5, 1, 15]), name="ph_encoder") - >>> W_val = np.random.random(size=(input_size+hidden_size,4*hidden_size)).astype(np.float32) - # The weight matrix - >>> W = builder.add_const(W_val) - # The bias vector - >>> b = builder.add_const(np.zeros(shape=[4*hidden_size]).astype(np.float32)) - # The previous cell state and hidden state can be None if it is in encoder mode. - >>> prev_cs_encoder = None - >>> prev_h_encoder = None - - >>> lstm_encoder = builder.add_LSTMBlock(ph_encoder, - >>> W, - >>> b, - >>> prev_h=prev_h_encoder, - >>> prev_cs=prev_cs_encoder, - >>> mode="encoder") - - # Fetch the output through get_tuple. - >>> o = builder.add_get_tuple(lstm_encoder, index=0, name="o") - >>> h = builder.add_get_tuple(lstm_encoder, index=1, name="h") - >>> c = builder.add_get_tuple(lstm_encoder, index=2, name="c") - - # Similarly, we can just use cell for each timestamp if we want. - # The input is [batch_size, input_size] without the sequence length. - >>> ph_cell = builder.add_placeholder(datatype=builtins.tensor(builtins.fp32, [1, 15]), name="ph_cell") - # The previous cell state and hidden state must be set if it is in cell mode. - >>> prev_cs_cell = gb.add_const(np.zeros(shape=[hidden_size]).astype(np.float32)) - >>> prev_h_cell = gb.add_const(np.zeros(shape=[hidden_size]).astype(np.float32)) - - # We just reuse weight/bias/sizes with the encoder over here. - >>> lstm_cell = builder.add_LSTMBlock(ph_cell, - >>> W, - >>> b, - >>> prev_h=prev_h_cell, - >>> prev_cs=prev_cs_cell, - >>> mode="cell") - - # Fetch the output through get_tuple. 'o' is dummy in cell mode. 
- >>> _ = builder.add_get_tuple(lstm_cell, index=0) - >>> h = builder.add_get_tuple(lstm_cell, index=1, name="h") - >>> c = builder.add_get_tuple(lstm_cell, index=2, name="c") - - Parameters - ---------- - input_vec: str - The input to the LSTM Block. - Shape of input_vec should be: - - mode == 'cell': - (batch size, input size) - - mode == 'encoder': - time_major == True: (seq_len, batch size, input size) - time_major == False: (batch size, seq_len, input size) - - W: str [input_size + hidden_size, {4, 8} * hidden_size] - The weight matrix. - Concatenation of [W_i, W_g, W_f, W_o], see notes on how the order should be. - If bidirectional encoder is being used, we will have W as: - W = np.concatenate([W_fw, W_bw], axis=-1) - Shape should be: - - mode == 'cell': - (input_size + hidden_size, 4 * hidden_size) - - mode == 'encoder': - bidirectional == True: (input_size + hidden_size, 8 * hidden_size) - bidirectional == False: (input_size + hidden_size, 4 * hidden_size) - - b: str [4 * hidden_size] - The bias vector. - - prev_h: str [batch_size, {1, 2} * hidden_size], optional - Output of the previous cell at previous time step. - If mode == 'encoder', this is optional and will be set to zero-state if not provided. - If bidirectional is True, concatenation should be done on last axis. - - prev_cs: str [batch_size, {1, 2} * hidden_size], optional - Value of the cell state at previous timestamp. - If mode == 'encoder', this is optional and will be set to zero-state if not provided. - If bidirectional is True, concatenation should be done on last axis. - - name: str - The name of this node - - mode: str - 'cell' or 'encoder' - - forget_bias: int - See notes. Whether there's a bias for the forget gate. - - time_major: bool - See notes. Only valid when mode == 'encoder'. Default True. - - bidirectional: bool - See notes. Only valid when mode == 'encoder'. Default False. - - output_all_states: bool - See output notes. Only valid when mode == 'encoder'. Default True. - - Outputs - ------- - Need to use get_tuple to obtain outputs. 
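As a concrete reference for the cell-mode equations in the docstring above, here is a plain-numpy sketch. Shapes follow the Parameters section; this is illustrative only, not the converter's implementation:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Cell-mode LSTM step: x [batch, input], prev_h/prev_cs [batch, hidden],
# W [input+hidden, 4*hidden], b [4*hidden]; gate order is [i, ci, f, o].
def lstm_cell(x, prev_h, prev_cs, W, b, forget_bias=0.0):
    xh = np.concatenate([x, prev_h], axis=1)
    i, ci, f, o = np.split(xh @ W + b, 4, axis=1)
    f = sigmoid(f + forget_bias)
    i, ci, o = sigmoid(i), np.tanh(ci), sigmoid(o)
    cs = ci * i + prev_cs * f
    h = np.tanh(cs) * o
    return h, cs

batch, input_size, hidden = 1, 15, 8
x = np.random.random((batch, input_size)).astype(np.float32)
h0 = c0 = np.zeros((batch, hidden), dtype=np.float32)
W = np.random.random((input_size + hidden, 4 * hidden)).astype(np.float32)
b = np.zeros(4 * hidden, dtype=np.float32)
h1, c1 = lstm_cell(x, h0, c0, W, b)
assert h1.shape == (batch, hidden) and c1.shape == (batch, hidden)
```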
- - - mode == 'cell': - [Output_state, Hidden_state, Cell_state] - - mode == 'encoder': - - bidirectional == False - [Output_state, Hidden_state, Cell_state] - - bidirectional == True - [Output_state, Hidden_state_fwd, Cell_state_fwd, - Hidden_state_bck, Cell_state_bck] - - Output_state has shape (assuming time_major = True) - - output_all_states = True: [seq_len, batch_size, {1, 2} * hidden_size] - - output_all_states = False: [1, batch_size, {1, 2} * hidden_size] - Hidden_state*, Cell_state* both have shape [1, batch_size, hidden_size] - """ - - attr = dict() - attr['mode'] = mode - attr['forget_bias'] = forget_bias - if mode == 'encoder': - attr['time_major'] = time_major - attr['bidirectional'] = bidirectional - attr['output_all_states'] = output_all_states - - inputs = [input_vec, W, b] - if mode == 'cell': - inputs += [prev_h, prev_cs] - return self._build_op('LSTMBlock', inputs, attr=attr, name=name) diff --git a/coremltools/converters/nnssa/commons/basic_graph_ops.py b/coremltools/converters/nnssa/commons/basic_graph_ops.py deleted file mode 100644 index a371126af..000000000 --- a/coremltools/converters/nnssa/commons/basic_graph_ops.py +++ /dev/null @@ -1,265 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - - -def connect_edge(g, source, dest): - g[source].outputs.append(dest) - g[dest].inputs.append(source) - - -def replace_source(g, source, dest, new_source): - for idx, d in enumerate(g[dest].inputs): - if d == source: - g[dest].inputs[idx] = new_source - g[new_source].outputs.append(dest) - - g[source].outputs = [i for i in g[source].outputs if i != dest] - - -def replace_control_source(g, source, dest, new_source): - for idx, d in enumerate(g[dest].control_inputs): - if d == source: - g[dest].control_inputs[idx] = new_source - g[new_source].control_outputs.append(dest) - - g[source].control_outputs = [i for i in g[source].control_outputs if i != dest] - - -def replace_dest(g, source, dest, new_dest): - for idx, d in enumerate(g[source].outputs): - if d == dest: - g[source].outputs[idx] = new_dest - g[new_dest].inputs.append(source) - - g[dest].inputs = [i for i in g[dest].inputs if i != source] - - -def replace_control_dest(g, source, dest, new_dest): - for idx, d in enumerate(g[source].control_outputs): - if d == dest: - g[source].control_outputs[idx] = new_dest - g[new_dest].control_inputs.append(source) - - g[dest].control_inputs = [i for i in g[dest].control_inputs if i != source] - - -def connect_dests(g, source, dests): - for i in dests: - connect_edge(g, source, i) - - -def connect_sources(g, sources, dest): - for i in sources: - connect_edge(g, i, dest) - - -def disconnect_edge(g, source, dest): - g[source].outputs = [i for i in g[source].outputs if i != dest] - g[dest].inputs = [i for i in g[dest].inputs if i != source] - - -def disconnect_control_edge(g, source, dest): - g[source].control_outputs = [i for i in g[source].control_outputs if i != dest] - g[dest].control_inputs = [i for i in g[dest].control_inputs if i != source] - - -def disconnect_vertex_outs(g, source): - source_node = g[source] - for out in source_node.outputs: - g[out].inputs = [i for i in g[out].inputs if i != source_node.name] - source_node.outputs = [] - - -def disconnect_vertex_ins(g, dest): - dest_node = g[dest] - for innode in dest_node.inputs: - g[innode].outputs = [i for i in g[innode].outputs if i != dest_node.name] - dest_node.inputs = [] - - -def disconnect_vertex_control_ins(g, 
dest): - dest_node = g[dest] - for innode in dest_node.control_inputs: - g[innode].control_outputs = [ - i for i in g[innode].control_outputs if i != dest_node.name - ] - dest_node.control_inputs = [] - - def disconnect_vertex_control_outs(g, source): - source_node = g[source] - for out in source_node.control_outputs: - g[out].control_inputs = [ - i for i in g[out].control_inputs if i != source_node.name - ] - source_node.control_outputs = [] - - def delete_node(g, name): - disconnect_vertex_ins(g, name) - disconnect_vertex_outs(g, name) - disconnect_vertex_control_ins(g, name) - disconnect_vertex_control_outs(g, name) - del g[name] - - def replace_node(g, original_node, new_node): - for o in list(g[original_node].control_outputs): - replace_control_source(g, original_node, o, new_node) - for o in list(g[original_node].outputs): - replace_source(g, original_node, o, new_node) - - def fill_outputs(gd): - """ - Fills the output lists of a graph of ParsedNodes. - - Takes a graph in "dict{str, ParsedNode}" form, and returns a new graph. - """ - # fill outputs - for k, v in gd.items(): - for i in v.inputs: - gd[i].outputs.append(v.name) - for i in v.control_inputs: - gd[i].control_outputs.append(v.name) - get_tuple_ops = ['Split', 'SplitV', 'LSTMBlock'] - for k, v in gd.items(): - if v.op in get_tuple_ops: - outputs = [[out, int(gd[out].attr['index'])] for out in v.outputs] - outputs.sort(key=lambda x: x[1]) - gd[k].outputs = [out for [out, _] in outputs] - - return gd - - def check_connections(gd): - """ - Given a graph, checks that all - - inputs/outputs are symmetric - - control_inputs/control_outputs are symmetric - - The graph does not reference vertices outside of the graph - - Takes a graph in "dict{str, ParsedNode}" form. Does not return, - asserts false on failure. - """ - # check that inputs and outputs line up - for k, v in gd.items(): - for i in v.inputs: - assert (k in gd[i].outputs) - for i in v.outputs: - assert (k in gd[i].inputs) - for i in v.control_inputs: - assert (k in gd[i].control_outputs) - for i in v.control_outputs: - message = "Node " + k + " not in " + i + " control_inputs" - assert (k in gd[i].control_inputs), message - - def const_determined_nodes(gd, assume_variable_nodes=[]): - """ - Given a graph, extract all nodes that depend only on const nodes. - - # TODO: extract nodes that depend on the "const part" of placeholders.
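A toy illustration of the input/output symmetry that `fill_outputs` establishes and `check_connections` asserts. The `Node` class below is a hypothetical stand-in for `ParsedNode`, and the two functions are assumed importable from the module shown above:

```python
# Hypothetical minimal stand-in for ParsedNode; the real class has more fields.
class Node:
    def __init__(self, name, inputs=(), op='Const'):
        self.name, self.op = name, op
        self.inputs = list(inputs)
        self.outputs = []
        self.control_inputs, self.control_outputs = [], []

# Assumes the functions defined in basic_graph_ops above are in scope, e.g.:
# from coremltools.converters.nnssa.commons.basic_graph_ops import (
#     fill_outputs, check_connections)
g = {
    'a': Node('a'),
    'b': Node('b', inputs=['a'], op='Identity'),
    'c': Node('c', inputs=['a', 'b'], op='Add'),
}
fill_outputs(g)                      # populate outputs from the inputs lists
assert g['a'].outputs == ['b', 'c']
check_connections(g)                 # passes: edges are now symmetric
```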
- """ - vis = {} - - def visit(node): - # make sure node is a ParsedNode - from ..nnssa import ParsedNode - if not isinstance(node, ParsedNode): - node = gd[node] - if node.name in vis: - return - - if 'Const' in node.op: - vis[node.name] = True - elif 'Variable' in node.op: - vis[node.name] = False - elif 'Placeholder' in node.op: - vis[node.name] = False - elif 'TensorArray' in node.op: - vis[node.name] = False - elif "function" in node.op: - vis[node.name] = False - elif "global" in node.op: - vis[node.name] = False - elif node.name in assume_variable_nodes: - vis[node.name] = False - else: - ret = True - vis[node.name] = False - for innode in node.inputs: - if innode not in vis: - visit(innode) - if not vis[innode]: - ret = False - break - vis[node.name] = ret - - for k, v in gd.items(): - if k in vis: - continue - visit(k) - - ret = [] - for k, v in vis.items(): - if v: - ret.append(k) - return ret - - -def topsort(graph): - if len(graph) == 0: - return [] - inedge_count = {k: len(v.inputs) + len(v.control_inputs) for k, v in graph.items()} - ret = [] - curboundary = [k for k, v in inedge_count.items() if v == 0] - nextboundary = [] - if len(curboundary) == 0: - raise ValueError("Graph is not a DAG!") - - while (len(curboundary) > 0): - ret.extend(curboundary) - for b in curboundary: - for o in graph[b].outputs + graph[b].control_outputs: - inedge_count[o] -= 1 - if inedge_count[o] == 0: - nextboundary.append(o) - curboundary = nextboundary - nextboundary = [] - if len(ret) != len(graph): - raise ValueError("Graph is not a DAG!") - return ret - - -def simple_topsort(inputs): - if len(inputs) == 0: - return [] - outputs = {k: [] for k in inputs} - for k in inputs: - for o in inputs[k]: - outputs[o].append(k) - - inedge_count = {k: len(v) for k, v in inputs.items()} - ret = [] - curboundary = [k for k, v in inedge_count.items() if v == 0] - nextboundary = [] - if len(curboundary) == 0: - raise ValueError("Graph is not a DAG!") - - while (len(curboundary) > 0): - ret.extend(curboundary) - for b in curboundary: - for o in outputs[b]: - inedge_count[o] -= 1 - if inedge_count[o] == 0: - nextboundary.append(o) - curboundary = nextboundary - nextboundary = [] - if len(ret) != len(inputs): - import pdb - pdb.set_trace() - raise ValueError("Graph is not a DAG!") - return ret diff --git a/coremltools/converters/nnssa/commons/builtins/__init__.py b/coremltools/converters/nnssa/commons/builtins/__init__.py deleted file mode 100644 index 08a3d82ff..000000000 --- a/coremltools/converters/nnssa/commons/builtins/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: disable=wildcard-import -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -from .type_double import fp16, fp32, fp64, float, double, is_float -from .type_int import int8, int16, int32, int64, int, uint8, uint16, uint32, uint64, uint, is_int -from .type_str import str -from .type_bool import bool, is_bool -from .type_list import list, empty_list, is_list -from .type_tensor import tensor, is_tensor_and_is_compatible, \ - is_tensor_and_is_compatible_general_shape, is_tensor, tensor_has_complete_shape -from .type_dict import dict, empty_dict -from .type_void import void -from .type_globals_pseudo_type import globals_pseudo_type -from .type_unknown import unknown -from .type_tuple import tuple, is_tuple -from .utils import is_primitive, is_scalar -from .annotate import annotate -from .annotate import class_annotate -from .annotate import 
apply_delayed_types -from .annotate import delay_type -from .type_spec import * -from .get_type_info import * -from .operator_names import * -from .global_methods import global_remap -from math import log, exp - -apply_delayed_types() \ No newline at end of file diff --git a/coremltools/converters/nnssa/commons/builtins/operator_names.py b/coremltools/converters/nnssa/commons/builtins/operator_names.py deleted file mode 100644 index 16168a548..000000000 --- a/coremltools/converters/nnssa/commons/builtins/operator_names.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -__bin_operator_to_python_name = {} -__bin_operator_to_python_name['+'] = '__add__' -__bin_operator_to_python_name['-'] = '__sub__' -__bin_operator_to_python_name['*'] = '__mul__' -__bin_operator_to_python_name['/'] = '__div__' -__bin_operator_to_python_name['%'] = '__mod__' -__bin_operator_to_python_name['<'] = '__lt__' -__bin_operator_to_python_name['<='] = '__le__' -__bin_operator_to_python_name['>'] = '__gt__' -__bin_operator_to_python_name['>='] = '__ge__' -__bin_operator_to_python_name['=='] = '__eq__' -__bin_operator_to_python_name['!='] = '__ne__' -__bin_operator_to_python_name['in'] = '__contains__' -__bin_operator_to_python_name['getitem'] = '__getitem__' -__bin_operator_to_python_name['setitem'] = '__setitem__' - -__unary_operator_to_python_name = {} -__unary_operator_to_python_name['-'] = '__neg__' -__unary_operator_to_python_name['!'] = '__not__' - - -def bin_operator_to_python_name(op): - return __bin_operator_to_python_name.get(op, None) - - -def unary_operator_to_python_name(op): - return __unary_operator_to_python_name.get(op, None) diff --git a/coremltools/converters/nnssa/commons/builtins/type_tensor.py b/coremltools/converters/nnssa/commons/builtins/type_tensor.py deleted file mode 100644 index e0ca03198..000000000 --- a/coremltools/converters/nnssa/commons/builtins/type_tensor.py +++ /dev/null @@ -1,161 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from .annotate import annotate -from .type_void import void -from .type_int import * -from .type_spec import * -from .get_type_info import get_type_info -import sympy as sm - - -def memoize(f): - memo = {} - - def helper(x, y): - y = tuple(y) - if ( - x, - y, - ) not in memo: - memo[( - x, - y, - )] = f( - x, - y, - ) - return memo[( - x, - y, - )] - - return helper - - -@memoize -def tensor(primitive, shape): - shape = tuple(shape) - - class tensor: - T = [primitive, shape] - - def __init__(self): - self.val = [] - - @classmethod - def __type_info__(cls): - return Type("tensor", [get_type_info(primitive)] + list(shape), python_class=cls) - - @classmethod - def get_primitive(cls): - return primitive - - @classmethod - def get_shape(cls): - return shape - - tensor.__template_name__ = "tensor[" + primitive.__name__ + "," + ",".join( - str(s) for s in shape) + "]" - return tensor - - -def is_tensor_and_is_compatible(tensor_type1, tensor_type2): - # returns a pair of (bool, type) - # If Both are tensors, and have compatible shape, the first return is true - # The return will be the most specific version of the tensor type. - # Note that this may not be either tensor types. i.e. 
- # - # is_tensor_and_is_compatible(tensor[fp32,[10,-1]] ,tensor[fp32,[-1,20]]) - # will return True, tensor[fp32, [10,20]] - - if tensor_type1 is None or tensor_type2 is None: - return False, None - if get_type_info(tensor_type1).name != 'tensor' or get_type_info(tensor_type2).name != 'tensor': - return False, None - shape1 = tensor_type1.get_shape() - shape2 = tensor_type2.get_shape() - - if tensor_type1.get_primitive() != tensor_type2.get_primitive(): - return False, None - - if len(shape1) == 0: - return True, tensor_type2 - if len(shape2) == 0: - return True, tensor_type1 - - if len(shape1) != len(shape2): - return False, None - - most_specific_shape = [] - for i in range(len(shape1)): - if shape1[i] == -1 or issubclass(type(shape1[i]), sm.Basic): - most_specific_shape.append(shape2[i]) - elif shape2[i] == -1 or issubclass(type(shape2[i]), sm.Basic): - most_specific_shape.append(shape1[i]) - elif shape1[i] == shape2[i]: - most_specific_shape.append(shape1[i]) - elif shape1[i] != shape2[i]: - return False, None - - return True, tensor(tensor_type1.get_primitive(), most_specific_shape) - - -def is_tensor_and_is_compatible_general_shape(tensor_type1, tensor_type2): - # returns a pair of (bool, type) - # If Both are tensors, and have compatible shape, the first return is true - # The return will be the most general version of the tensor type. - # Note that this may not be either tensor types. i.e. - # - # is_tensor_and_is_compatible(tensor[fp32,[10,-1]] ,tensor[fp32,[-1,20]]) - # will return True, tensor[fp32, [-1,-1]] - - if tensor_type1 is None or tensor_type2 is None: - return False, None - if get_type_info(tensor_type1).name != 'tensor' or get_type_info(tensor_type2).name != 'tensor': - return False, None - shape1 = tensor_type1.get_shape() - shape2 = tensor_type2.get_shape() - - if tensor_type1.get_primitive() != tensor_type2.get_primitive(): - return False, None - - if len(shape1) == 0: - return True, tensor_type2 - if len(shape2) == 0: - return True, tensor_type1 - - if len(shape1) != len(shape2): - return False, None - - most_general_shape = [] - for i in range(len(shape1)): - if shape1[i] == -1 or issubclass(type(shape1[i]), sm.Basic): - most_general_shape.append(shape1[i]) - elif shape2[i] == -1 or issubclass(type(shape2[i]), sm.Basic): - most_general_shape.append(shape2[i]) - elif shape1[i] == shape2[i]: - most_general_shape.append(shape1[i]) - elif shape1[i] != shape2[i]: - return False, None - - return True, tensor(tensor_type1.get_primitive(), most_general_shape) - - -def is_tensor(tensor_type): - if tensor_type is None: - return False - return get_type_info(tensor_type).name == 'tensor' - - -def tensor_has_complete_shape(tensor_type): - if not is_tensor(tensor_type): - return True - s = tensor_type.get_shape() - if -1 in s: - return False - elif len(s) == 0: - return False - else: - return True diff --git a/coremltools/converters/nnssa/commons/builtins/type_unknown.py b/coremltools/converters/nnssa/commons/builtins/type_unknown.py deleted file mode 100644 index b35611351..000000000 --- a/coremltools/converters/nnssa/commons/builtins/type_unknown.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from .annotate import class_annotate, annotate, delay_type -from .type_spec import * - - -class unknown: - @classmethod - def __type_info__(cls): - return Type("unknown", python_class=cls) diff --git 
a/coremltools/converters/nnssa/commons/builtins/utils.py b/coremltools/converters/nnssa/commons/builtins/utils.py deleted file mode 100644 index 19eddc36f..000000000 --- a/coremltools/converters/nnssa/commons/builtins/utils.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- -from .type_bool import bool as builtins_bool -from .type_double import is_float, fp16 as builtins_fp16, fp32 as builtins_fp32, fp64 as builtins_fp64 -from .type_int import is_int, int8 as builtins_int8, int16 as builtins_int16, int32 as builtins_int32, \ - int64 as builtins_int64, uint8 as builtins_uint8, uint16 as builtins_uint16, uint32 as builtins_uint32, \ - uint64 as builtins_uint64 -from .type_str import str as builtins_str -import numpy as np - -_NPTYPES_TO_BUILTINS = { - np.dtype(np.bool_): builtins_bool, - np.dtype(np.int8): builtins_int8, - np.dtype(np.int16): builtins_int16, - np.dtype(np.int32): builtins_int32, - np.dtype(np.int64): builtins_int64, - np.dtype(np.uint8): builtins_uint8, - np.dtype(np.uint16): builtins_uint16, - np.dtype(np.uint32): builtins_uint32, - np.dtype(np.uint64): builtins_uint64, - np.dtype(np.float16): builtins_fp16, - np.dtype(np.float32): builtins_fp32, - np.dtype(np.float64): builtins_fp64 -} - -_BUILTINS_TO_NPTYPES = { - builtins_bool: np.bool_, - builtins_int8: np.int8, - builtins_int16: np.int16, - builtins_int32: np.int32, - builtins_int64: np.int64, - builtins_uint8: np.uint8, - builtins_uint16: np.uint16, - builtins_uint32: np.uint32, - builtins_uint64: np.uint64, - builtins_fp16: np.float16, - builtins_fp32: np.float32, - builtins_fp64: np.float64 -} - - -def builtin_from_nptype(nptype): - """ - Given a numpy dtype, return its corresponding primitive builtin type. - """ - return _NPTYPES_TO_BUILTINS.get(np.dtype(nptype), None) - - -def nptype_from_builtin(btype): - """ - Given a Nitro builtin type, return its corresponding Numpy dtype. - """ - return _BUILTINS_TO_NPTYPES.get(btype, None) - - -def promote_types(dtype1, dtype2): - """ - Get the smallest type to which the given scalar types can be cast. - - Args: - dtype1 (apple_nitro.builtin): - dtype2 (apple_nitro.builtin): - - Returns: - A Nitro builtin datatype or None. - """ - nptype1 = nptype_from_builtin(dtype1) - nptype2 = nptype_from_builtin(dtype2) - nppromoted = np.promote_types(nptype1, nptype2) - return builtin_from_nptype(nppromoted) - - -def is_primitive(btype): - """ - Is the indicated Nitro builtin type a primitive? - """ - return btype is builtins_bool or btype is builtins_str or is_float(btype) or is_int(btype) - -def is_scalar(btype): - """ - Is the given builtin type a scalar integer, float, or boolean? - """ - return btype is builtins_bool or is_int(btype) or is_float(btype) \ No newline at end of file diff --git a/coremltools/converters/nnssa/commons/dot_visitor.py b/coremltools/converters/nnssa/commons/dot_visitor.py deleted file mode 100644 index e1fcb11a6..000000000 --- a/coremltools/converters/nnssa/commons/dot_visitor.py +++ /dev/null @@ -1,102 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from . import builtins - - -class DotVisitor(object): - """ - Generates a dot description of a graph in dictionary form. 
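`promote_types` above delegates to `np.promote_types` after mapping builtins to numpy dtypes and back. A quick sketch of the promotions that implies (plain numpy, illustrative only):

```python
import numpy as np

# The numpy promotions that back promote_types(dtype1, dtype2):
assert np.promote_types(np.int32, np.float32) == np.dtype(np.float64)
assert np.promote_types(np.uint8, np.int8) == np.dtype(np.int16)
assert np.promote_types(np.float16, np.float32) == np.dtype(np.float32)
# So promote_types(builtins.int32, builtins.fp32) would map back to
# builtins.fp64 via builtin_from_nptype.
```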
- """ - - def __init__(self, annotation=None): - self.result = [] - self.visited_memo = {} - self.highlights = {} - self.alternate_labeller = None - self.annotation = annotation - - def labeller(self, labeller): - self.alternate_labeller = labeller - return self - - def highlight_nodes(self, nodeset, color='yellow'): - for i in nodeset: - self.highlights[i] = color - return self - - def visit(self, graph, node, nodename_prefix=''): - if node.name in self.visited_memo: - return - - # For printing datatype, breaks type - if node.attr.get('symbolic_datatype', None) is not None: - dtype = str(builtins.get_type_info(node.attr['symbolic_datatype'])) - elif node.datatype is not None: - dtype = str(builtins.get_type_info(node.datatype)) - else: - dtype = "Unknown" - - label = '' - if self.alternate_labeller is not None: - label = self.alternate_labeller(node) - else: - if len(node.outputs) == 0: - label = '\\n{' + node.name + '}' - if 'Placeholder' in node.op: - label = '\\n{' + node.name + '}' - if node.op == 'while': - label = '\\n{body: ' + node.attr["body_function"] + ' cond:' + node.attr[ - "cond_function"] + '}' - if node.op == 'function': - label = '\\n{body: ' + node.attr["function_name"] + '}' - if node.op == 'function_entry': - label = '\\n{' + node.name + '}' - label = node.op + ':' + dtype + label - - if node.name in self.highlights: - self.result.append( - '"' + nodename_prefix + node.name + '"' + '[label="' + label + - '",fillcolor=%s,style=filled,fontcolor=%s]' % ( - self.highlights[node.name], - 'violetred' if node.attr.get(self.annotation, False) else 'black')) - else: - self.result.append( - '"' + nodename_prefix + node.name + '"' + '[label="' + label + '",fontcolor=%s]' % - ('violetred' if node.attr.get(self.annotation, False) else 'black')) - - for i in node.inputs: - input_name = i - edge = '"' + nodename_prefix + input_name + '"' + " -> " + '"' + nodename_prefix + node.name + '"' - innode = graph[input_name] - self.result.append(edge) - - for i in node.control_inputs: - input_name = i - edge = '"' + nodename_prefix + input_name + '"' + " -> " + '"' + nodename_prefix + node.name + '"' - innode = graph[input_name] - edge = edge + " [style=dotted]" - self.result.append(edge) - - self.visited_memo[node.name] = 1 - - for i in node.inputs: - input_name = i - if input_name[0] == '^': - input_name = input_name[1:] - assert (input_name in graph) - self.visit(graph, graph[input_name], nodename_prefix) - return self - - def visit_all(self, graph, nodename_prefix=''): - for i in graph: - self.visit(graph, graph[i], nodename_prefix) - return self - - def get_result(self, graphtype="digraph", graph_name="g"): - return graphtype + ' ' + graph_name + ' {\n\t' + '\n\t'.join( - str(i) for i in self.result) + ';\n\tlabel="' + graph_name[8:] + '";\n\tfontsize=96;\n}' - - def __str__(self): - return self.get_result() diff --git a/coremltools/converters/nnssa/commons/parse.py b/coremltools/converters/nnssa/commons/parse.py deleted file mode 100644 index f6745c5ee..000000000 --- a/coremltools/converters/nnssa/commons/parse.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import numpy as np - -from . import builtins -from .builtins import get_type_info - - -def numpy_primitive_type_to_builtin_type(nptype): - if np.issubclass_(nptype, np.bool) or np.issubclass_(nptype, np.bool_): - # numpy as 2 bool types it looks like. what is the difference? 
- return builtins.bool - elif np.issubclass_(nptype, np.int8): - return builtins.int8 - elif np.issubclass_(nptype, np.int16): - return builtins.int16 - elif np.issubclass_(nptype, np.int32): - return builtins.int32 - elif np.issubclass_(nptype, np.int64): - return builtins.int64 - elif np.issubclass_(nptype, np.uint8): - return builtins.int8 - elif np.issubclass_(nptype, np.uint16): - return builtins.int16 - elif np.issubclass_(nptype, np.uint32): - return builtins.int32 - elif np.issubclass_(nptype, np.uint64): - return builtins.int64 - elif np.issubclass_(nptype, np.float16): - return builtins.fp16 - elif np.issubclass_(nptype, np.float32): - return builtins.fp32 - elif np.issubclass_(nptype, np.float64): - return builtins.fp64 - else: - raise TypeError("Not supported numpy type: %s" % (nptype)) - - -def numpy_val_to_builtin_val(npval): - if np.isscalar(npval): - ret_type = numpy_primitive_type_to_builtin_type(type(npval)) - ret = ret_type() - ret.val = npval - return ret, ret_type - else: - builtintype = numpy_primitive_type_to_builtin_type(npval.dtype.type) - ret_type = builtins.tensor(builtintype, npval.shape) - ret = ret_type() - ret.val = npval - return ret, ret_type - - -def parse_reverse_shape(t): - mapping = { - 1: builtins.float, - 2: builtins.double, - 3: builtins.int32, - 4: builtins.uint8, - 5: builtins.int16, - 6: builtins.int8, - 7: builtins.str, - 9: builtins.int64, - 10: builtins.bool, - 17: builtins.uint16, - 22: builtins.uint32, - 23: builtins.uint64 - } - - for v in mapping.values(): - if t == v: - return [] - if builtins.is_tensor(t): - return t.get_shape() - if builtins.is_tuple(t) or builtins.is_list(t): - if len(t.T) > 1: - print(t.T) - raise ValueError("parse_reverse_shape doesn't support nested non-simple tuple/list") - return [-1] + list(parse_reverse_shape(t.T[0])) - raise ValueError("Unsupported type (%s)" % (builtins.get_type_info(t))) - - -def parse_reverse_type(t): - mapping = { - 1: builtins.float, - 2: builtins.double, - 3: builtins.int32, - 4: builtins.uint8, - 5: builtins.int16, - 6: builtins.int8, - 7: builtins.str, - 9: builtins.int64, - 10: builtins.bool, - 17: builtins.uint16, - 22: builtins.uint32, - 23: builtins.uint64 - } - - for k, v in mapping.items(): - if t == v: - return k - - assert False, "%s cannot be parsed to builtin type" % (t) diff --git a/coremltools/converters/nnssa/commons/serialization/__init__.py b/coremltools/converters/nnssa/commons/serialization/__init__.py deleted file mode 100644 index f443b011b..000000000 --- a/coremltools/converters/nnssa/commons/serialization/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from .dump_impl import dump, dump_obj -from .file_writer import file_writer -from .file_reader import file_reader diff --git a/coremltools/converters/nnssa/commons/serialization/dump_impl.py b/coremltools/converters/nnssa/commons/serialization/dump_impl.py deleted file mode 100644 index a6a275a0a..000000000 --- a/coremltools/converters/nnssa/commons/serialization/dump_impl.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from .file_writer import file_writer -import io -import sys -import numpy as np -from .. 
import builtins - from .types import * - - def _dump_impl(obj, writer, expected_type=None): - if isinstance(obj, bool) or isinstance( - obj, int) or ((sys.version_info < (3, 0)) and isinstance(obj, long)) or issubclass( - type(obj), int) or isinstance(obj, np.integer): - assert (expected_type is None or builtins.get_type_info(expected_type).name == 'int') - writer.write_byte(py_types.int.value) - writer.write_int(obj) - elif isinstance(obj, bytes): - # str == bytes in python 2 - assert (expected_type is None or builtins.get_type_info(expected_type).name == 'str') - writer.write_byte(py_types.str.value) - writer.write_int(len(obj)) - writer.write_str(obj) - elif isinstance(obj, str) or ((sys.version_info < (3, 0)) and isinstance(obj, unicode)): - assert (expected_type is None or builtins.get_type_info(expected_type).name == 'str') - writer.write_byte(py_types.str.value) - obj = obj.encode('latin-1') - writer.write_int(len(obj)) - writer.write_str(obj) - elif isinstance(obj, float) or isinstance(obj, np.float32) or isinstance(obj, np.double): - assert (expected_type is None or builtins.get_type_info(expected_type).name == 'double') - writer.write_byte(py_types.double.value) - writer.write_double(obj) - elif isinstance(obj, list): - assert (expected_type is None or builtins.get_type_info(expected_type).name == 'list') - writer.write_byte(py_types.list.value) - writer.write_int(len(obj)) - for i in obj: - _dump_impl(i, writer) - elif isinstance(obj, dict): - assert (expected_type is None or builtins.get_type_info(expected_type).name == 'dict') - writer.write_byte(py_types.dict.value) - writer.write_int(len(obj)) - for i in obj: - _dump_impl(i, writer) - _dump_impl(obj[i], writer) - elif isinstance(obj, np.ndarray): - # dump the shape. Then the data. - - # TODO: Support more types. - if obj.dtype == np.float64 or obj.dtype == np.bool: - obj = obj.astype(np.float32) - writer.write_byte(py_types.ndarray.value) - writer.write_byte(dump_np_types(obj.dtype)) - writer.write_int(len(obj.shape)) - for i in obj.shape: - writer.write_int(i) - writer.write_str(np.ravel(obj, order='C').tobytes()) - - elif hasattr(obj, '__slots__'): - assert (hasattr(obj, '__slot_types__')) - assert (len(obj.__slots__) == len(obj.__slot_types__)) - _dump_impl(obj.__version__(), writer) - slot_and_types = sorted(zip(obj.__slots__, obj.__slot_types__), key=lambda x: x[0]) - - for s in sorted(slot_and_types): - try: - _dump_impl(getattr(obj, s[0]), writer, s[1]) - except AssertionError: - received_type = type(getattr(obj, s[0])) - raise TypeError( - "%s member is of the wrong type. 
Expected %s, got %s" % - (s[0], str(s[1]), str(received_type))) - - else: - raise TypeError('Cannot serialize object of type %s' % str(type(obj))) - - -def dump_obj(obj, writer): - _dump_impl(obj, writer) - - -def dump(obj, filename): - handle = open(filename, 'wb') - writer = file_writer(handle) - _dump_impl(obj, writer) - handle.close() diff --git a/coremltools/converters/nnssa/commons/serialization/file_reader.py b/coremltools/converters/nnssa/commons/serialization/file_reader.py deleted file mode 100644 index 54847b215..000000000 --- a/coremltools/converters/nnssa/commons/serialization/file_reader.py +++ /dev/null @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -import struct -import numpy as np -from .types import * - - -class file_reader: - def __init__(self, handle): - self.handle = handle - - def _read_byte(self): - s = struct.calcsize('b') - i = self.handle.read(s) - return struct.unpack('b', i)[0] - - def _read_int(self): - s = struct.calcsize('q') - i = self.handle.read(s) - return int(struct.unpack('q', i)[0]) - - def _read_double(self): - s = struct.calcsize('d') - i = self.handle.read(s) - return float(struct.unpack('d', i)[0]) - - def _read_str(self): - s = self._read_int() - i = self.handle.read(s).decode('latin-1') - return i - - def _read_list(self): - s = self._read_int() - i = [] - for _ in range(s): - i.append(self.read_value()) - return i - - def _read_dict(self): - s = self._read_int() - i = {} - for _ in range(s): - k = self.read_value() - v = self.read_value() - i[k] = v - return i - - def _read_ndarray(self): - get_np_type = np_types(self._read_byte()).name - np_type = getattr(np, get_np_type) - rank = self._read_int() - shape = [] - for i in range(rank): - shape.append(self._read_int()) - elements = 1 - for i in shape: - elements *= i - reader = self.handle.read(elements * np.dtype(np_type).itemsize) - array_str = np.fromstring(reader, dtype=np_type, count=elements) - return np.reshape(array_str, shape) - - def read_value(self): - get_k_type = self._read_byte() - k_type = py_types(get_k_type).name - return getattr(self, '_read_' + k_type)() diff --git a/coremltools/converters/nnssa/commons/serialization/file_writer.py b/coremltools/converters/nnssa/commons/serialization/file_writer.py deleted file mode 100644 index fed36db73..000000000 --- a/coremltools/converters/nnssa/commons/serialization/file_writer.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -import struct - - -class file_writer: - def __init__(self, handle): - self.handle = handle - - def write_bool(self, i): - i = bool(i) - self.handle.write(struct.pack('?', i)) - - def write_byte(self, i): - i = int(i) - self.handle.write(struct.pack('b', i)) - - def write_int(self, i): - i = int(i) - self.handle.write(struct.pack('q', i)) - - def write_double(self, i): - i = float(i) - self.handle.write(struct.pack('d', i)) - - def write_str(self, i): - if isinstance(i, bytes) or isinstance(i, str): - self.handle.write(i) - else: - i = str(i) - self.handle.write(i) diff --git a/coremltools/converters/nnssa/commons/serialization/types.py b/coremltools/converters/nnssa/commons/serialization/types.py deleted file mode 100644 index 9b0eb4d77..000000000 --- a/coremltools/converters/nnssa/commons/serialization/types.py +++ /dev/null @@ -1,55 +0,0 @@ -from enum import 
Enum, unique -import numpy as np - - -# This should line up with C++'s src/numeric/primitive_type.hpp -@unique -class np_types(Enum): - bool_ = 0 - int8 = 1 - int16 = 2 - int32 = 3 - int64 = 4 - uint8 = 5 - uint16 = 6 - uint32 = 7 - uint64 = 8 - float32 = 9 - float64 = 10 - - -@unique -class py_types(Enum): - int = 0 - double = 1 - str = 2 - list = 4 - dict = 5 - ndarray = 9 - - -def dump_np_types(npt): - if npt == np.bool: - return np_types.bool_.value - elif npt == np.int8: - return np_types.int8.value - elif npt == np.int16: - return np_types.int16.value - elif npt == np.int32: - return np_types.int32.value - elif npt == np.int64: - return np_types.int64.value - elif npt == np.uint8: - return np_types.uint8.value - elif npt == np.uint16: - return np_types.uint16.value - elif npt == np.uint32: - return np_types.uint32.value - elif npt == np.uint64: - return np_types.uint64.value - elif npt == np.float32: - return np_types.float32.value - elif npt == np.float64: - return np_types.float64.value - else: - raise ValueError("Cannot dump type %s" % (type(npt))) diff --git a/coremltools/converters/nnssa/commons/symbolic.py b/coremltools/converters/nnssa/commons/symbolic.py deleted file mode 100644 index 2ab8bce8a..000000000 --- a/coremltools/converters/nnssa/commons/symbolic.py +++ /dev/null @@ -1,29 +0,0 @@ -import sympy as sm -import numpy as np - - -def is_symbolic_or_known(val): - return (np.isscalar(val) and val != -1) or issubclass(type(val), sm.Basic) - - -def is_symbolic_or_unknown(val): - return (np.isscalar(val) and val == -1) or issubclass(type(val), sm.Basic) - - -def is_symbolic(val): - return issubclass(type(val), sm.Basic) - - -def any_symbolic_or_unknown(val): - if is_symbolic_or_unknown(val): - return True - elif isinstance(val, np.ndarray) and np.issctype(val.dtype): - return False - elif hasattr(val, '__iter__'): - return any(any_symbolic_or_unknown(i) for i in val) - else: - return is_symbolic_or_unknown(val) - - -def isscalar(val): - return np.isscalar(val) or issubclass(type(val), sm.Basic) diff --git a/coremltools/converters/nnssa/commons/utils.py b/coremltools/converters/nnssa/commons/utils.py deleted file mode 100644 index 44d374759..000000000 --- a/coremltools/converters/nnssa/commons/utils.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import sys - -_varname_charset = set([chr(i) for i in range(ord('A'), - ord('Z') + 1)] + - [chr(i) for i in range(ord('a'), - ord('z') + 1)] + - [chr(i) for i in range(ord('0'), - ord('9') + 1)] + ['_']) - -if sys.version_info >= (3, 0): - str_types = (str) -else: - str_types = (str, unicode) - - -def escape_name(name): - ret = ''.join([i if i in _varname_charset else '_' for i in name]) - if ret.endswith('_'): - return ret - else: - return ret + '_' - - -def escape_fn_name(name): - ret = ''.join([i if i in _varname_charset else '_' for i in name]) - ret = escape_name(name) - if ret.startswith('f_'): - return ret - else: - return 'f_' + ret - - -def normalize_names(names): - if isinstance(names, str_types): - return names.replace(':', '__').replace('/', '__') - return [i.replace(':', '__').replace('/', '__') for i in names] diff --git a/coremltools/converters/nnssa/coreml/graph_pass/op_fusions.py b/coremltools/converters/nnssa/coreml/graph_pass/op_fusions.py deleted file mode 100644 index 392619728..000000000 --- a/coremltools/converters/nnssa/coreml/graph_pass/op_fusions.py +++ /dev/null @@ 
-1,1020 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import numpy as np -from ...commons import builtins -from ...commons.symbolic import * -from ...commons.basic_graph_ops import disconnect_edge, connect_edge, \ - delete_node, replace_node, connect_dests, topsort, replace_source -from ...nnssa import ParsedNode - -ELEMENTWISE_OPS = { - 'Maximum', - 'Minimum', - 'Add', - 'AddV2', - 'Sub', - 'BiasAdd', - 'Mul', - 'RealDiv', - 'Sigmoid', - 'Relu', - 'Relu6', - 'LeakyRelu', - 'Tanh', - 'Identity', - 'Sqrt', - 'Rsqrt', - 'Pow', - 'LRN', - 'Square', - 'SquaredDifference' -} - -# Native SSA nodes with data_format attributes of NHWC / NCHW -NATIVE_NHWC_OPS = { - 'Conv2D', 'Conv2DBackpropInput', 'DepthwiseConv2dNative', - 'Pooling', 'MaxPool', 'AvgPool', 'DepthToSpace', 'SpaceToDepth', -} - -REDUCTION_OPS = { - 'Mean', - 'Max' -} - - -def _check_number_inputs(node, n): - return len(node.inputs) == n - - -def _check_number_outputs(node, n): - return len(node.outputs) == n - - -def _check_single_out_vector_constant_node(node): - return node.op == 'Const' and len(node.outputs) == 1 and \ - node.value is not None and len(np.squeeze(node.value.val).shape) == 1 - - -def _check_rank_matches(node1, node2): - rank1 = len(node1.datatype.get_shape()) - rank2 = len(node2.datatype.get_shape()) - return rank1 == rank2 - - -def _update_padding_and_crop_values_2d(pad_values, crop_values, params): - def _new_pad_crop_1d(p1, p2, c1, c2, k, s, n1): - n2 = np.floor((n1 + p1 + p2 - k) / s) + 1 - if 1 + c1 * s <= p1: - p1 -= c1 * s - c1 = 0 - if k + (n2 - c2 - 1) * s > p1 + n1: - p2 = k + (n2 - c2 - 1) - (p1 + n1) - c2 = 0 - return p1, p2, c1, c2 - - p1, p2, c1, c2 = _new_pad_crop_1d(pad_values[2], pad_values[3], - crop_values[2], crop_values[3], - params['kh'], params['sh'], params['Hin']) - pad_values[2:] = np.array([p1, p2], dtype=np.int) - crop_values[2:] = np.array([c1, c2], dtype=np.int) - - p1, p2, c1, c2 = _new_pad_crop_1d(pad_values[0], pad_values[1], - crop_values[0], crop_values[1], - params['kw'], params['sw'], params['Win']) - pad_values[:2] = np.array([p1, p2], dtype=np.int) - crop_values[:2] = np.array([c1, c2], dtype=np.int) - - -def _is_NHWC(graph, node): - if node.op == 'ResizeBilinear' or node.op == 'ResizeNearestNeighbor' \ - or node.op == 'MirrorPad': - return True - if node.op in NATIVE_NHWC_OPS and node.attr.get('data_format') == 'NHWC': - return True - - if node.op == 'Concat': # Concat's first input is axis - return all(graph[inp].attr.get('data_format') == 'NHWC_format_inserted' - for inp in node.inputs[1:]) - if node.op == 'ConcatV2': # ConcatV2's last input is axis - return all(graph[inp].attr.get('data_format') == 'NHWC_format_inserted' - for inp in node.inputs[:-1]) - - if node.op == 'Pad' and len(node.datatype.get_shape()) == 4: - # adjust constant padding values - parent_node = graph[node.inputs[1]] - if parent_node.value is not None: - val = np.array(parent_node.value.val) - if len(val) == 4 and builtins.is_tensor(parent_node.datatype) and len(parent_node.outputs) == 1: - parent_node.value.val = parent_node.value.val[[0, 3, 1, 2]] - return True - - if node.op in REDUCTION_OPS: - if not any([graph[inp].attr.get('data_format', '') == - 'NHWC_format_inserted' for inp in node.inputs]): - return False - if not node.attr.get('keep_dims', True): - return False - # adjust axis / dims / reduction_indices values - for inp in node.inputs: - parent_node = graph[inp] - if 
parent_node.value is not None: - val = np.array(parent_node.value.val) - if isinstance(parent_node.value.val, np.int32): - val = np.array([parent_node.value.val]) - m_nhwc_to_nchw = {0: 0, 1: 2, 2: 3, 3: 1} - reduction_indices = np.array([m_nhwc_to_nchw[x if x >= 0 else 4 + x] for x in val], dtype=np.int32) - parent_node.value.val = np.reshape(reduction_indices, parent_node.value.val.shape) - node.attr['reduction_indices'] = reduction_indices - return True - - if node.op in ELEMENTWISE_OPS: - # if its an element-wise op and if all of its parent(s) are - # "NHWC_format_inserted" or given that at least one of the parents - # is "NHWC_format_inserted" and rest are vector constants, then the - # node is also declared to be "NHWC_format_inserted" - - NHWC_parent = any([graph[inp].attr.get('data_format', - None) == 'NHWC_format_inserted' for inp in node.inputs]) - - if NHWC_parent: - for inp in node.inputs: - parent_node = graph[inp] - if parent_node.attr.get('data_format', None) == 'NHWC_format_inserted': - continue - elif parent_node.value is not None: - val = np.array(parent_node.value.val) - # constant scalar - if val.shape == () and not builtins.is_tensor(parent_node.datatype) and len(parent_node.outputs) == 1: - continue - # constant vector - if len(val.shape) == 1 and builtins.is_tensor(parent_node.datatype) and len(parent_node.outputs) == 1: - continue - else: - return False - else: - return False - return True - - return False - - -def _insert_transpose_to_or_from_nchw(graph, src, dst, transpose_node_name, transpose_params=None): - """ - Insert a node called 'transpose_node_name' between src and dst - This node should be a transpose node with params 'transpose_params' - """ - - if not transpose_params: - transpose_params = [0, 3, 1, 2] # channel_last to channel_first - - # First check whether the node already exists in the graph or not. 
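-    # (When one src feeds several NHWC consumers, the shared transpose node is
-    # reused and the new dst is simply appended to its outputs, so duplicate
-    # transposes are never emitted for the same tensor.)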
- - if transpose_node_name in graph: - tp_node = graph[transpose_node_name] - if dst.name not in tp_node.outputs: - tp_node.outputs.append(dst.name) - else: - # the node does not exist, so create a fresh one - tp_node = ParsedNode() - tp_node.op = 'Transpose' - tp_node.name = transpose_node_name - - # Adjust type inference - if builtins.is_tensor(src.datatype): - s = src.datatype.get_shape() - if len(s) == 4: - tp_shape = tuple([s[transpose_params[0]], s[transpose_params[1]], s[transpose_params[2]], s[transpose_params[3]]]) - tp_node.datatype = builtins.tensor(src.datatype.get_primitive(), tp_shape) - else: - tp_node.datatype = src.datatype - - tp_node.inputs = [src.name] - tp_node.outputs = [dst.name] - tp_node.attr['dim'] = transpose_params - graph[transpose_node_name] = tp_node - - # Rename dst's input 'src' to 'transpose_node_name' - for idx, inp in enumerate(dst.inputs): - if inp == src.name: - dst.inputs[idx] = transpose_node_name - break - - # Rename src's output from 'dst' to 'transpose_node_name' - if transpose_node_name in src.outputs: - # 'transpose_node_name' already exists as an output of the src, - # we just need to delete dst node from the output list of src, instead of replacing it - if dst.name in src.outputs: - src.outputs.remove(dst.name) - else: - for idx, outp in enumerate(src.outputs): - if outp == dst.name: - src.outputs[idx] = transpose_node_name - break - - -def _insert_transpose_to_nchw(graph, src, dst): - tp_node_name = src.name + "_to_nchw" - _insert_transpose_to_or_from_nchw(graph, src, dst, tp_node_name, [0, 3, 1, 2]) - - -def _insert_transpose_from_nchw(graph, src, dst): - tp_node_name = src.name + "_to_nhwc" - _insert_transpose_to_or_from_nchw(graph, src, dst, tp_node_name, [0, 2, 3, 1]) - - -def transform_nhwc_to_nchw(nnssa): - """ - Mark each one of the node with "NHWC", so that the conversion process - could avoid inserting unnecessary transpositions. 
- A node's format is "NHWC" if and only if: - (1) it is a conv or pooling or image_resize layer with "NHWC" data format - (2) it is a rank-preserving operation whose inputs are all "NHWC" - """ - for fn_key in list(nnssa.functions.keys()): - graph = nnssa.functions[fn_key].graph - # this pass needs the ssa to be in the topologically sorted order - node_names = topsort(graph) - - # Mark all NHWC nodes - nhwc_nodes = [] - for name in node_names: - node = graph[name] - if len(node.outputs) > 0 and len(node.inputs) > 0 and _is_NHWC(graph, node): - node.attr['data_format'] = 'NHWC_format_inserted' - nhwc_nodes.append(name) - - for name in nhwc_nodes: - node = graph[name] - - # Adjust type inference - if builtins.is_tensor(node.datatype): - s = node.datatype.get_shape() - if len(s) == 4: - new_shape = tuple([s[0], s[3], s[1], s[2]]) - node.datatype = builtins.tensor(node.datatype.get_primitive(), new_shape) - node.attr['symbolic_datatype'] = node.datatype - - if '_output_shapes' in node.attr: - orig_out_shapes = node.attr['_output_shapes'] - if len(orig_out_shapes) == 1 and len(orig_out_shapes[0]) == 4: - s = orig_out_shapes[0] - node.attr['_output_shapes'] = [[s[0], s[3], s[1], s[2]]] - - if node.op in ELEMENTWISE_OPS: - for inp in node.inputs: - parent_node = graph[inp] - if parent_node.value is None: - continue - - # if there is a constant vector input - val = np.array(parent_node.value.val) - if len(val.shape) == 1 and builtins.is_tensor(parent_node.datatype): - new_shape = (1, val.shape[0], 1, 1) - parent_node.datatype = builtins.tensor( - parent_node.datatype.get_primitive(), new_shape - ) - parent_node.value.val = np.reshape( - parent_node.value.val, new_shape - ) - - # Insert NHWC -> NCHW transpose - for i, inp_node_name in enumerate(list(node.inputs)): - inp_node_format = graph[inp_node_name].attr.get('data_format') - symbolic_value = graph[inp_node_name].attr['symbolic_value'] - if (graph[inp_node_name].op == 'Const' or - len(graph[inp_node_name].datatype.get_shape()) != 4 or - (symbolic_value and not any_symbolic_or_unknown(symbolic_value))): - # Const weights and parameters - continue - - if inp_node_format != 'NHWC_format_inserted': - assert len(graph[inp_node_name].datatype.get_shape()) == 4 - _insert_transpose_to_nchw(graph, graph[inp_node_name], node) - - # Insert NCHW -> NHWC transpose - for i, out_node_name in enumerate(list(node.outputs)): - out_node_format = graph[out_node_name].attr.get('data_format') - if out_node_format != 'NHWC_format_inserted': - _insert_transpose_from_nchw(graph, node, graph[out_node_name]) - - # Adjust output shape and concat layer's axis parameter - if node.op == 'Concat' and len(node.inputs) > 1 and graph[node.inputs[0]].value is not None: - axis = graph[node.inputs[0]].value.val - axis = 4 + axis if axis < 0 else axis - if axis == 3: - node.attr['axis'] = 1 - elif axis == 2 or axis == 1: - node.attr['axis'] = axis + 1 - else: - node.attr['axis'] = axis - - if node.op == 'ConcatV2' and len(node.inputs) > 1 and graph[node.inputs[-1]].value is not None: - axis = graph[node.inputs[-1]].value.val - axis = 4 + axis if axis < 0 else axis - if axis == 3: - node.attr['axis'] = 1 - elif axis == 2 or axis == 1: - node.attr['axis'] = axis + 1 - else: - node.attr['axis'] = axis - - -def fuse_bias_add(nnssa): - # look for 'BiasAdd' nodes following 'MatMul' or 'Conv2D'. If the other input in - # 'BiasAdd' is coming from a const node, then copy the value of that const - # in the parent and remove the 'BiasAdd', i.e. connect its children - # to its parent. 
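-    # Schematically (a minimal sketch; b is the const's value):
-    #
-    #     Const(b) ---\
-    #     MatMul ------> BiasAdd --> [...]    ==>    MatMul{bias: b} --> [...]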
- for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - keys = list(f.graph.keys()) - nodes_fused = [] - for k in keys: - if k not in f.graph: - continue - current_node = f.graph[k] - if current_node.op == 'BiasAdd' and len(current_node.inputs) == 2: - parent_node = f.graph[current_node.inputs[0]] - second_p_node = f.graph[current_node.inputs[1]] - ops_to_merge = ['MatMul', 'Conv2D', 'DepthwiseConv2dNative'] - if ( - (parent_node.op in ops_to_merge - and len(parent_node.outputs) == 1) - and - (second_p_node.value is not None - and len(second_p_node.outputs) == 1 - and second_p_node.outputs[0] == k) - ): - parent_node.attr['bias'] = second_p_node.value.val - disconnect_edge(f.graph, second_p_node.name, k) # disconnect the const - disconnect_edge(f.graph, parent_node.name, k) # disconnect the first parent - for out_node in current_node.outputs: - f.graph[parent_node.name].outputs.append(out_node) - if current_node.name in f.graph[out_node].inputs: - idx = f.graph[out_node].inputs.index(current_node.name) - f.graph[out_node].inputs[idx] = parent_node.name - else: - raise ValueError('[Op Fusion] fuse_bias_add() cannot identify biasAdd output.') - nodes_fused.append(k) - nodes_fused.append(second_p_node.name) - - for nf in nodes_fused: - delete_node(f.graph, nf) - if len(nodes_fused) > 0: - print("[Op Fusion] fuse_bias_add() deleted {} nodes.".format(len(nodes_fused))) - - -def onehot_matmul_to_embedding(nnssa): - # Look for 'MatMul' whose first input is 'OneHot' - # and replace it with embedding op - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - keys = list(f.graph.keys()) - - for k in keys: - if k not in f.graph: - continue - current_node = f.graph[k] - if len(current_node.inputs) < 1: - continue - inp_node = f.graph[current_node.inputs[0]] - if (current_node.op == 'BatchMatMul' or current_node.op == 'MatMul') and inp_node.op == 'OneHot': - assert len(inp_node.inputs) == 4, 'OneHot node should have 4 inputs' - onehot_params = [f.graph[name].attr.get('value') for name in inp_node.inputs[1:]] - depth_val, on_val, off_val = [x.val[0] for x in onehot_params] - # Change the current node operation to Embedding - current_node.op = 'Embedding' - current_node.attr['depth'] = depth_val - current_node.attr['on_value'] = on_val - current_node.attr['off_value'] = off_val - # Replace OneHot with its first input - onehot_inp_node_names = inp_node.inputs[:] - replace_node(f.graph, inp_node.name, onehot_inp_node_names[0]) - - # Now delete the OneHot node and other input nodes - delete_node(f.graph, onehot_inp_node_names[1]) - print('[Op Fusion] Node %s is removed.' % (onehot_inp_node_names[1])) - delete_node(f.graph, onehot_inp_node_names[2]) - print('[Op Fusion] Node %s is removed.' % (onehot_inp_node_names[2])) - delete_node(f.graph, onehot_inp_node_names[3]) - print('[Op Fusion] Node %s is removed.' % (onehot_inp_node_names[3])) - delete_node(f.graph, inp_node.name) - print('[Op Fusion] Node %s is removed.' 
% inp_node.name)
-
-
-def _search_nodes_by_type(gf, node_names, op_types):
-    for name in node_names:
-        if gf[name].op in op_types:
-            return gf[name]
-
-
-def _match_layernorm_pattern(gf, entry_node):
-    """ Return the nodes that form the subgraph of a LayerNormalization layer
-    """
-
-    def _axes_in_range(axes, rank):
-        return all([x in range(-rank, rank) for x in axes])
-
-    try:
-        params = {}
-        mean_1 = _search_nodes_by_type(gf, entry_node.outputs, ['Mean'])
-        sqdiff_2 = _search_nodes_by_type(gf, entry_node.outputs, ['SquaredDifference'])
-        mul_3 = _search_nodes_by_type(gf, entry_node.outputs, ['Mul'])
-
-        if not (mean_1.op == 'Mean' and sqdiff_2.op == 'SquaredDifference' and
-                mul_3.op == 'Mul'):
-            return None
-        const_4 = gf[mean_1.inputs[1]]
-        mean_1_rank = len(mean_1.datatype.get_shape())
-        if not (const_4.op == 'Const' and len(const_4.value.val) == 1 and
-                _axes_in_range(const_4.value.val, mean_1_rank)):
-            return None
-        axes = const_4.value.val
-        mean_5 = gf[sqdiff_2.outputs[0]]
-        if not (mean_5.op == 'Mean'):
-            return None
-        const_6 = gf[mean_5.inputs[1]]
-        mean_5_rank = len(mean_5.datatype.get_shape())
-        if not (const_6.op == 'Const' and len(const_6.value.val) == 1 and
-                axes == const_6.value.val):
-            return None
-
-        # Normalize negative axes (axis + rank) and require that the reduced
-        # axes are the innermost dimensions.
-        axes = sorted([x if x >= 0 else mean_1_rank + x for x in
-                       const_4.value.val])
-        ref_axes = list(range(mean_1_rank - len(axes), mean_1_rank))
-        if not all([x == y for (x, y) in zip(axes, ref_axes)]):
-            return None
-        params['axes'] = axes
-
-        add_7 = gf[mean_5.outputs[0]]
-        const_8 = gf[add_7.inputs[1]]  # epsilon
-        params['epsilon'] = const_8.value.val
-        rsqrt_9 = gf[add_7.outputs[0]]
-        mul_10 = gf[rsqrt_9.outputs[0]]
-        if not (add_7.op in ['Add', 'AddV2'] and const_8.op == 'Const' and
-                rsqrt_9.op == 'Rsqrt' and mul_10.op == 'Mul'):
-            return None
-        const_11 = gf[mul_10.inputs[1]]
-        params['gamma'] = const_11.value.val
-        if not (mul_3.name in mul_10.outputs and len(mul_10.outputs) == 2):
-            return None
-        mul_12 = gf[mul_10.outputs[1]] if gf[mul_10.outputs[0]] == mul_3 else \
-            gf[mul_10.outputs[0]]
-
-        sub_13 = gf[mul_12.outputs[0]]
-        if not (mul_12.op == 'Mul' and sub_13.op == 'Sub'):
-            return None
-        const_14 = gf[sub_13.inputs[0]]
-        if not const_14.op == 'Const':
-            return None
-        params['beta'] = const_14.value.val
-        add_15 = gf[sub_13.outputs[0]]
-        if not (gf[add_15.inputs[0]] == mul_3 and add_15.op in ['Add', 'AddV2']):
-            return None
-
-        layernorm_nodes = [mean_1, sqdiff_2, mul_3, const_4, mean_5, const_6,
-            add_7, const_8, rsqrt_9, mul_10, const_11, mul_12, sub_13, const_14,
-            add_15]
-
-        return (layernorm_nodes, params)
-    except Exception:
-        return None
-
-
-def _fuse_layer_norm(graph):
-    keys = list(graph.keys())
-    count = 0
-    for k in keys:
-        if k not in graph:
-            continue
-        current_node = graph[k]
-        layernorm_nodes_params = _match_layernorm_pattern(graph, current_node)
-        if layernorm_nodes_params is not None:
-            ln_nodes, ln_params = layernorm_nodes_params
-            out_node = ln_nodes[-1]
-
-            # Instantiate a new fused node in the graph
-            fused_ln_node = ParsedNode()
-            fused_ln_node.op = 'LayerNormalization'
-            fused_ln_node.name = out_node.name + '_layernorm'
-            fused_ln_node.attr = ln_params
-            fused_ln_node.datatype = current_node.datatype
-
-            graph[fused_ln_node.name] = fused_ln_node
-
-            # Connect fused node to entry and output nodes
-            connect_edge(graph, current_node.name, fused_ln_node.name)
-            replace_node(graph, out_node.name, fused_ln_node.name)
-
-            # Delete nodes
-            ln_node_names = [x.name for x in ln_nodes]
-            for name in ln_node_names:
-                delete_node(graph, name)
-
count += 1 - - if count > 0: - print('[Op Fusion] Fused {} layer normalizations.'.format(count)) - - -def fuse_layer_norm(nnssa): - """ - Layernorm op replaces the following sub-graph: - - [...] -----> Mean ---> SquaredDifference ---> Mean ---> Add/AddV2 (epsilon) ---> Rsqrt ---> Mul (gamma) ----> Mul ----> Sub (beta) ----> Add/AddV2 -------> [...] - | | ^ | ^ ^ - | | | | | | - | -------------------------- | | | - | |------------------------------------------------------------------------------------------------ | - | |------------------------------> Mul------------ - | ^ - | | - | ------------------------------------------------------------------------------------------------------------------------------- - - """ - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - _fuse_layer_norm(f.graph) - - -def _match_gelu_pattern(gf, entry_node): - """ Return the nodes that form the subgraph of a GELU layer - """ - try: - if not len(entry_node.outputs) == 3: - return None - pow_1 = _search_nodes_by_type(gf, entry_node.outputs, ['Pow']) - add_2 = _search_nodes_by_type(gf, entry_node.outputs, ['Add', 'AddV2']) - mul_3 = _search_nodes_by_type(gf, entry_node.outputs, ['Mul']) - - if not (pow_1.op == 'Pow' and add_2.op in ['Add','AddV2'] and mul_3.op == 'Mul'): - return None - const_4 = gf[pow_1.inputs[1]] - if not (const_4.op == 'Const' and int(round(const_4.value.val)) == 3): - return None - mul_5 = gf[pow_1.outputs[0]] - const_6 = gf[mul_5.inputs[0]] - if not (const_6.op == 'Const' and \ - abs(const_6.value.val - 0.0447) < 1e-3): - return None - if not (gf[add_2.inputs[0]] == entry_node and \ - gf[add_2.inputs[1]] == mul_5): - return None - mul_7 = gf[add_2.outputs[0]] - const_8 = gf[mul_7.inputs[0]] - if not abs(const_8.value.val - np.sqrt(2 / np.pi)) < 1e-3: - return None - tanh_9 = gf[mul_7.outputs[0]] - add_10 = gf[tanh_9.outputs[0]] - const_11 = gf[add_10.inputs[0]] - if not (tanh_9.op == 'Tanh' and add_10.op in ['Add','AddV2'] and \ - const_11.op == 'Const' and int(round(const_11.value.val)) == 1): - return None - mul_12 = gf[add_10.outputs[0]] - const_13 = gf[mul_12.inputs[0]] - if not (mul_12.op == 'Mul' and const_13.op == 'Const' and \ - abs(const_13.value.val - 0.5) < 1e-3): - return None - if not ([gf[mul_3.inputs[0]], gf[mul_3.inputs[1]]] == [entry_node, mul_12] \ - or [gf[mul_3.inputs[1]], gf[mul_3.inputs[0]]] == [entry_node, mul_12]): - return None - - gelu_nodes = [pow_1, add_2, mul_3, const_4, mul_5, const_6, mul_7, - const_8, tanh_9, add_10, const_11, mul_12, const_13] - - return gelu_nodes - - except: - return None - - -def _fuse_gelu(graph): - keys = list(graph.keys()) - count = 0 - for k in keys: - if k not in graph: - continue - current_node = graph[k] - gelu_nodes = _match_gelu_pattern(graph, current_node) - if gelu_nodes is not None: - out_node = gelu_nodes[2] - - # Instantiate a new fused node in the graph - fused_gelu_node = ParsedNode() - fused_gelu_node.op = 'GeLU' - fused_gelu_node.name = out_node.name + '_gelu' - fused_gelu_node.attr = {} - fused_gelu_node.datatype = current_node.datatype - - graph[fused_gelu_node.name] = fused_gelu_node - - # Connect fused node to entry and output nodes - connect_edge(graph, current_node.name, fused_gelu_node.name) - replace_node(graph, out_node.name, fused_gelu_node.name) - - # Delete nodes - gelu_node_names = [x.name for x in gelu_nodes] - for name in gelu_node_names: - delete_node(graph, name) - - count += 1 - - if count > 0: - print('[Op Fusion] Fused {} GeLUs.'.format(count)) - - -def fuse_gelu(nnssa): - """ 
- This is the Gelu pattern: - [...] -----> Pow (3) ----> Mul (.0447) -----> Add/AddV2 -----> Mul (sqrt(2/pi)) ---> tanh ----> Add/AddV2 (1) ----> Mul (0.5) -----> Mul ------> [...] - | ^ ^ - | | | - |------------------------------------------------------------------------------------------------------------------------------------ - - y = ( tanh((.0447)x^3 + x ) * (sqrt(2/pi)) + 1 ) * 0.5 * x - - Replace this subgraph with a single "GeLU" op - """ - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - _fuse_gelu(f.graph) - - -def fuse_batch_norm(ssa): - """ - A graph pass that match and fuses following op patterns into a single BatchNorm op. - - Pattern 1: - [Const] [Const] - | | - V V - [...] --> [Mul] --> [Add] --> [...] to [...] --> [BatchNorm] --> [...] - - Pattern 2: - [Const] [Const] [Const] - | | | - V V V - [...] --> [Sub] --> [Mul] --> [Add] --> [...] to [...] --> [BatchNorm] --> [...] - - Pattern 3: - [Const] [Const] [Const] [Const] - | | | | - V V V V - [...] --> [Sub] --> [RealDiv] --> [Mul] --> [BiasAdd] --> [...] to [...] --> [BatchNorm] --> [...] - """ - - def _match_batch_norm_pattern(graph, entry_node, pattern_ops): - if not _check_number_outputs(entry_node, 1): - return None - nodes_to_merge = list() - node = graph[entry_node.outputs[0]] - for i, op in enumerate(pattern_ops): - if node.op != op: - return None - if node.op != pattern_ops[i] and not _check_number_outputs(node, 1): - return None - if not _check_number_inputs(node, 2): - return None - node_inputs = [graph[n].op.lower() for n in node.inputs] - try: - const_node = graph[node.inputs[node_inputs.index('const')]] - except ValueError: - return None - if not _check_single_out_vector_constant_node(const_node): - return None - if not _check_rank_matches(const_node, node): - return None - nodes_to_merge.extend([const_node, node]) - if len(node.outputs) == 0: # do not fuse the output layer - return None - node = graph[node.outputs[0]] - if len(nodes_to_merge) != len(pattern_ops) * 2: - return None - return nodes_to_merge - - def _merge_batch_norm(graph, nodes, pattern_id=1): - expected_num_nodes = 4 - if pattern_id == 2: - expected_num_nodes = 6 - elif pattern_id == 3: - expected_num_nodes = 8 - assert len(nodes) == expected_num_nodes - - current_node = graph[nodes[1].inputs[0]] - out_node = nodes[-1] - bn_outputs = out_node.outputs[:] - - fused_bn_node = ParsedNode() - fused_bn_node.op = 'BatchNorm' - fused_bn_node.name = out_node.name + '_batch_norm' - - fused_bn_node.attr = { - 'gamma': np.squeeze(nodes[0].value.val), - 'beta': np.squeeze(nodes[2].value.val), - } - if pattern_id == 2: - fused_bn_node.attr = { - 'mean': np.squeeze(nodes[0].value.val), - 'gamma': np.squeeze(nodes[2].value.val), - 'beta': np.squeeze(nodes[4].value.val), - } - elif pattern_id == 3: - fused_bn_node.attr = { - 'mean': np.squeeze(nodes[0].value.val), - 'gamma': np.squeeze(nodes[4].value.val) / np.squeeze(nodes[2].value.val), - 'beta': np.squeeze(nodes[6].value.val), - } - - fused_bn_node.datatype = current_node.datatype - graph[fused_bn_node.name] = fused_bn_node - - # combine control i/o - control_inputs = [] - control_outputs = [] - bn_node_names = [x.name for x in nodes] - - for name in bn_node_names: - control_inputs += graph[name].control_inputs - control_outputs += graph[name].control_outputs - - # Modify control outputs with name of fused batch norm node. 
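-            # (Any node that listed one of the merged nodes as a control
-            # input is re-pointed at the fused node, preserving the original
-            # control-dependency ordering.)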
- for control_output_name in graph[name].control_outputs: - ctrl_node = graph[control_output_name] - for i, inpt_name in enumerate(ctrl_node.control_inputs): - if inpt_name == name: - ctrl_node.control_inputs[i] = fused_bn_node.name - - fused_bn_node.control_inputs = control_inputs - fused_bn_node.control_outputs = control_outputs - - # connect fused node to entry and output nodes - connect_edge(graph, current_node.name, fused_bn_node.name) - connect_dests(graph, fused_bn_node.name, bn_outputs) - - # correct output's inputs order - for out in bn_outputs: - if len(graph[out].inputs) < 2: - continue - out_inputs = graph[out].inputs - a = out_inputs.index(out_node.name) - b = out_inputs.index(fused_bn_node.name) - out_inputs[a], out_inputs[b] = out_inputs[b], out_inputs[a] - - # delete merged nodes - for name in bn_node_names: - delete_node(graph, name) - - def _fuse_batch_norm(graph): - keys = list(graph.keys()) - count = 0 - for k in keys: - if k not in graph: - continue - current_node = graph[k] - - # return nodes order: [Const, Sub, Const, RealDiv, Const, Mul, Const, BiasAdd] - nodes3 = _match_batch_norm_pattern(graph, current_node, ['Sub', 'RealDiv', 'Mul', 'BiasAdd']) - # return nodes order: : [Const, Sub, Const, Mul, Const, Add] - nodes2 = _match_batch_norm_pattern(graph, current_node, ['Sub', 'Mul', 'Add']) - # return nodes order: : [Const, Mul, Const, Add] - nodes1 = _match_batch_norm_pattern(graph, current_node, ['Mul', 'Add']) - - if nodes3: - _merge_batch_norm(graph, nodes=nodes3, pattern_id=3) - count += len(nodes3) - - if nodes2: - _merge_batch_norm(graph, nodes=nodes2, pattern_id=2) - count += len(nodes2) - - if nodes1: - _merge_batch_norm(graph, nodes=nodes1, pattern_id=1) - count += len(nodes1) - - if count > 0: - print('[Op Fusion] Fused {} nodes into BatchNorms.'.format(count)) - - for fn_key in list(ssa.functions.keys()): - f = ssa.functions[fn_key] - _fuse_batch_norm(f.graph) - - -def fuse_pad_into_conv(nnssa): - """ - A graph pass that match and fuses following op patterns into one Conv2D op. - - Pattern 1: - [Const] - | - V - [Pad] --> [Conv2D] --> [...] to [Conv2D] --> [...] 
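-
-    For example (hypothetical values): a Pad with paddings
-    [[0, 0], [0, 0], [1, 1], [2, 2]] feeding a VALID Conv2D is folded into
-    the Conv2D as attr 'pad_h' = [1, 1] and 'pad_w' = [2, 2].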
- """ - - def _match_pad_conv2d_pattern(graph, entry_node): - if not _check_number_outputs(entry_node, 1): - return None - conv2d_node = graph[entry_node.outputs[0]] - if not (conv2d_node.op == 'Conv2D' and _check_number_outputs(conv2d_node, 1) and _check_number_inputs(conv2d_node, 1)): - return None - if conv2d_node.attr.get('padding', '').lower() != 'valid': - return None - return [entry_node, conv2d_node] - - def _fuse_pad_into_conv(graph): - keys = list(graph.keys()) - count = 0 - for k in keys: - if k not in graph: - continue - current_node = graph[k] - if current_node.op != 'Pad': - continue - - nodes = _match_pad_conv2d_pattern(graph, current_node) # [Pad, Conv2D] - - if nodes: - pad_node, conv2d_node = nodes - previous_node = pad_node.inputs[0] - paddings = graph[pad_node.inputs[1]].value.val - pad_h, pad_w = paddings[-2], paddings[-1] - - # fused node in the graph - conv2d_node.attr.update({ - 'pad_h': pad_h, 'pad_w': pad_w - }) - graph[conv2d_node.name] = conv2d_node - - # delete pad const node and pad node - delete_node(graph, pad_node.inputs[1]) - delete_node(graph, pad_node.name) - connect_edge(graph, previous_node, conv2d_node.name) - - count += 1 - - if count > 0: - print('[Op Fusion] Fused {} Pad nodes into Conv2D.'.format(count)) - - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - _fuse_pad_into_conv(f.graph) - - -def spatial_reduce_to_global_pool(nnssa): - """ - A graph pass to translate a spatial reduce op to global pool op for better GPU performance. - """ - reduce_ops = {'mean', 'max'} - - def _spatial_reduce_to_global_pool(graph): - keys = list(graph.keys()) - count = 0 - for k in keys: - if k not in graph: - continue - current_node = graph[k] - if current_node.op.lower() not in reduce_ops: - continue - reduction_indices = current_node.attr.get('reduction_indices') - # reduction on height and weight dimensions - hw_dims = {(2, 3), (3, 2), (-2, -1), (-1, -2), (2, -1), (-1, 2), (-2, 3), (3, -2)} - if tuple(reduction_indices) in hw_dims: - # replace reduce op to global pooling op - previous_node = current_node.inputs[0] - output_nodes = current_node.outputs[:] - - pooling_node = ParsedNode() - pooling_node.op = 'AvgPool' if current_node.op.lower() == 'mean' else 'MaxPool' - pooling_node.name = current_node.name + '_pooling' - pooling_node.attr = { - 'padding': 'valid'.upper(), - 'global_pooling': True, - } - pooling_node.datatype = current_node.datatype - graph[pooling_node.name] = pooling_node - - for output in output_nodes: - replace_source(graph, current_node.name, output, pooling_node.name) - delete_node(graph, current_node.name) - connect_edge(graph, previous_node, pooling_node.name) - - count += 1 - - if count > 0: - print('[Op Fusion] Tuned {} Reductions.'.format(count)) - - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - _spatial_reduce_to_global_pool(f.graph) - - -def fuse_batch_to_space_or_space_to_batch(ssa): - """ - A graph pass to fuse patterns related to space/batch transformations. 
- """ - - def _match_batch_to_space_nd(graph, entry_node): - nodes = list() - prev_node = entry_node - while len(nodes) < 2: - if len(prev_node.inputs) > 0 and graph[prev_node.inputs[0]].op: - prev_node = graph[prev_node.inputs[0]] - if prev_node.op == 'Transpose': - continue - nodes.append(prev_node.op) - else: - break - if len(nodes) > 1 \ - and (nodes[0] == 'Conv2d' or nodes[0] == 'DepthwiseConv2dNative') \ - and nodes[1] == 'SpaceToBatchND': - return entry_node - return None - - def _match_space_to_batch_nd(graph, entry_node): - nodes = list() - next_node = entry_node - while len(nodes) < 2: - if len(next_node.inputs) > 0 and graph[next_node.outputs[0]].op: - next_node = graph[next_node.outputs[0]] - if next_node.op == 'Transpose': - continue - nodes.append(next_node.op) - else: - break - if len(nodes) > 1 \ - and (nodes[0] == 'Conv2d' or nodes[0] == 'DepthwiseConv2dNative') \ - and nodes[1] == 'BatchToSpaceND': - return entry_node - return None - - def _fuse_batch_to_space_or_space_to_batch(graph): - keys = list(graph.keys()) - count = 0 - nodes = list() - for k in keys: - if k not in graph: - continue - current_node = graph[k] - - if current_node.op == 'BatchToSpaceND' and len(current_node.outputs) == 1: - node = _match_batch_to_space_nd(graph, current_node) - nodes += [node] if node is not None else [] - - if current_node.op == 'SpaceToBatchND' and len(current_node.outputs) == 1: - node = _match_space_to_batch_nd(graph, current_node) - nodes += [node] if node is not None else [] - - for n in nodes: - previous_node = n.inputs[0] - output_node = n.outputs[0] - connect_edge(graph, previous_node, output_node) - # make sure output's inputs is in correct order - out_inputs = graph[output_node].inputs - a = out_inputs.index(n.name) - b = out_inputs.index(previous_node) - out_inputs[a], out_inputs[b] = out_inputs[b], out_inputs[a] - - if n.op == 'SpaceToBatchND': - padding_values = [0] * 4 - dilations = list(graph[n.inputs[1]].value.val) - paddings = graph[n.inputs[2]].value.val - padding_values[2] = paddings[0, 0] # top - padding_values[3] = paddings[0, 1] # bottom - padding_values[0] = paddings[1, 0] # left - padding_values[1] = paddings[1, 1] # right - graph[output_node].attr.update({'dilations': dilations}) - needs_padding_before = True if sum(padding_values) != 0 else False - if needs_padding_before: - graph[output_node].attr.update({'_paddings_before': padding_values}) - - elif n.op == 'BatchToSpaceND': - cropping_values = [0] * 4 - croppings = graph[n.inputs[2]].value.val - cropping_values[2] = croppings[0, 0] # top - cropping_values[3] = croppings[0, 1] # bottom - cropping_values[0] = croppings[1, 0] # left - cropping_values[1] = croppings[1, 1] # right - needs_cropping_after = False - border_mode = n.attr.get('padding', '').lower() - if sum(cropping_values) != 0: - if border_mode != 'valid': - needs_cropping_after = True - else: - raise NotImplementedError('unhandled BatchToSpaceND case.') - if needs_cropping_after: - graph[output_node].attr.update({'_cropping_after': cropping_values}) - - # adjust type inference - shape = list(graph[previous_node].datatype.get_shape()) - graph[output_node].datatype = builtins.tensor(graph[output_node].datatype.get_primitive(), tuple(shape)) - - delete_node(graph, n.name) - count += 1 - - if count > 0: - print('[Op Fusion] Skipped {} BatchToSpaceND / SpaceToBatchND nodes.'.format(count)) - - for fn_key in list(ssa.functions.keys()): - f = ssa.functions[fn_key] - _fuse_batch_to_space_or_space_to_batch(f.graph) diff --git 
a/coremltools/converters/nnssa/coreml/graph_pass/op_removals.py b/coremltools/converters/nnssa/coreml/graph_pass/op_removals.py deleted file mode 100644 index 86e528b2c..000000000 --- a/coremltools/converters/nnssa/coreml/graph_pass/op_removals.py +++ /dev/null @@ -1,338 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import copy -from ...commons.basic_graph_ops import delete_node, disconnect_edge, replace_node, replace_control_dest, connect_edge, connect_dests -from .op_fusions import _check_number_inputs, _check_number_outputs - - -def remove_no_ops_and_shift_control_dependencies(nnssa): - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - for name, node in f.graph.copy().items(): - if node.op == "NoOp": - for each_control_output in node.control_outputs: - f.graph[each_control_output].control_inputs.remove(node.name) - - for each_control_input in node.control_inputs: - f.graph[each_control_input].control_outputs.remove(node.name) - - for each_control_output in node.control_outputs: - for each_control_input in node.control_inputs: - f.graph[each_control_output].control_inputs.append(each_control_input) - f.graph[each_control_input].control_outputs.append(each_control_output) - - del f.graph[name] - - -def constant_weight_link_removal(nnssa): - # look for constant nodes and if they are feeding into - # 'MatMul' or 'Conv2D', then copy the value to their attributes and delete the link. - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - keys = list(f.graph.keys()) - for k in keys: - if k not in f.graph: - continue - is_const = (f.graph[k].value is not None) and (k not in f.outputs) - if is_const: - for o in f.graph[k].outputs: - nextnode = f.graph[o] - op_type = nextnode.op - if op_type == 'MatMul' or op_type == 'Conv2D': - if nextnode.inputs[1] == k: - nextnode.attr['W'] = f.graph[k].value.val - disconnect_edge(f.graph, k, o) - - -def remove_single_isolated_node(nnssa): - # remove nodes that do not have any output and input - delete_count = 0 - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - keys = list(f.graph.keys()) - for k in keys: - if k not in f.graph: - continue - if len(f.graph[k].outputs) == 0 and len(f.graph[k].inputs) == 0: - delete_count += 1 - delete_node(f.graph, k) - - print('%d disconnected nodes deleted' % delete_count) - - -def _remove_internal_identity_nodes(nnssa): - ''' - remove identity nodes that are not connected to the model outputs - ''' - delete_count = 0 - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - for name in list(f.graph.keys()): - if name not in f.graph: - continue - node = f.graph[name] - - # Check if the node is in graph outputs - if len(node.inputs) != 1: - continue - if len(node.outputs) == 0 and len(node.control_outputs) == 0: - continue - - # Remove identity node - inp_node = f.graph[node.inputs[0]] - if node.op == 'Identity' and inp_node.op != 'get_tuple': - delete_count += 1 - parent_name = f.graph[name].inputs[0] - disconnect_edge(f.graph, parent_name, name) - for control_input in f.graph[name].control_inputs: - replace_control_dest(f.graph, control_input, name, parent_name) - - replace_node(f.graph, name, parent_name) # join parent to children - delete_node(f.graph, name) - - return delete_count - - -def _remove_output_identity_nodes(nnssa): - ''' - remove identity nodes that ARE connected to the model outputs - ''' - 
delete_count = 0 - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - keys = list(f.graph.keys()) - for k in keys: - if k not in f.graph: - continue - node = f.graph[k] - - if node.op != 'Identity' or len(node.inputs) != 1: - continue - - if len(node.outputs) != 0 or (k not in f.outputs) or k != node.name: - continue - # this means node k is the "output-identity" node that nnssa inserts - # we remove it here - parent_name = node.inputs[0] - parent_node = f.graph[parent_name] - - # Continue if parent node has an other outputs than identity node. - if any([an_output != k for an_output in parent_node.outputs]): - continue - - delete_count += 1 - - # Remove Identity node and copy existing parent node - parent_node = copy.deepcopy(f.graph[parent_name]) - for control_input_name in node.control_inputs: - if control_input_name == parent_node.name: - continue - if control_input_name in parent_node.control_inputs: - continue - - parent_node.control_inputs.append(control_input_name) - - del f.graph[k] - del f.graph[parent_name] - parent_node.name = k - parent_node.outputs = [] - f.graph[k] = parent_node - - node = f.graph[k] - for p in node.inputs: - for idx, out in enumerate(f.graph[p].outputs): - if out == parent_name: - f.graph[p].outputs[idx] = k - - for p in node.control_inputs: - for idx, out in enumerate(f.graph[p].control_outputs): - if out == parent_name: - f.graph[p].control_outputs[idx] = k - - return delete_count - - -def remove_identity(nnssa): - ''' - remove node of type 'identity', connect its parent to its child. - Disable this pass, if ssa contains more than 1 functions. In that case - a few 'identity' nodes are crucial to get data in/out of body of loops - ''' - if len(nnssa.functions.keys()) > 1: - return - delete_count = _remove_internal_identity_nodes(nnssa) - delete_count += _remove_output_identity_nodes(nnssa) - print('%d identity nodes deleted' % delete_count) - - -def remove_oneway_split(nnssa): - """ Remove split op with 1 output that splits the input into itself. - """ - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - keys = list(f.graph.keys()) - for k in keys: - if k not in f.graph: - continue - node = f.graph[k] - if not (node.op == 'Split' and node.attr['num_split'] == 1 and - len(node.datatype.T) == 1 and len(node.inputs) == 2): - continue - - if f.graph[node.inputs[0]].op == 'Const': - axis_name, parent_name = node.inputs - elif f.graph[node.inputs[1]].op == 'Const': - parent_name, axis_name = node.inputs - else: - continue - - if len(node.outputs) == 1 and f.graph[node.outputs[0]].op == 'get_tuple': - get_tuple_name = node.outputs[0] - else: - continue - - parent_node = f.graph[parent_name] - get_tuple_node = f.graph[get_tuple_name] - for out_name in get_tuple_node.outputs: - out_node = f.graph[out_name] - out_node.inputs = [parent_name if x == get_tuple_name else x \ - for x in out_node.inputs] - out_node.control_inputs = [parent_name if x == get_tuple_name \ - else x for x in out_node.control_inputs] - parent_node.outputs = get_tuple_node.outputs[:] - parent_node.control_outputs = get_tuple_node.control_outputs[:] - - del f.graph[axis_name], f.graph[k], f.graph[get_tuple_name] - - -def remove_noneffective_transpose(nnssa): - """ - A graph pass to eliminate extra noneffective consecutive transpose ops. 
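-
-    A transpose is noneffective when its permutation is already in ascending
-    order (e.g. perm = [0, 1, 2, 3]), i.e. an identity rearrangement.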
- """ - - def _match(graph, entry_node, pattern_ops): - if not _check_number_outputs(entry_node, 1): - return None - nodes_to_merge = list() - node = entry_node - for i, op in enumerate(pattern_ops): - if node.op.lower() != pattern_ops[i]: - return None - if not _check_number_outputs(node, 1) and i == 0: - return None - if not _check_number_inputs(node, 2): - return None - node_inputs = [graph[n].op.lower() for n in node.inputs] - try: - const_node = graph[node.inputs[node_inputs.index('const')]] - except ValueError: - return None - nodes_to_merge.extend([node, const_node]) - # do not fuse the output layer - if len(node.outputs) == 0: - return None - node = graph[node.outputs[0]] - return nodes_to_merge - - def _remove_noneffective_transpose(graph): - keys = list(graph.keys()) - count = 0 - for k in keys: - if k not in graph: - continue - current_node = graph[k] - if current_node.op.lower() not in {'transpose'}: - continue - - nodes = _match(graph, current_node, ['transpose']) - if nodes: - assert len(nodes) == 2 - # remove transpose op that does nothing - perm = list(nodes[1].value.val) - if perm == sorted(perm): - previous_node_name = current_node.inputs[0] - output_nodes = current_node.outputs[:] - delete_node(graph, nodes[1].name) - delete_node(graph, nodes[0].name) - connect_dests(graph, previous_node_name, output_nodes) - # make sure output's inputs is in correct order - out_inputs = graph[output_nodes[0]].inputs - a = out_inputs.index(graph[output_nodes[0]].inputs[0]) - b = out_inputs.index(previous_node_name) - out_inputs[a], out_inputs[b] = out_inputs[b], out_inputs[a] - count += 1 - - if count > 0: - print('[Op Removal] Deleted {} transpose ops.'.format(count)) - - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - _remove_noneffective_transpose(f.graph) - - -def remove_noneffective_reshape(nnssa): - """ - A graph pass to eliminate extra noneffective consecutive reshape ops. 
- """ - - def _match(graph, entry_node): - # currently only merge two consecutive reshape ops - pattern_ops = ['reshape', 'reshape'] - if not _check_number_outputs(entry_node, 1): - return None - nodes_to_merge = list() - node = entry_node - for i, op in enumerate(pattern_ops): - if node.op.lower() != pattern_ops[i]: - return None - if not _check_number_outputs(node, 1) or not _check_number_inputs(node, 2): - return None - # do not fuse the output layer - if len(node.outputs) == 0: - return None - node_inputs = [graph[n].op.lower() for n in node.inputs] - try: - const_node = graph[node.inputs[node_inputs.index('const')]] - except ValueError: - return None - if const_node.op.lower() != 'const': - return None - nodes_to_merge.extend([node, const_node]) - node = graph[node.outputs[0]] - return nodes_to_merge - - def _remove_noneffective_reshape(graph): - keys = list(graph.keys()) - count = 0 - for k in keys: - if k not in graph: - continue - current_node = graph[k] - if current_node.op.lower() not in {'reshape'}: - continue - - nodes = _match(graph, current_node) - if nodes: - assert len(nodes) == 4 - # squash consecutive reshape into last one - previous_node = current_node.inputs[0] - output_nodes = current_node.outputs[:] - delete_node(graph, nodes[1].name) - delete_node(graph, nodes[0].name) - connect_dests(graph, previous_node, output_nodes) - # make sure output's inputs is in correct order - out_inputs = graph[output_nodes[0]].inputs - a = out_inputs.index(nodes[3].name) - b = out_inputs.index(previous_node) - out_inputs[a], out_inputs[b] = out_inputs[b], out_inputs[a] - count += 1 - - if count > 0: - print('[Op Removal] Deleted {} reshape ops.'.format(count)) - - for fn_key in list(nnssa.functions.keys()): - f = nnssa.functions[fn_key] - _remove_noneffective_reshape(f.graph) diff --git a/coremltools/converters/nnssa/coreml/shapes.py b/coremltools/converters/nnssa/coreml/shapes.py deleted file mode 100644 index d96d5750b..000000000 --- a/coremltools/converters/nnssa/coreml/shapes.py +++ /dev/null @@ -1,828 +0,0 @@ -""" -Shape inference functions. 
-""" - - -def _transpose(layer_spec, input_shapes): - axes = list(layer_spec.transpose.axes) - input_shape = input_shapes[0] - output_shape = [None] * len(input_shape) - - for j in range(len(input_shape)): - output_shape[j] = input_shape[axes[j]] - - return [output_shape] - - -def _get_shape(layer_spec, input_shapes): - rank = len(input_shapes[0]) - return [[rank]] - - -def _slice_static(layer_spec, input_shapes): - params = layer_spec.sliceStatic - input_shape = input_shapes[0] - rank = len(input_shape) - output_shape = [-1] * rank - begin_indices = params.beginIds - end_indices = params.endIds - begin_masks = params.beginMasks - end_masks = params.endMasks - - for i in range(rank): - begin_indices[i] = begin_indices[i] if begin_indices[i] >= 0 else input_shape[i] + begin_indices[i] - end_indices[i] = end_indices[i] if end_indices[i] >= 0 else input_shape[i] + end_indices[i] - for idx, dim in enumerate(input_shape): - if dim > 0: # known - begin = None if params.beginMasks[idx] else begin_indices[idx] - end = None if params.endMasks[idx] else end_indices[idx] - thisslice = slice(begin, end, params.strides[idx]) - thisslicelen = len(list(range(input_shape[idx]))[thisslice]) - output_shape[idx] = thisslicelen - - return [output_shape] - - -def _slice_dynamic(layer_spec, input_shapes): - input_shape = input_shapes[0] - rank = len(input_shape) - output_shape = [-1] * rank - return [output_shape] - - -def _squeeze(layer_spec, input_shapes): - if layer_spec.squeeze.squeezeAll: - return [[1]] - axes = list(layer_spec.squeeze.axes) - input_shape = input_shapes[0] - rank = len(input_shape) - - if axes is None or len(axes) == 0: - raise NotImplementedError('Unspecified axes not implemented.') - output_shape = [] - axes = [axis if axis >= 0 else rank + axis for axis in axes] - for dim in range(rank): - if dim not in axes: - output_shape.append(input_shape[dim]) - elif input_shape[dim] > 0 and input_shape[dim] != 1: - raise ValueError( - '[Shaper] Cannot squeeze on index %d of shape %s' % (dim, str(input_shape))) - return [output_shape] if output_shape else [[1]] - - -def _range_dynamic(layer_spec, input_shapes): - if len(input_shapes) == 3: - return [[-1]] # 1 output containing an unknown length of vector - else: - raise NotImplementedError('NNSSA converter can only handle 3-input dynamic range at this time.') - - -def _range_static(layer_spec, input_shapes): - if len(input_shapes) == 3: - return [[-1]] - else: - params = layer_spec.rangeStatic - start, end, step = params.startValue, params.endValue, params.stepSizeValue - return [[int((end - start) / step)]] - - -def _load_constant(layer_spec, input_shapes): - shape = list(layer_spec.loadConstant.shape) - return [shape] - - -def _load_constant_nd(layer_spec, input_shapes): - shape = list(layer_spec.loadConstantND.shape) - return [shape] - - -def _add(layer_spec, input_shapes): - if len(input_shapes) == 2: - r = max(len(input_shapes[0]), len(input_shapes[1])) - # broadcasting if necessary - output_shapes = [[1] * (r - len(s)) + s for s in input_shapes] - output_shapes = [max(output_shapes[0], output_shapes[1])] - elif len(input_shapes) == 1: - output_shapes = input_shapes - else: - raise ValueError("[Shaper] Expects _add layers having either 1 or 2 inputs") - return output_shapes - - -def _broadcastable(layer_spec, input_shapes): - def broadcast_dim(x, y): - if x < 0 or y < 0: - return -1 - if x == 1 or y == 1: - return max([x, y]) - elif x == y: - return x - else: - return None - - max_rank = max([len(s) for s in input_shapes]) - 
extended_input_shapes = [[1] * (max_rank - len(s)) + list(s) for s in input_shapes] - output_shape = [1] * max_rank - for i_dim in range(max_rank): - for s in extended_input_shapes: - output_shape[i_dim] = broadcast_dim(output_shape[i_dim], s[i_dim]) - if output_shape[i_dim] is None: - raise ValueError('[Shaper] Cannot broadcast input_shapes %s' % (str(input_shapes))) - return [output_shape] - - -def _scatter(layer_spec, input_shapes): - # inputs: [target, source, indices] - return [input_shapes[0]] - - -def _scatter_nd(layer_spec, input_shapes): - # get the values of the shape input - return [input_shapes[0]] - - -def _gather(layer_spec, input_shapes): - if len(input_shapes) == 2: - indices_shape = input_shapes[1] - return [list(indices_shape) + list(input_shapes[0][1:])] - else: - raise ValueError("[Shaper] Gather layer accepts only 2 inputs") - - -def _gather_nd(layer_spec, input_shapes): - param_shape = input_shapes[0] - index_shape = input_shapes[1] - index_rank = len(index_shape) - output_shape = index_shape[:-1] - output_shape[index_rank - 1:] = param_shape[index_shape[index_rank - 1]:] - return [output_shape] - - -def _concat_nd(layer_spec, input_shapes): - if layer_spec.WhichOneof('layer') == 'concat': - axis = -3 - else: - axis = layer_spec.concatND.axis - rank = len(input_shapes[0]) - output_shape = list(input_shapes[0][:]) - if axis < 0: - axis += rank - - for shape in input_shapes: - if len(shape) != rank: - raise ValueError('[Shaper] Unable to shape concatND: ranks mismatch') - - for shape in input_shapes[1:]: - for idx, dim in enumerate(shape): - if output_shape[idx] == -1 or dim == -1: - output_shape[idx] = -1 - continue - if idx == axis: - output_shape[idx] += dim - elif output_shape[idx] != dim: - raise ValueError('[Shaper] Unable to shape concatND: shapes mismatch') - return [output_shape] - - -def _inner_product(layer_spec, input_shapes): - if len(input_shapes) == 1: # static weight - input_shape = input_shapes[0] - in_channels = layer_spec.innerProduct.inputChannels - out_channels = layer_spec.innerProduct.outputChannels - if input_shape[-1] != in_channels: - raise ValueError('[Shaper] Inner Product layer input channels mismatch') - return [input_shape[0:-1] + [out_channels]] - elif len(input_shapes) == 2: - input_shape, mat_shape = input_shapes[0:2] - in_channels = input_shape[-1] - if in_channels != -1 and in_channels != mat_shape[-2]: - raise ValueError('[Shaper] Inner Product layer input channels mismatch') - out_channels = mat_shape[-1] - return [input_shape[0:-1] + [out_channels]] - else: - raise ValueError('[Shaper] Inner Product needs either 1 or 2 inputs') - - -def _split_nd(layer_spec, input_shapes): - if len(input_shapes) != 1: - raise NotImplementedError('[Shaper] Dynamic split not implemented.') - axis = layer_spec.splitND.axis - num_splits = layer_spec.splitND.numSplits - output_shape = input_shapes[0][:] - output_shape[axis] /= num_splits - if output_shape[axis] == 0: - raise ValueError('[Shaper] Cannot split shape %s on axis %d' % (str(output_shape), axis)) - return [output_shape] * num_splits - - -def _identity(layer_spec, input_shapes): - return input_shapes[:] - - -def _reverse_seq(layer_spec, input_shapes): - return [input_shapes[0]] - - -def _expand_dims(layer_spec, input_shapes): - input_shape = input_shapes[0] - axes = list(layer_spec.expandDims.axes) - target_rank = len(input_shape) + len(axes) - axes = [axis if axis >= 0 else axis + target_rank for axis in axes] - - output_shape = input_shape[:] - for axis in axes: - output_shape = 
list(output_shape[0:axis]) + [1] + list(output_shape[axis:]) - return [output_shape] - - -def _where_non_zero(layer_spec, input_shapes): - input_shape = input_shapes[0] - rank = len(input_shape) - output_shape = [-1, rank] - return [output_shape] - - -def _stack(layer_spec, input_shapes): - axis = layer_spec.stack.axis - num_inputs = len(layer_spec.input) - shape = input_shapes[0] - for s in input_shapes: - if s != shape: - raise ValueError('[Shaper] stack input shapes mismatch') - output_shape = shape[:axis] + [num_inputs] + shape[axis:] - return [output_shape] - - -def _batched_mat_mul(layer_spec, input_shapes): - if len(input_shapes) == 1: - a_shape = list(input_shapes[0][:]) - a_shape[-1] = int(layer_spec.batchedMatmul.weightMatrixSecondDimension) - return [a_shape] - elif len(input_shapes) == 2: - a_shape, b_shape = input_shapes - if len(a_shape) < 2 or len(b_shape) < 2: - raise ValueError('[Shaper] MatMul with 2 inputs require the ranks of both inputs to be no less than 2') - tp_a = layer_spec.batchedMatmul.transposeA - tp_b = layer_spec.batchedMatmul.transposeB - r_x, c_x = a_shape[-2:] - r_y, c_y = b_shape[-2:] - r_o = c_x if tp_a else r_x - c_o = r_y if tp_b else c_y - output_shape = list(a_shape[0:-2]) + [r_o, c_o] - return [output_shape] - else: - raise NotImplementedError('[Shaper] Batched MatMul requires either 1 or 2 inputs') - - -def _embedding_nd(layer_spec, input_shapes): - input_shape = input_shapes[0] - if input_shape[-1] != 1: - raise ValueError('[Shaper] Last dimension of EmbeddingND input must be 1') - vocab_size = layer_spec.embeddingND.vocabSize - embedding_size = int(layer_spec.embeddingND.embeddingSize) - output_shape = input_shapes[0][:] - output_shape[-1] = embedding_size - return [output_shape] - - -def _conv2d(layer_spec, input_shapes): - raise NotImplementedError('Conv2D: shape logic not implemented') - - -def _reshape_static(layer_spec, input_shapes): - target_shape = list(layer_spec.reshapeStatic.targetShape) - return [target_shape] if target_shape else [[1]] - - -def _reduce(layer_spec, input_shapes): - axis_param = layer_spec.reduce.axis - axis = None - if axis_param == 2: - axis = -3 - elif axis_param == 3: - axis = -2 - elif axis_param == 4: - axis = -1 - else: - raise NotImplementedError( - '[Shaper] Reduce with axis parameter %s is not implemented.' 
% (str(axis_param))) - output_shape = input_shapes[0][:] - output_shape[axis] = 1 - return [output_shape] - - -def _reduce_general(params, input_shapes): - if params.reduceAll: - return [[1]] - - axes = list(params.axes) - output_shape = list(input_shapes[0][:]) - if params.keepDims: - for axis in axes: - output_shape[axis] = 1 - else: - for axis in axes: - output_shape[axis] = None - output_shape = [dim for dim in output_shape if dim is not None] - - return [output_shape] if output_shape else [[1]] - - -def _reduce_logsumexp(layer_spec, input_shapes): - return _reduce_general(layer_spec.reduceLogSumExp, input_shapes) - - -def _reduce_prod(layer_spec, input_shapes): - return _reduce_general(layer_spec.reduceProd, input_shapes) - - -def _reduce_mean(layer_spec, input_shapes): - return _reduce_general(layer_spec.reduceMean, input_shapes) - - -def _reduce_sum(layer_spec, input_shapes): - return _reduce_general(layer_spec.reduceSum, input_shapes) - - -def _reduce_max(layer_spec, input_shapes): - return _reduce_general(layer_spec.reduceMax, input_shapes) - - -def _reduce_min(layer_spec, input_shapes): - return _reduce_general(layer_spec.reduceMin, input_shapes) - - -def _argmax(layer_spec, input_shapes): - params = layer_spec.argMax - axis = params.axis - keepdims = not params.removeDim - - output_shape = input_shapes[0][:] - if keepdims: - output_shape[axis] = 1 - else: - output_shape[axis] = None - output_shape = [dim for dim in output_shape if dim is not None] - - return [output_shape] if output_shape else [[1]] - - -def _argmin(layer_spec, input_shapes): - params = layer_spec.argMin - axis = params.axis - keepdims = not params.removeDim - - output_shape = input_shapes[0][:] - if keepdims: - output_shape[axis] = 1 - else: - output_shape[axis] = None - output_shape = [dim for dim in output_shape if dim is not None] - - return [output_shape] if output_shape else [[1]] - - -def _tile(layer_spec, input_shapes): - params = layer_spec.tile - reps = params.reps - assert len(reps) == len(input_shapes[0]) - return [[reps[i] * input_shapes[0][i] for i in range(len(reps))]] - - -def _fill_static(layer_spec, input_shapes): - params = layer_spec.fillStatic - output_shape = params.targetShape - return [output_shape] - - -def _fill_dynamic(layer_spec, input_shapes): - assert (len(input_shapes) == 1 and len(input_shapes[0]) == 1) - rank = int(input_shapes[0][0]) - return [[-1] * rank] - - -def _broadcast_to_like(layer_spec, input_shapes): - return [input_shapes[1]] - - -def _broadcast_to_static(layer_spec, input_shapes): - params = layer_spec.broadcastToStatic - output_shape = params.targetShape - return [output_shape] - - -def _pad(layer_spec, input_shapes): - return [[-1] * len(input_shapes[0])] - - -def _mirror_pad(layer_spec, input_shapes): - params = layer_spec.padding - pad_h = params.paddingAmounts.borderAmounts[0] - pad_w = params.paddingAmounts.borderAmounts[1] - output_shape = input_shapes[0] - output_shape[2] += pad_h.startEdgeSize + pad_h.endEdgeSize - output_shape[3] += pad_w.startEdgeSize + pad_w.endEdgeSize - return [output_shape] - - -def _crop(layer_spec, input_shapes): - return [[-1] * len(input_shapes[0])] - - -def _topk(layer_spec, input_shapes): - params = layer_spec.topK - value_shape = index_shape = input_shapes[0][:-1] + [params.K] - output_shapes = [value_shape, index_shape] - return output_shapes - - -def _unidirectional_lstm(layer_spec, input_shapes): - shape = input_shapes[0] - hidden_size = input_shapes[1][2] - shape[2] = hidden_size - return [shape] * 3 - - -def 
_reorganize_data(layer_spec, input_shapes): - block_size = layer_spec.reorganizeData.blockSize - output_shape = input_shapes[0][:] - if 'SpaceToDepth' in layer_spec.name or 'SpaceToBatchND' in layer_spec.name: - output_shape[2] //= block_size - output_shape[3] //= block_size - output_shape[1] = output_shape[1] * block_size * block_size - elif 'DepthToSpace' in layer_spec.name or 'BatchToSpaceND' in layer_spec.name: - output_shape[2] *= block_size - output_shape[3] *= block_size - output_shape[1] = output_shape[1] // (block_size * block_size) - return [output_shape] - - -# We'll enable them one by one -_LAYER_REGISTRY = { - 'transpose': _transpose, - 'getShape': _get_shape, - 'sliceStatic': _slice_static, - 'sliceDynamic': _slice_dynamic, - 'squeeze': _squeeze, - 'rangeStatic': _range_static, - 'rangeDynamic': _range_dynamic, - 'loadConstant': _load_constant, - 'loadConstantND': _load_constant_nd, - 'gather': _gather, - 'gatherND': _gather_nd, - 'scatter': _scatter, - 'scatterND': _scatter_nd, - 'logicalOr': _broadcastable, - 'logicalNot': _identity, - 'lessThan': _broadcastable, - 'lessEqual': _broadcastable, - 'greaterThan': _broadcastable, - 'greaterEqual': _broadcastable, - 'equal': _broadcastable, - 'notEqual': _broadcastable, - 'logicalAnd': _broadcastable, - 'add': _add, - 'multiply': _add, - 'concatND': _concat_nd, - 'concat': _concat_nd, - 'innerProduct': _inner_product, - 'activation': _identity, - 'reverse': _identity, - 'reverseSeq': _reverse_seq, - 'copy': _identity, - 'expandDims': _expand_dims, - 'stack': _stack, - 'whereNonZero': _where_non_zero, - 'addBroadcastable': _broadcastable, - 'subtractBroadcastable': _broadcastable, - 'divideBroadcastable': _broadcastable, - 'whereBroadcastable': _broadcastable, - 'maxBroadcastable': _broadcastable, - 'minBroadcastable': _broadcastable, - 'modBroadcastable': _broadcastable, - 'floorDivBroadcastable': _broadcastable, - 'powBroadcastable': _broadcastable, - 'conv2d': _conv2d, - 'multiplyBroadcastable': _broadcastable, - 'reshapeStatic': _reshape_static, - # 'convolution': _convolution, # We propagate convolutional shapes by directly assigning from SSA output shape - 'embeddingND': _embedding_nd, - 'softmax': _identity, - 'softmaxND': _identity, - 'unary': _identity, - 'bias': _add, - 'max': _add, - 'min': _add, - 'reduce': _reduce, - 'argMax': _argmax, - 'argMin': _argmin, - 'reduceLogSumExp': _reduce_logsumexp, - 'reduceProd': _reduce_prod, - 'reduceMean': _reduce_mean, - 'reduceSum': _reduce_sum, - 'reduceMax': _reduce_max, - 'reduceMin': _reduce_min, - 'splitND': _split_nd, - 'batchedMatmul': _batched_mat_mul, - 'sin': _identity, - 'cos': _identity, - 'tan': _identity, - 'tile': _tile, - 'fillLike': _identity, - 'fillStatic': _fill_static, - 'fillDynamic': _fill_dynamic, - 'uniDirectionalLSTM': _unidirectional_lstm, - 'broadcastToLike': _broadcast_to_like, - 'broadcastToStatic': _broadcast_to_static, - 'constantPad': _pad, - 'padding': _mirror_pad, - 'crop': _crop, - 'sign': _identity, - 'ceil': _identity, - 'floor': _identity, - 'round': _identity, - 'topK': _topk, - 'reorganizeData': _reorganize_data, - 'batchnorm': _identity, - 'clip': _identity, - 'lrn': _identity, -} - - -def _get_translator_function(layer_type): - """Get the right translator function - """ - if layer_type in _LAYER_REGISTRY: - return _LAYER_REGISTRY[layer_type] - else: - raise TypeError( - "Shape computation function missing for layer of type %s." 
% layer_type)
-
-
-def _insert_to_dict(dic, key, val):
-    """ Append val to dic[key], keeping dic[key] a list of unique elements
-    """
-    if key not in dic:
-        dic[key] = []
-    if val not in dic[key]:
-        dic[key].append(val)
-
-
-def get_common_shape(x, y):
-    """ Get the common shape z of two shapes, x and y.
-    If x and y have different ranks, return None.
-    If x and y have the same rank but x[i] != y[i] for some i, then z[i] = -1, indicating UNKNOWN.
-    If x and y are equal, z = x.
-    """
-    z = None
-    if len(x) == len(y):
-        z = list(x)
-        for idx in range(len(x)):
-            z[idx] = x[idx] if x[idx] == y[idx] else -1
-    return z
-
-
-def is_static_shape(shape):
-    return all(x > 0 for x in shape)
-
-
-def is_a_shape_of(x, y):
-    """
-    True if x is a shape of y.
-    y uses -1 to indicate an arbitrary dimension.
-    If y is None, it represents a "missing" shape; in that case return True.
-    """
-    if y is None:
-        return True
-    x = (1,) if len(x) == 0 else x  # A scalar is interpreted as a 1-element array
-    y = (1,) if len(y) == 0 else y  # A scalar is interpreted as a 1-element array
-    if len(x) != len(y):
-        return False
-    return all([(a[0] == a[1] or a[1] == -1) for a in zip(x, y)])
-
-
-def _propagate_shapes(nn_spec, blob_names, shapes, srcs, dsts, layer_specs):
-    """
-    Traverse the neural network spec. The spec may not be top level.
-    This is the internal recursive call; use propagate_shapes() for the top-level traversal.
-    blob_names - a list of blob names
-    shapes - a dictionary of {blob_name: shape}
-    srcs - a dictionary of {blob_name: layers_writing_to_it}
-    dsts - a dictionary of {blob_name: layers_reading_from_it}
-    layer_specs - a dictionary of {layer_name: layer_spec} for easy access to parameters.
-
-    srcs, dsts, and layer_specs are byproducts that are not necessary for propagating
-    the shapes; they are kept for debugging purposes.
-    """
-    layers = nn_spec.layers
-    for i, layer in enumerate(layers):
-        # Register layer
-        layer_name = layer.name
-        layer_specs[layer_name] = layer
-        # Register input blobs
-        for j, blob_name in enumerate(layer.input):
-            if blob_name not in blob_names:
-                raise ValueError(
-                    '[Shaper] Layer %s input[%d] (%s) has never been seen before.' %
-                    (layer_name, j, blob_name))
-            if blob_name not in shapes:
-                raise ValueError(
-                    '[Shaper] The shape of input[%d] (%s) needed for layer "%s" cannot be determined.'
-                    % (j, blob_name, layer_name))
-            # Mark the layer as a destination of the blob
-            _insert_to_dict(dsts, blob_name, layer_name)
-
-        layer_type = layer.WhichOneof('layer')
-        if layer_type not in _LAYER_REGISTRY:
-            raise NotImplementedError(
-                '[Shaper] Layer "{}" of type "{}" not implemented'.format(layer_name, layer_type))
-        if layer_type == 'forloop':
-            # If a nested network, recursively traverse into it
-            _propagate_shapes(layer.condition, blob_names, shapes, srcs, dsts, layer_specs)
-            _propagate_shapes(layer.bodyNetwork, blob_names, shapes, srcs, dsts, layer_specs)
-        elif layer_type == 'branch':
-            _propagate_shapes(layer.ifBranch, blob_names, shapes, srcs, dsts, layer_specs)
-            _propagate_shapes(layer.elseBranch, blob_names, shapes, srcs, dsts, layer_specs)
-        else:
-            # If a regular layer, compute output blob shapes.
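-            # Dispatch example (illustrative shapes): for a 'concatND' layer
-            # with axis -1, the registered translator receives the layer spec
-            # plus one shape per input and returns one shape per output, e.g.
-            #   _concat_nd(layer, [[2, 3], [2, 5]]) -> [[2, 8]]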
-            layer_translator = _get_translator_function(layer_type)
-            input_shapes = [shapes[b] for b in layer.input]
-            output_shapes = layer_translator(layer, input_shapes)
-
-            # Register output blobs
-            for k, blob_name in enumerate(layer.output):
-                if blob_name not in blob_names:
-                    blob_names.append(blob_name)
-                _insert_to_dict(srcs, blob_name, layer_name)
-                if blob_name not in shapes:
-                    shapes[blob_name] = output_shapes[k]
-                else:
-                    common_shape = get_common_shape(shapes[blob_name], output_shapes[k])
-                    if common_shape is None:
-                        raise ValueError(
-                            'Unable to resolve shape for blob %s, with potential shapes %s and %s' %
-                            (blob_name, str(shapes[blob_name]), str(output_shapes[k])))
-
-
-def _finalize_spec(nn_spec, shapes, overwrite=True):
-    """
-    This is the internal recursive call; use propagate_shapes() for the top-level traversal.
-    nn_spec: spec of the neural network
-    shapes: a {str: shape} dictionary tracking the name -> coreml_shape pairs
-    overwrite: If True, discard any existing tensor shapes in the spec.
-        If False, write the tensor shapes where the spec has no tensor field,
-        and otherwise check the existing shapes for consistency.
-    """
-    layers = nn_spec.layers
-    for i, layer in enumerate(layers):
-        layer_type = layer.WhichOneof('layer')
-
-        if overwrite:
-            del layer.inputTensor[:]
-            del layer.outputTensor[:]
-
-        # input
-        if len(layer.inputTensor) == 0:
-            for j, blob_name in enumerate(layer.input):
-                shape = shapes[blob_name]
-                ts = layer.inputTensor.add()
-                ts.rank = len(shape)
-                ts.dimValue.extend(list(shape))
-        else:  # Check the existing shapes for consistency
-            for j, blob_name in enumerate(layer.input):
-                shape = shapes[blob_name]
-                ts = layer.inputTensor[j]
-                existing_shape = list(ts.dimValue)
-                if not (is_a_shape_of(existing_shape, shape)
-                        or is_a_shape_of(shape, existing_shape)):
-                    raise ValueError(
-                        '[Shaper] For layer %s, existing shape %s does not match new shape %s' %
-                        (layer.name, str(existing_shape), str(shape)))
-
-        # output
-        if len(layer.outputTensor) == 0:
-            for j, blob_name in enumerate(layer.output):
-                shape = shapes[blob_name]
-                ts = layer.outputTensor.add()
-                ts.rank = len(shape)
-                ts.dimValue.extend(list(shape))
-        else:  # Check the existing shapes for consistency
-            for j, blob_name in enumerate(layer.output):
-                shape = shapes[blob_name]
-                ts = layer.outputTensor[j]
-                existing_shape = list(ts.dimValue)
-                if not (is_a_shape_of(existing_shape, shape)
-                        or is_a_shape_of(shape, existing_shape)):
-                    raise ValueError(
-                        '[Shaper] For layer %s, existing shape %s does not match new shape %s' %
-                        (layer.name, str(existing_shape), str(shape)))
-
-        # If a nested network, recursively traverse into it
-        if layer_type == 'forloop':
-            _finalize_spec(layer.condition, shapes, overwrite=overwrite)
-            _finalize_spec(layer.bodyNetwork, shapes, overwrite=overwrite)
-        elif layer_type == 'branch':
-            _finalize_spec(layer.ifBranch, shapes, overwrite=overwrite)
-            _finalize_spec(layer.elseBranch, shapes, overwrite=overwrite)
-
-
-def propagate_shapes(mlmodel_spec, overwrite=True):
-    """
-    Propagate the input shapes in the spec into every layer.
-    Note that this mutates mlmodel_spec in place.
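-    Typical use (illustrative): build or load a spec, then call
-    propagate_shapes(spec) once; afterwards every layer carries populated
-    inputTensor / outputTensor shape fields.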
-    mlmodel_spec - the MLModel spec holding the model description
-    overwrite - if True, overwrite any existing tensor shapes
-    """
-    blob_names = []
-    srcs = {}
-    dsts = {}
-    shapes = {}
-    layer_specs = {}
-
-    # Put the inputs into the Shaper
-    for feature in mlmodel_spec.description.input:
-        name = feature.name
-        blob_names.append(name)
-        srcs[name] = []
-        shapes[name] = list(feature.type.multiArrayType.shape)
-
-    top_nn_spec = mlmodel_spec.neuralNetwork
-    _propagate_shapes(top_nn_spec, blob_names, shapes, srcs, dsts, layer_specs)
-    _finalize_spec(top_nn_spec, shapes, overwrite=overwrite)
-
-    output_names = [output.name for output in mlmodel_spec.description.output]
-
-    if overwrite:
-        del mlmodel_spec.description.output[:]
-
-    if len(mlmodel_spec.description.output) == 0:
-        for name in output_names:
-            output_ = mlmodel_spec.description.output.add()
-            output_.name = name
-            shape = shapes[name]
-            for n in shape:
-                output_.type.multiArrayType.shape.append(n)
-    else:
-        for output_ in mlmodel_spec.description.output:
-            existing_shape = list(output_.type.multiArrayType.shape)
-            shape = shapes[output_.name]
-
-            if not (is_a_shape_of(existing_shape, shape) or is_a_shape_of(shape, existing_shape)):
-                raise ValueError(
-                    '[Shaper] For output %s, existing shape %s does not match new shape %s' %
-                    (output_.name, str(existing_shape), str(shape)))
-
-
-def propagate_single_layer(layer, shapes, output_shapes=None, custom_shape_function=None):
-    """
-    Propagate input shapes to output shapes for a single layer, which may contain nested networks.
-    layer : a layer spec
-    shapes : a dictionary holding all known shapes
-    output_shapes : if None, the output tensors' shapes are computed by the layer's shape
-        propagation function, as found by _get_translator_function(layer_type). If not None,
-        output_shapes is written as the output spec of the layer.
-    custom_shape_function : if None, the shape function from _LAYER_REGISTRY is used to infer
-        the shape. If not None, the provided function is used to compute the output shape.
-    """
-    for j, blob_name in enumerate(layer.input):
-        if blob_name not in shapes:
-            raise ValueError(
-                '[Shaper] The shape of input[%d] (%s) needed for layer "%s" cannot be determined.'
% - (j, blob_name, layer.name)) - - layer_type = layer.WhichOneof('layer') - if output_shapes is None: - if layer_type not in _LAYER_REGISTRY and custom_shape_function is None: - raise NotImplementedError( - '[Shaper] Layer "{}" of type "{}" not implemented'.format(layer.name, layer_type)) - layer_translator = _get_translator_function(layer_type) if layer_type in _LAYER_REGISTRY else custom_shape_function - input_shapes = [list(shapes[b]) for b in layer.input] - output_shapes = layer_translator(layer, input_shapes) - - # Register output blobs - for k, blob_name in enumerate(layer.output): - if blob_name not in shapes: - shapes[blob_name] = output_shapes[k] - else: - common_shape = get_common_shape(shapes[blob_name], output_shapes[k]) - if common_shape is None: - raise ValueError( - 'Unable to resolve shape for blob %s, with potential shape %s and %s' % - (blob_name, str(shapes[blob_name]), str(output_shapes[k]))) - - # Write into layer spec - del (layer.inputTensor[:]) - for j, blob_name in enumerate(layer.input): - shape = shapes[blob_name] - ts = layer.inputTensor.add() - ts.rank = len(shape) - ts.dimValue.extend(list(map(int, shape))) - - del (layer.outputTensor[:]) - for j, blob_name in enumerate(layer.output): - shape = shapes[blob_name] - ts = layer.outputTensor.add() - ts.rank = len(shape) - ts.dimValue.extend(list(map(int, shape))) diff --git a/coremltools/converters/nnssa/coreml/ssa_converter.py b/coremltools/converters/nnssa/coreml/ssa_converter.py deleted file mode 100644 index feac522c6..000000000 --- a/coremltools/converters/nnssa/coreml/ssa_converter.py +++ /dev/null @@ -1,2982 +0,0 @@ -import numpy as np - -from warnings import warn - -from six import string_types as _string_types - -from coremltools.models import datatypes -from coremltools.proto import NeuralNetwork_pb2, Model_pb2 -from coremltools.models.neural_network import NeuralNetworkBuilder -from coremltools.models.neural_network.flexible_shape_utils import set_multiarray_ndshape_range -from collections import Iterable -import coremltools - -from ..commons import builtins -from ..commons.basic_graph_ops import topsort, check_connections - -from .graph_pass import * - -try: - import shapes -except: - from . import shapes - -DEBUG = False - - -def _is_scalar(type_): - if type_ is None: - return False - result = builtins.is_int(type_) or builtins.is_float(type_) or builtins.is_bool(type_) - if builtins.is_tensor(type_) and (len(type_.get_shape()) == 0): - result = True - return result - - -def ssa_convert(ssa, - top_func='main', - inputs=None, - outputs=None, - image_input_names=None, - image_format=None, - is_bgr=False, - red_bias=0.0, - green_bias=0.0, - blue_bias=0.0, - gray_bias=0.0, - image_scale=1.0, - class_labels=None, - predicted_feature_name=None, - predicted_probabilities_output='', - add_custom_layers=False, - custom_conversion_functions=None, - custom_shape_functions=None, - optional_inputs=None - ): - """ - Convert NNSSA into Core ML spec. - ssa : NetworkEnsemble - Required parameter - NNSSA to be converted to CoreML spec. - top_func : str or 'main' - Function entry point - inputs : dict of str -> list/tuple or None - Input features of CoreML specs. Must be a dictionary with - name as key and shape as value {name: shape}, - where name is the input's name, shape is the - shape of the feature tensor. The shape must be static - all - dimensions of shape should be a positive integer. - When not provided, SSA converter will treat all input nodes - in top level NNSSA as inputs. 
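-        For example (name and shape are illustrative),
-        inputs={'input_1': [1, 224, 224, 3]} pins the placeholder
-        "input_1" to a static shape.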
-    outputs : list of str or None
-        Output features of the Core ML spec, given as a list of names.
-        When not provided, the SSA converter treats all output nodes of the
-        top-level NNSSA as outputs.
-    add_custom_layers : bool
-        If True, `custom` layers are added to the model in place of
-        unsupported ops. Parameters for these custom layers should be filled
-        in manually by editing the mlmodel, or the
-        'custom_conversion_functions' argument can be used to do the same
-        during conversion.
-    custom_conversion_functions : dict of str -> function, or None
-        Custom functions to be used for conversion of specific ops. Users can
-        override an existing conversion function, or provide their own custom
-        implementation for certain ops. Each dictionary key must be a string
-        naming an op name or an op type, and each value must be a function
-        available in the current context. If separate functions are provided
-        for a node's name and its type, the function tied to the node name is
-        used, since it is more specific than one tied to the node type.
-        The custom_conversion_functions option is different from
-        add_custom_layers. Both options can be used together, in which case
-        the custom function is invoked for the specified ops and a custom
-        layer is added for ops without a conversion function. This gives the
-        user finer control: one use case is to modify input attributes or
-        graph properties before calling an existing conversion function.
-        Note that it is the custom conversion function's responsibility to
-        add the respective Core ML layer into the builder (coremltools's
-        NeuralNetworkBuilder).
-    custom_shape_functions : dict of str -> function, or None
-        Custom functions to compute the output shape, given the input shape,
-        for a given custom operator. This is required for the new converter
-        path, which maintains and propagates shapes while converting
-        operators.
-    image_format : str
-        Optional; valid only if image_input_names is also set. Specify either
-        'NCHW' or 'NHWC' to set or override the image format. If not set, the
-        converter tries to use hints from the graph, which may be present in
-        convolution or other image-specific layers. Ultimately defaults to
-        NHWC.
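-
-    Example (illustrative; `ssa` is a NetworkEnsemble produced by the
-    TensorFlow front end, and the feature names are hypothetical):
-
-        spec = ssa_convert(ssa,
-                           inputs={'image': [1, 224, 224, 3]},
-                           outputs=['probabilities'])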
- """ - if not custom_conversion_functions: - custom_conversion_functions = dict() - if not custom_shape_functions: - custom_shape_functions = dict() - if not optional_inputs: - optional_inputs = list() - - if outputs is not None: - ssa.extract_subgraph(outputs, name=top_func) - - if DEBUG: - import graphviz - dot_string = ssa.get_dot_string(annotation=True, name_and_op_style=True, highlight_debug_nodes=[]) - graphviz.Source(dot_string).view(filename='/tmp/ssa') - - # apply passes on the ssa, prior to conversion - # note: ideally order of passes should not matter, however, might be few special cases - # fuse_batch_to_space_or_space_to_batch needs to be applied before transform_nhwc_to_nchw - passes = [ - constant_weight_link_removal, - onehot_matmul_to_embedding, - fuse_layer_norm, - fuse_gelu, - fuse_batch_to_space_or_space_to_batch, - fuse_bias_add, - transform_nhwc_to_nchw, - remove_identity, - remove_no_ops_and_shift_control_dependencies, - remove_single_isolated_node, - fuse_batch_norm, - spatial_reduce_to_global_pool, - fuse_pad_into_conv, - remove_oneway_split, - remove_noneffective_transpose, - remove_noneffective_reshape - ] - - for p in passes: - p(ssa) - - if DEBUG: - import graphviz - dot_string = ssa.get_dot_string(annotation=True, name_and_op_style=True, highlight_debug_nodes=[]) - graphviz.Source(dot_string).view(filename='/tmp/ssa_after_passes') - - for f in list(ssa.functions.values()): - check_connections(f.graph) - - # Set classifier flag - is_classifier = class_labels is not None - neural_network_type = 'classifier' if is_classifier else None - - converter = SSAConverter(ssa, - top_func=top_func, - inputs=inputs, - outputs=outputs, - neural_network_type=neural_network_type, - add_custom_layers=add_custom_layers, - custom_conversion_functions=custom_conversion_functions, - custom_shape_functions=custom_shape_functions, - optional_inputs=optional_inputs) - - converter.convert() - - builder = converter._get_builder(func=top_func) - # Add image input identifier - if image_input_names is not None and isinstance( - image_input_names, _string_types): - image_input_names = [image_input_names] - - # Add classifier classes (if applicable) - if is_classifier: - classes = [] - classes_in = class_labels - if isinstance(classes_in, _string_types): # string - import os - if not os.path.isfile(classes_in): - raise ValueError("Path to class labels (%s) does not exist." % \ - classes_in) - with open(classes_in, 'r') as f: - classes = f.read() - classes = classes.splitlines() - elif type(classes_in) is list: # list[int or str] - classes = classes_in - else: - raise ValueError('Class labels must be a list of integers / strings,' \ - ' or a file path') - - if predicted_feature_name is not None: - builder.set_class_labels( - classes, predicted_feature_name=predicted_feature_name, - prediction_blob=predicted_probabilities_output) - else: - builder.set_class_labels(classes) - - detected_image_format = ssa.get_image_format() - if image_format and detected_image_format and image_format != detected_image_format: - warn('[SSAConverter] Detected image format different from input.' 
- 'Detected: {} Input: {}'.format(detected_image_format, image_format)) - image_format = image_format or detected_image_format or 'NHWC' - - # Set pre-processing parameters - builder.set_pre_processing_parameters(image_input_names=image_input_names, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale, - image_format=image_format) - - mlmodel_spec = converter.get_spec() - - # Required if an output node produces multiple outputs - # Generate new output features - modified_output_features_list = [] - for idx, output_feature in enumerate(mlmodel_spec.description.output): - if output_feature.name in converter.op_tensor_map: - atype = mlmodel_spec.description.output[idx].type - for aname in converter.op_tensor_map[output_feature.name]: - new_feature = Model_pb2.FeatureDescription() - new_feature.name = aname - new_feature.type.CopyFrom(atype) - if aname not in [feature.name for feature in modified_output_features_list]: - modified_output_features_list.append(new_feature) - else: - modified_output_features_list.append(output_feature) - - # delete the existing output feature - mlmodel_spec.description.ClearField('output') - - # creating new output features description - mlmodel_spec.description.output.extend(modified_output_features_list) - - # MLModel passes - mlmodel_passes = [remove_disconnected_layers, - remove_redundant_transposes, - ] - for p in mlmodel_passes: - p(mlmodel_spec) - - if DEBUG: - coremltools.models.utils.save_spec(mlmodel_spec, '/tmp/model_from_spec.mlmodel') - - return mlmodel_spec - - -class SSAConverter(object): - def __init__(self, - net_ensemble, # type: NetworkEnsemble - top_func='main', # type: str - inputs=None, # type: Dict[str, tuple] - outputs=None, # type: List[str] - neural_network_type=None, # type: str - add_custom_layers=False, # type: bool - custom_conversion_functions={}, # type: Dict[Text, Any] - custom_shape_functions={}, # type: Dict[Text, Any] - optional_inputs=[] # type: List[str] - ): - self.net_ensemble = net_ensemble - self.top_func = top_func # string indicating the top level function - if self.top_func not in self.net_ensemble.functions: - raise ValueError( - 'Top level function %s not in the NetworkEnsemble Provided' % self.top_func) - - # get top level inputs and outputs to instantiate spec - self.net_ensemble.functions[top_func].find_inputs_and_outputs() - top_input_names = list(map(str, self.net_ensemble.functions[top_func].inputs)) - top_output_names = list(map(str, self.net_ensemble.functions[top_func].outputs)) - - top_ssa = self.net_ensemble.functions[top_func] - - # custom conversion functions - self.custom_conversion_functions = custom_conversion_functions - self.add_custom_layers = add_custom_layers - self.custom_shape_functions = custom_shape_functions - - # find_inputs_and_outputs() generates a list of required inputs, which - # may not be supplied by inputs. We need to make sure that the - # user-supplied inputs name and shape are consistent with the NNSSA. 
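-        # For example (name and shape illustrative): if the NNSSA reports
-        # input "image" with shape [1, -1, -1, 3] (dynamic H/W), the caller
-        # must pass inputs={'image': [1, 224, 224, 3]}, or any static shape
-        # accepted by shapes.is_a_shape_of; the checks below enforce this
-        # compatibility.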
- top_input_shapes = [] - for name in top_input_names: - node = top_ssa.graph[name] - - shape = self._get_tensor_shape_from_type(node.datatype) - - if shape is None and inputs is None: - raise ValueError( - 'NNSSA input "%s" has non-static shape %s, please provide in argument "inputs"' - % (name, str(shape))) - if inputs is not None: - if name not in inputs: - raise ValueError( - 'Input "%s" is required by SSAConverter, but not passed in argument "inputs"' % name) - if shapes.is_static_shape(inputs[name]) and not shapes.is_a_shape_of(inputs[name], shape): - raise ValueError( - 'Input "%s" expects a shape compatible to %s, but is given %s' % - (name, str(shape), inputs[name])) - # Now that we can use the shape to create top_input_shapes - shape = inputs[name] if inputs[name] else [1, ] - top_input_shapes.append(shape) - - top_input_types = [] - is_input_optional = [True if name in optional_inputs else False for name in top_input_names] - is_input_dynamic = [True if not shapes.is_static_shape(shape) else False for shape in top_input_shapes] - for idx, dims in enumerate(top_input_shapes): - if is_input_dynamic[idx]: - static_shape = [dim_size if dim_size > 0 else 1 for dim_size in dims] - else: - static_shape = dims - - top_input_types.append(datatypes.Array(*static_shape)) - top_input_features = list(zip(top_input_names, top_input_types)) - - # TODO - verify outputs - if outputs is not None: - top_output_features = [] - for name in outputs: - if name in self.net_ensemble.variables.keys(): # Variable/States are optional inputs & outputs to be added later - continue - elif name in top_output_names: - top_output_features.append((name, None)) - else: - if len(top_output_names) == 1: - raise ValueError('Output "{}" is not an output node in the source graph. Do you mean "{}"?' - .format(name, top_output_names[0])) - else: - raise ValueError('Output "%s" is not an output node in the source graph.' 
% name) - else: - top_output_features = list(zip(top_output_names, [None] * len(top_output_names))) - - self.top_builder = NeuralNetworkBuilder(input_features=top_input_features, - output_features=top_output_features, - disable_rank5_shape_mapping=True, - mode=neural_network_type, - use_float_arraytype=True) - - self.spec = self.top_builder.spec - - for idx, input in enumerate(self.spec.description.input): - if is_input_dynamic[idx]: - input_name = top_input_names[idx] - dynamic_shape = top_input_shapes[idx] - lower_bounds, upper_bounds = [], [] - for dim_size in dynamic_shape: - if dim_size > 0: - lower_bounds.append(dim_size) - upper_bounds.append(dim_size) - else: - lower_bounds.append(1) - upper_bounds.append(-1) - set_multiarray_ndshape_range(self.spec, input_name, lower_bounds=lower_bounds, upper_bounds=upper_bounds) - - if is_input_optional[idx]: - self.spec.description.input[idx].type.isOptional = True - - self.CONVERT_FUNCTION_MAP = { - 'Abs': self._convert_unary_common, - 'Add': self._convert_binary, - 'AddV2': self._convert_binary, - 'AddN': self._convert_addn, - 'All': self._convert_reduction, - 'Any': self._convert_reduction, - 'ArgMax': self._convert_argmax, - 'ArgMin': self._convert_argmin, - 'AvgPool': self._convert_avgpool, - 'BatchMatMul': self._convert_batched_mat_mul, - 'BatchNorm': self._convert_batchnorm, - 'BatchToSpaceND': self._convert_batch_to_space_nd, - 'BiasAdd': self._convert_binary_broadcastable, - 'Cast': self._convert_cast, - 'Ceil': self._convert_unary_common, - 'ClipByValue': self._convert_clip, - 'Concat': self._convert_concat_nd, - 'ConcatV2': self._convert_concat_nd, - 'Const': self._convert_const, - 'Conv2D': self._convert_conv2d, - 'Conv2DBackpropInput': self._convert_conv2d_transpose, - 'Cos': self._convert_unary_trigonometric, - 'DepthToSpace': self._convert_reorganize_data, - 'DepthwiseConv2dNative': self._convert_conv2d, - 'Einsum': self._convert_einsum, - 'Elu': self._convert_unary_activation, - 'Embedding': self._convert_embedding, - 'Equal': self._convert_binary_broadcastable, - 'Exp': self._convert_unary_common, - 'ExpandDims': self._convert_expand_dims, - 'Fill': self._convert_fill, - 'Floor': self._convert_unary_common, - 'FloorDiv': self._convert_binary_broadcastable, - 'FloorMod': self._convert_floor_mod, - 'Gather': self._convert_gather, - 'GatherNd': self._convert_gather_nd, - 'GeLU': self._convert_gelu, - 'Greater': self._convert_binary_broadcastable, - 'GreaterEqual': self._convert_binary_broadcastable, - 'Identity': self._convert_identity, - 'LRN': self._convert_lrn, - 'LSTMBlock': self._convert_lstm_block_cell, - 'LayerNormalization': self._convert_layer_normalization, - 'LeakyRelu': self._convert_unary_activation, - 'Less': self._convert_binary_broadcastable, - 'LessEqual': self._convert_binary_broadcastable, - 'Log': self._convert_unary_common, - 'LogSoftmax': self._convert_unary_log_softmax, - 'LogicalAnd': self._convert_binary_broadcastable, - 'LogicalNot': self._convert_unary_logical_not, - 'LogicalOr': self._convert_binary_broadcastable, - 'MatMul': self._convert_batched_mat_mul, - 'MatrixBandPart': self._convert_matrix_band_part, - 'Max': self._convert_reduction, - 'MaxPool': self._convert_maxpool, - 'Maximum': self._convert_binary_broadcastable, - 'Mean': self._convert_reduction, - 'Min': self._convert_reduction, - 'Minimum': self._convert_binary_broadcastable, - 'MirrorPad': self._convert_mirror_pad, - 'Mul': self._convert_binary, - 'Neg': self._convert_unary_neg, - 'NotEqual': self._convert_binary_broadcastable, - 'Pack': 
self._convert_pack, - 'Pad': self._convert_constant_pad, - 'PadV2': self._convert_constant_pad, - 'Placeholder': self._convert_input, - 'Pow': self._convert_binary_broadcastable, - 'Prod': self._convert_reduction, - 'Range': self._convert_range, - 'RealDiv': self._convert_binary, - 'Reciprocal': self._convert_unary_inverse, - 'Relu': self._convert_unary_activation, - 'Relu6': self._convert_unary_activation_relu6, - 'Reshape': self._convert_reshape, - 'ResizeBilinear': self._convert_resize_bilinear, - 'ResizeNearestNeighbor': self._convert_resize_nearest_neighbor, - 'ReverseSequence': self._convert_reverse_sequence, - 'ReverseV2': self._convert_reverse, - 'Round': self._convert_unary_common, - 'Rsqrt': self._convert_unary_common, - 'ScatterNd': self._convert_scatter_nd, - 'SelectMask': self._convert_select, - 'Shape': self._convert_shape, - 'Sigmoid': self._convert_unary_activation, - 'Sign': self._convert_unary_common, - 'Sin': self._convert_unary_trigonometric, - 'Size': self._convert_size, - 'Selu': self._convert_selu, - 'Slice': self._convert_slice, - 'Softmax': self._convert_softmax, - 'SpaceToBatchND': self._convert_space_to_batch_nd, - 'SpaceToDepth': self._convert_reorganize_data, - 'Split': self._convert_split, - 'SplitV': self._convert_split, - 'Sqrt': self._convert_unary_common, - 'Square': self._convert_unary_square, - 'SquaredDifference': self._convert_squared_difference, - 'Squeeze': self._convert_squeeze, - 'StridedSlice': self._convert_slice, - 'Sub': self._convert_binary, - 'Sum': self._convert_reduction, - 'Softplus': self._convert_unary_activation, - 'Tan': self._convert_unary_trigonometric, - 'Tanh': self._convert_unary_activation, - 'TensorArrayGatherV3': self._convert_tensorarray_gather, - 'TensorArrayReadV3': self._convert_tensorarray_read, - 'TensorArrayScatterV3': self._convert_array_scatter, - 'TensorArraySizeV3': self._convert_tensorarray_size, - 'TensorArrayV3': self._convert_tensorarray_alloc, - 'TensorArrayWriteV3': self._convert_tensorarray_write, - 'Tile': self._convert_tile, - 'TopKV2': self._convert_topk, - 'Transpose': self._convert_transpose, - 'Unpack': self._convert_unpack, - 'Where': self._convert_where, - 'function_entry': self._convert_function, - 'get_global': self._convert_get_global, - 'get_tuple': self._convert_get_tuple, - 'iff': self._convert_iff, - 'make_tuple': self._convert_make_tuple, - 'return': self._convert_return, - 'set_global': self._convert_set_global, - 'while': self._convert_while, - 'ZerosLike': self._convert_zeros_like - } - - # converter state variables - # func_stack stores a list of NNSSA function names - self.func_stack = [self.top_func] - # Theoretically, there should be a one-to-one mapping between - # SSA function and nn_spec, which is associated with a NeuralNetworkBuilder - self.func_builder_map = {self.top_func: self.top_builder} - # All the shapes of the tensor of CoreML str:shape - self.tensor_shapes = { - name: top_input_shapes[idx] - for idx, name in enumerate(top_input_names) - } - # Map for tensors generated by special ops (make_tuple, get_tuple, function, return, etc) - # and value is the list of node names that represent tensors - self.op_tensor_map = {} - - # all variables/states are treated as both inputs & outputs. 
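-        # Illustrative example: a state variable "h" of shape [1, 64] becomes
-        # an optional model input "h__invar__" and an optional output
-        # "h__outvar__"; the copy layers added in convert() shuttle values
-        # between these and the internal blob "h".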
- for name, aVariable in self.net_ensemble.variables.items(): - if _is_scalar(aVariable): - shape = [1, ] - else: - assert builtins.is_tensor(aVariable) - shape = list([int(i) if i and i > 0 else 1 for i in self._get_tensor_shape_from_type(aVariable)]) - - self.top_builder.add_optionals([(name + '__invar__', shape)], [(name + '__outvar__', shape)]) - self.tensor_shapes[name + '__invar__'] = shape - - def get_spec(self): - return self.spec - - def print_function_nodes(self, func_name): - if func_name not in self.net_ensemble.functions: - raise ValueError('%s is not a function name in NetworkEnsemble' % func_name) - graph = self.net_ensemble.functions[func_name].graph - for name, node in graph.items(): - if node.op == 'get_global': - print('%s (%s) var = %s' % (name, node.op, node.attr['variable'])) - if node.op == 'set_global': - print('%s (%s) var = %s' % (name, node.op, node.attr['variable'])) - - def get_nnssa_inputs_outputs(self): - inputs, outputs, placeholder_defaults = self.net_ensemble._get_inputs_outputs() - print('Inputs: ') - for i in inputs: - print(i) - print('Outputs: ') - for o in outputs: - print(o) - print('Placeholders with default: ') - for p in placeholder_defaults: - print(p) - return inputs, outputs, placeholder_defaults - - def convert(self): - """ Convert the NNSSA function on top of func_stack into NeuralNetworkSpec. - """ - func_name = self.func_stack[-1] - func = self.net_ensemble.functions[func_name] - print('[SSAConverter] Converting function %s ...' % func_name) - - # Do a topological sort - restricted_graph = {} - function = self.net_ensemble.functions[func_name] - for k, v in function.graph.items(): - if len(v.outputs) > 0 and all( - [function.graph[i].value is not None for i in v.outputs]): - continue - restricted_graph[k] = v - instruction_order = topsort(restricted_graph) - - # Make a buffer between variable inputs - builder = self._get_builder() - for name, var in self.net_ensemble.variables.items(): - layer = builder.add_copy( - name=name + '_copy', - input_name=name + '__invar__', - output_name=name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Convert operations one by one - for idx, node_name in enumerate(instruction_order): - node = func.graph[node_name] - op_type = node.op - - custom_conversion_name = None - if node_name in self.custom_conversion_functions: - custom_conversion_name = node_name - elif op_type in self.custom_conversion_functions: - custom_conversion_name = op_type - - # Set conversion function and message - conversion_message = '' - if custom_conversion_name is not None: - conversion_message = ' with custom conversion function' - elif op_type in self.CONVERT_FUNCTION_MAP: - convert_func = self.CONVERT_FUNCTION_MAP[op_type] - elif self.add_custom_layers: - # Add custom layer - convert_func = self._convert_custom_layer - conversion_message = ' with custom layer' - else: - raise NotImplementedError( - '[SSAConverter] Conversion for op %s not implemented, terminating...' 
% op_type) - - print('[SSAConverter] [{}/{}] Converting op type: \'{}\', name: \'{}\'{}{}'.format( - idx + 1, len(instruction_order), op_type, node_name, conversion_message, - ((', output_shape: ' + str(node.datatype.get_shape()) + '.') if builtins.is_tensor(node.datatype) else '.'))) - - # If custom conversion method is provided, use it - # Otherwise, invoke internal conversion method - if custom_conversion_name is not None: - self.custom_conversion_functions[custom_conversion_name](self, node) - else: - convert_func(node) - - # Make a buffer between variable inputs - builder = self._get_builder() - for name, var in self.net_ensemble.variables.items(): - layer = builder.add_copy( - name=name + '_copy_r', - input_name=name, - output_name=name + '__outvar__') - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _get_builder(self, func=None): - if func is None: - func = self.func_stack[-1] - return self.func_builder_map[func] - - def _get_tensor_shape_from_type(self, type_): - if _is_scalar(type_): - shape = (1,) - elif builtins.is_tensor(type_): - shape = type_.get_shape() - elif builtins.is_list(type_): - element_shape = type_.T[0].get_shape() - for ashape in type_.T: - assert ashape.get_shape() == element_shape - shape = [-1] + list(element_shape) - else: - shape = None - return shape - - def _get_input_tensors(self, node, inspect_shapes=True): - """ Get the input nodes, their names and types for a node. - There are three cases: - (1) (Tuple case) input is a tuple. In this case, expand that tuple input into a list of input tensors - (2) (Regular case) input is a node name. In this case just copy it. - (3) (Indexed tuple case) input is one element in a tuple. In this case it should be stored in op_tensor_map - """ - input_nodes, input_names, input_types = [], [], [] - - for name in node.inputs: - if name in self.op_tensor_map: - input_names.extend(self.op_tensor_map[name]) - else: - input_names.append(name) - - for name in input_names: - if name in self.net_ensemble.variables: - input_node, _ = self.__get_node_and_type_by_name(name + "/read") - input_type = self.net_ensemble.variables[name] - else: - input_node, input_type = self.__get_node_and_type_by_name(name) - - assert input_node is not None - assert input_type is not None - input_nodes.append(input_node) - input_types.append(input_type) - - if inspect_shapes: - self.__compare_propagated_and_inferred_shape(name, input_type) - - return input_nodes, input_names, input_types - - def __get_node_and_type_by_name(self, name): - for fname in self.func_stack[::-1]: - func = self.net_ensemble.functions[fname] - if name in func.graph: - node = func.graph[name] - return node, node.datatype - - for node_name, output_names in self.op_tensor_map.items(): - if name in output_names: - node, type_ = self.__get_node_and_type_by_name(node_name) - if builtins.is_tuple(type_): - Id = output_names.index(name) - type_ = node.datatype.T[Id] - return node, type_ - - return None, None - - def __compare_propagated_and_inferred_shape(self, name, type_): - - propagated_shape = tuple(self.tensor_shapes[name]) - if _is_scalar(type_): - inferred_shape = (1,) - elif builtins.is_tensor(type_): - inferred_shape = type_.get_shape() - elif builtins.is_list(type_): - element_shape = type_.T[0].get_shape() - for ashape in type_.T: - assert ashape.get_shape() == element_shape - inferred_shape = [-1] + list(element_shape) - else: - raise ValueError('[SSAConverter] Failed to infer shape for tensor %s' % name) - - mismatch = '[SSAConverter] Shape mismatch for {}: 
inferred {} vs. propagated {}.'.format( - name, inferred_shape, propagated_shape) - - if len(propagated_shape) != len(inferred_shape): - raise ValueError(mismatch) - - for pdim, idim in zip(propagated_shape, inferred_shape): - if pdim == -1 or idim == -1 or pdim == idim: - continue - raise ValueError(mismatch) - - def _convert_input(self, node): - """ Convert an input node. For now, we may just need to skip it. - """ - pass - - def _convert_const(self, node): - """ Convert a constant node. - """ - node_value = node.value - if node_value is None: - node_value = node.attr.get('value') - val = np.array(node_value.val) - if len(val.shape) == 0: - val = np.array([node_value.val]) - builder = self._get_builder() - layer = builder.add_load_constant_nd( - name=node.name, output_name=node.name, constant_value=val, shape=val.shape) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_custom_layer(self, node): - """ Add custom layer - """ - params = NeuralNetwork_pb2.CustomLayerParams() - params.className = node.op - params.description = "Custom layer that corresponds to the TensorFlow op {}".format(node.op) - builder = self._get_builder() - layer = builder.add_custom(name=node.name, - input_names=node.inputs, - output_names=[node.name], - custom_proto_spec=params) - - if node.op not in self.custom_shape_functions: - raise ValueError('Custom Shape Function for {} not provided!'.format(node.op)) - shapes.propagate_single_layer(layer, self.tensor_shapes, custom_shape_function=self.custom_shape_functions[node.op]) - - def _convert_einsum(self, node): - - input_nodes, input_names, input_types = self._get_input_tensors(node) - if len(input_names) > 2: - raise ValueError("currently, 'einsum' operation is only supported when it has less than equal to 2 inputs.") - equation = node.attr.get('equation') - if not '->' in equation: - raise ValueError('current einsum does not support matrix diagonal operations.') - - # Helper functions - def get_transpose_map(prefix, suffix): - prefix_map = dict(zip(prefix,range(len(prefix)))) - return [prefix_map[x] for x in suffix] - - def get_product(array, map): - if len(array) == 0: - raise ValueError('equation {} not supported currently.'.format(equation)) - result = 1 - for num in array: - result *= map[num] - return result - - # Parse equation - prefix = equation.split('->')[0] - suffix = equation.split('->')[1] - - # Pattern matching - builder = self._get_builder() - if not ',' in prefix: - # Transpose - axes = get_transpose_map(prefix, suffix) - layer = builder.add_transpose( - name = node.name, - axes = axes, - input_name = input_names[0], - output_name = node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - else: - a, b = prefix.split(',') - - if suffix == '': - # Inner Product - axes = get_transpose_map(a, b) - transpose_name = node.name + '_transpose' - layer = builder.add_transpose( - name = transpose_name, - axes = axes, - input_name = input_names[0], - output_name = transpose_name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - mul_name = node.name + '_multiply' - layer = builder.add_multiply_broadcastable( - name = mul_name, - input_names = [transpose_name, input_names[1]], - output_name = mul_name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - reduce_sum_name = node.name+ '_reduce_sum' - layer = builder.add_reduce_sum( - name = reduce_sum_name, - input_name = mul_name, - output_name = node.name, - reduce_all = True) - shapes.propagate_single_layer(layer, self.tensor_shapes) - else: - 
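-                # General contraction; e.g. for equation 'bid,bjd->bij'
-                # (illustrative): dims_batch = ['b'] (shared and kept),
-                # dims_a = ['i'], dims_b = ['j'] (exclusive to one operand),
-                # and dims_reduce = ['d'] (summed out). Both operands are
-                # transposed/reshaped to [batch, prod(kept), prod(reduced)]
-                # and contracted with a single batched matmul, then reshaped
-                # and transposed to match the suffix.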
prefix_set = set(a).union(set(b)) - - # Find the dimensions to be reduced in matrix multiplication - dims_reduce = sorted([dim for dim in prefix_set if dim not in suffix]) - if not all([dim in a and dim in b for dim in dims_reduce]): - raise ValueError('equation {} not supported currently.'.format(equation)) - - # Find the batch dimensions needs to be keep - dims_batch = sorted([dim for dim in a if dim in b and dim in suffix]) - - # Find the dimensions only in a or b - dims_a = sorted([dim for dim in a if dim not in b and dim in suffix]) - dims_b = sorted([dim for dim in b if dim not in a and dim in suffix]) - - # Sort a and b - a_sorted = dims_batch + dims_a + dims_reduce - b_sorted = dims_batch + dims_b + dims_reduce - - # Transpose inputs into order of [dims_batch] + [dims_x] + [dims_reduce] - transpose_name_a = node.name + '_transpose_input_a_0' - axes = get_transpose_map(a, a_sorted) - layer = builder.add_transpose( - name = transpose_name_a, - axes = axes, - input_name = input_names[0], - output_name = transpose_name_a) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - transpose_name_b = node.name + '_transpose_input_b_0' - axes = get_transpose_map(b, b_sorted) - layer = builder.add_transpose( - name = transpose_name_b, - axes = axes, - input_name = input_names[1], - output_name = transpose_name_b) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Reshape a into [dims_batch] + product([dims_a]) + product([dims_reduce]) - # Reshape b into [dims_batch] + product([dims_reduce]) + product([dims_b]) - a_shape = self._get_tensor_shape_from_type(input_types[0]) - b_shape = self._get_tensor_shape_from_type(input_types[1]) - a_map = dict(zip(a, a_shape)) - b_map = dict(zip(b, b_shape)) - dims_reduce_product = get_product(dims_reduce, a_map) - dims_a_product = get_product(dims_a, a_map) - dims_b_product = get_product(dims_b, b_map) - - a_output_shape = map(lambda x:a_map[x], dims_batch) + \ - [dims_a_product] + \ - [dims_reduce_product] - - b_output_shape = map(lambda x:b_map[x], dims_batch) + \ - [dims_b_product] + \ - [dims_reduce_product] - - reshape_name_a = node.name + '_reshape_input_a_0' - layer = builder.add_reshape_static( - name = reshape_name_a, - input_name = transpose_name_a, - output_name = reshape_name_a, - output_shape = tuple(a_output_shape)) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - reshape_name_b_0 = node.name + '_reshape_input_b_0' - layer = builder.add_reshape_static( - name = reshape_name_b_0, - input_name = transpose_name_b, - output_name = reshape_name_b_0, - output_shape = tuple(b_output_shape)) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - batch_num = len(dims_batch) - axes = range(batch_num) + [batch_num+1, batch_num] - reshape_name_b = node.name + '_reshape_input_b_0_1' - layer = builder.add_transpose( - name = reshape_name_b, - axes = axes, - input_name = reshape_name_b_0, - output_name = reshape_name_b) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Batch matrix multiplication - # Result should be in shape [dims_batch] + product([dims_a]) + product([dims_b]) - batch_mat_mul_name = node.name + '_batch_mat_mul' - layer = builder.add_batched_mat_mul( - name = batch_mat_mul_name, - input_names = [reshape_name_a, reshape_name_b], - output_name = batch_mat_mul_name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Reshape tensor to [dims_batch] + [dims_a] + [dims_b] - reshape_name = node.name + '_reshape' - output_shape = map(lambda x: a_map[x], dims_batch) + \ - 
map(lambda x: a_map[x], dims_a) + \ - map(lambda x: b_map[x], dims_b) - layer = builder.add_reshape_static( - name = reshape_name, - input_name = batch_mat_mul_name, - output_name = reshape_name, - output_shape = tuple(output_shape)) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Transpose tensor to suffix - transpose_name = node.name + '_transpose' - axes = get_transpose_map(dims_batch + dims_a + dims_b, suffix) - layer = builder.add_transpose( - name = transpose_name, - axes = axes, - input_name = reshape_name, - output_name = node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_transpose(self, node): - """ Convert a transpose op. - """ - # permute dimensions are assumed to be a const - input_nodes, input_names, input_types = self._get_input_tensors(node) - dim = input_nodes[1].value.val if len(input_names) > 1 else node.attr.get('dim') - if dim is None: - raise ValueError('[SSAConverter] Cannot handle dynamic Transpose') - dim = list(dim) - builder = self._get_builder() - - layer = builder.add_transpose( - name=node.name, axes=dim, input_name=input_names[0], output_name=node.name) - - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_shape(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 1) - builder = self._get_builder() - layer = builder.add_get_shape( - name=node.name, input_name=input_names[0], output_name=node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_selu(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 1) - builder = self._get_builder() - - elu_output_name = node.name + '_elu' - builder.add_activation(node.name +'__elu__', 'ELU', input_names[0], elu_output_name, - params=1.6732632) - builder.add_elementwise(node.name, - input_names=elu_output_name, - output_name=node.name, - mode='MULTIPLY', - alpha=1.05070098) - - self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype) - - def _convert_size(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 1) - builder = self._get_builder() - layer = builder.add_get_shape( - name=node.name + "_shape", input_name=input_names[0], output_name=node.name + "_shape") - - layer = builder.add_reduce_prod( - name=node.name, - input_name=node.name + "_shape", - output_name=node.name, - keepdims=True, - reduce_all=True) - - self.tensor_shapes[node.name] = [1] - - def _convert_slice(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - - has_squeeze = 'squeeze' in node.attr and node.attr['squeeze'] - axes = node.attr.get('squeeze') - - if _is_scalar(node.datatype): - output_shape = [] - elif builtins.is_tensor(node.datatype): - output_shape = self._get_tensor_shape_from_type(node.datatype) - else: - output_shape = None - - if has_squeeze: - if output_shape is None: - raise ValueError('[SSAConverter] Unable to determine output shapes for Slice') - if len(output_shape) == 0 and len(axes) == 1: - has_squeeze = False - - slice_output_name = node.name + '_slice_' if has_squeeze else node.name - - builder = self._get_builder() - - rank = len(self._get_tensor_shape_from_type(input_types[0])) - begin_masks = [True if i in node.attr['begin_masks'] else False for i in range(rank)] - end_masks = [True if i in node.attr['end_masks'] else False for i in range(rank)] - if 'slice' not in node.attr: - assert 
node.attr["new_axis_mask"] == 0 - assert len(input_names) >= 4 - layer = builder.add_slice_dynamic(name=slice_output_name, - input_names=input_names[:4], - output_name=slice_output_name, - begin_masks=begin_masks, - end_masks=end_masks) - - if not has_squeeze and output_shape: - self.tensor_shapes[node.name] = output_shape - else: - shapes.propagate_single_layer(layer, self.tensor_shapes) - - else: - # For simple RNN, node.attr always has a 'slice' - # This means slicing is always static - # each slice is [begin, end, step] - slices = node.attr['slice'] - begin_indices, end_indices, strides = [], [], [] - for s in slices: - begin_indices.append(s[0]) - end_indices.append(s[1]) - strides.append(s[2]) - - layer = builder.add_slice_static( - name=slice_output_name, - input_name=input_names[0], - output_name=slice_output_name, - begin_ids=begin_indices, - end_ids=end_indices, - strides=strides, - begin_masks=begin_masks, - end_masks=end_masks) - - shapes.propagate_single_layer(layer, self.tensor_shapes) - - if has_squeeze: - input_shape = self._get_tensor_shape_from_type(input_types[0]) - input_rank = len(input_shape) - squeeze_all = (input_rank == len(axes)) - layer = builder.add_squeeze( - name=node.name, - input_name=slice_output_name, - output_name=node.name, - axes=axes if not squeeze_all else None, - squeeze_all=squeeze_all) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_range(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - if len(input_names) != 3: - raise ValueError( - 'CoreML NeuralNetwork range layer must have 3 inputs: start, end and step') - input_names = [input_names[1], input_names[0], input_names[2]] - - builder = self._get_builder() - layer = builder.add_range_dynamic(name=node.name, output_name=node.name, input_names=input_names) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_tensorarray_alloc(self, node): - # TensorArray is a list of tensors, it will be treated as a rank+1 - # tensor when converted. The shape information is stored at two - # different places - node input specifies the length of the list - # while the node's datatype stores the shape of each tensor allocated. 
- input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 1) - - element_shape = node.datatype.T[0].get_shape() - if (not node.attr.get('identical_element_shapes', True) or - not all([atype.get_shape() == element_shape for atype in node.datatype.T])): - raise ValueError( - '[SSAConverter] TensorArray allocation cannot handle arrays' - 'with tensors of various shapes.') - - has_static_element_shape = all([dim > 0 for dim in element_shape]) - - if input_nodes[0].op == 'Const': - length = input_nodes[0].value.val - array_size = length if length > 0 else 1 - elif 'size' in node.attr and isinstance(node.attr['size'], int): - array_size = node.attr['size'] - else: - array_size = None - - # Simpler case: No dynamic shape - if array_size is not None and has_static_element_shape: - array_shape = [array_size] + list(element_shape) - layer = self._get_builder().add_load_constant_nd( - name=node.name, - output_name=node.name, - constant_value=np.zeros(array_shape, dtype='float'), - shape=array_shape) - shapes.propagate_single_layer(layer, self.tensor_shapes) - elif has_static_element_shape: - # Load element shape into network - builder = self._get_builder() - if element_shape: - node_es_name = node.name + '__element_shape' - layer = builder.add_load_constant_nd( - name=node_es_name, - output_name=node_es_name, - constant_value=np.array(element_shape, dtype='float'), - shape=[len(element_shape)]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Concatenate list length (the input, should be a constant vector of size 1) with element shape - node_arr_shape_name = node.name + '__arr_shape' - layer = builder.add_concat_nd( - name=node_arr_shape_name, - input_names=input_names + [node_es_name], - output_name=node_arr_shape_name, - axis=0) - shapes.propagate_single_layer(layer, self.tensor_shapes) - else: - node_arr_shape_name = input_names[0] - - # Now allocate required shape - layer = builder.add_fill_dynamic( - name=node.name, input_name=node_arr_shape_name, output_name=node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - # Overwrite the output shape with fixed element shape - self.tensor_shapes[node.name][1:] = element_shape - layer.outputTensor[0].dimValue[1:] = element_shape - else: - raise ValueError( - '[SSAConverter] TensorArray allocation cannot determine element shapes statically' - ) - - def _convert_array_scatter(self, node): - # NNSSA input order: indices, value, array - # CoreML input order: container (array), indices, slices (value) - - input_nodes, input_names, input_types = self._get_input_tensors(node) - if len(input_names) != 3: - raise ValueError('Scatter only accepts 3 inputs') - indices, value, array = input_names - layer = self._get_builder().add_scatter( - name=node.name, input_names=[array, indices, value], output_name=node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_make_tuple(self, node): - # make tuple aggregates a list of SSA nodes (which also stands for their outputs) - # For now, I think recording the make_tuple node itself for reference would suffice. - if node.name in self.op_tensor_map: - raise ValueError('make_tuple node %s should not be visited twice.' 
% node.name) - - input_nodes, input_names, input_types = self._get_input_tensors(node) - self.op_tensor_map[node.name] = input_names - - def _convert_while(self, node): - # In CoreML, loops and branches should be designed such that inputs / outputs - # should be empty, because it is not necessary and not clearly defined. - # Should only take a tuples - assert (len(node.inputs) == 1) - current_graph = self.net_ensemble.functions[self.func_stack[-1]].graph - assert (current_graph[node.inputs[0]].op == 'make_tuple') - input_nodes, input_names, input_types = self._get_input_tensors(node) - - self.op_tensor_map[node.name] = input_names - builder_top = self._get_builder() - while_layer = builder_top.add_loop(name=node.name) - - loop_param = while_layer.loop - loop_param.maxLoopIterations = 0 - - # Both body function and condition function share the same inputs (args) of the loop - # convert the condition function - if 'cond_function' in node.attr: - if not loop_param.HasField('conditionNetwork'): - loop_param.condition.MergeFromString(b'') - cond_func_name = node.attr['cond_function'] - # TODO - need to find cond_var name - self.func_stack.append(cond_func_name) - self.func_builder_map[cond_func_name] = NeuralNetworkBuilder( - nn_spec=loop_param.conditionNetwork, disable_rank5_shape_mapping=True) - - self.op_tensor_map[cond_func_name] = input_names - self.convert() - cond_func = self.net_ensemble.functions[cond_func_name] - ret_node_name = cond_func.outputs[0] - loop_param.conditionVar = cond_func.graph[ret_node_name].inputs[0] - self.func_stack.pop() - else: - raise ValueError('Unable to determine condition function in the loop') - - # convert the body function - if 'body_function' not in node.attr: - raise ValueError('A "while" SSA node should not be empty.') - if not loop_param.HasField('bodyNetwork'): - loop_param.bodyNetwork.MergeFromString(b'') - - body_func_name = node.attr['body_function'] - self.func_stack.append(body_func_name) - self.func_builder_map[body_func_name] = NeuralNetworkBuilder( - nn_spec=loop_param.bodyNetwork, disable_rank5_shape_mapping=True) - - self.op_tensor_map[body_func_name] = input_names - self.convert() - - # The body function should re-write variables when it returns. - body_func = self.net_ensemble.functions[body_func_name] - loop_var_tuple_name = None - for k, v in body_func.graph.items(): - # k is name, v is node - if v.op == 'make_tuple' and body_func.graph[v.outputs[0]].op == 'return': - loop_var_tuple_name = k - break - - loop_var_names = self.op_tensor_map[loop_var_tuple_name] - assert len(loop_var_names) == len(input_names) - - # Loop body should have the same input and output - builder_body = self._get_builder() - for src, dst in zip(loop_var_names, input_names): - # loop variables may be passed as an input to while op but unused. 
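-            # Illustrative example: if the body computes an updated counter
-            # blob 'i_new' for loop input 'i', a copy layer 'copy_i_new_i'
-            # rewrites 'i' at the end of each iteration; identical src/dst
-            # names are skipped below.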
- if src == dst: - continue - layer = builder_body.add_copy( - name='copy_' + src + '_' + dst, input_name=src, output_name=dst) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # Pop back into while's loop - self.func_stack.pop() - - def _convert_function(self, node): - # Function node is the entry point of a function - pass - - def _convert_get_tuple(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - self.op_tensor_map[node.name] = [input_names[node.attr['index']]] if node.attr['index'] < len(input_names) else [] - - def _convert_get_global(self, node): - input_name = node.attr["variable"] - self.op_tensor_map[node.name] = [input_name] - - def _convert_set_global(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - output_name = node.attr["variable"] - - builder = self._get_builder() - - if len(node.outputs) > 0: - self.op_tensor_map[node.name] = [input_names[0]] - - if input_nodes[0].op == "Const" and input_nodes[0].value.val.size == 0: - return - - if output_name != input_names[0]: - layer = builder.add_copy(name=node.name, - input_name=input_names[0], - output_name=output_name) - - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_return(self, node): - # When converting a body function of a loop, return node should overwrite body functions' input tensors - pass - - def _convert_unary_logical_not(self, node): - assert len(node.inputs) == 1 - input_nodes, input_names, input_types = self._get_input_tensors(node) - layer = self._get_builder().add_logical( - name=node.name, - input_names=input_names, - output_name=node.name, - mode='NOT') - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_floor_mod(self, node): - assert len(node.inputs) == 2 - input_nodes, input_names, input_types = self._get_input_tensors(node) - - a, b = input_names - a_div_b = node.name + "_floor_div" - floor_a = node.name + "_floor_a" - - if builtins.is_int(node.attr['T']): - round_a = node.name + "_round_a" - round_b = node.name + "_round_b" - - layer = self._get_builder().add_round(name=round_a, - input_name=a, - output_name=round_a) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = self._get_builder().add_round(name=round_b, - input_name=b, - output_name=round_b) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - a, b = round_a, round_b - - layer = self._get_builder().add_floor_div_broadcastable( - name=a_div_b, input_names=[a, b], output_name=a_div_b) - - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = self._get_builder().add_multiply_broadcastable( - name=floor_a, input_names=[a_div_b, b], output_name=floor_a) - - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = self._get_builder().add_subtract_broadcastable( - name=node.name, input_names=[a, floor_a], output_name=node.name) - - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_squared_difference(self, node): - assert (len(node.inputs) == 2) - input_nodes, input_names, input_types = self._get_input_tensors(node) - - sub_node_name = node.name + '_sub_' - - layer = self._get_builder().add_subtract_broadcastable( - name=sub_node_name, input_names=input_names, output_name=sub_node_name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = self._get_builder().add_unary( - name=node.name, input_name=sub_node_name, output_name=node.name, mode='power', alpha=2.0) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def 
_convert_select(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 3) - cond_name, true_name, false_name = input_names - - if "expand_dims" in node.attr: - axes = node.attr["expand_dims"] - cond_output_name = node.name + '_expanded' - layer = self._get_builder().add_expand_dims( - name=cond_output_name, input_name=cond_name, output_name=cond_output_name, axes=axes) - shapes.propagate_single_layer(layer, self.tensor_shapes) - cond_name = cond_output_name - - layer = self._get_builder().add_where_broadcastable( - name=node.name, - input_names=[cond_name, true_name, false_name], - output_name=node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_where(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - - if len(input_names) == 3: - self._convert_select(node) - else: - assert len(input_names) == 1 - layer = self._get_builder().add_where_nonzero(name=node.name, - input_name=input_names[0], - output_name=node.name) - - self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype) - - def _convert_softmax(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - axis = -1 if 'axis' not in node.attr else node.attr['axis'] - layer = self._get_builder().add_softmax_nd( - name=node.name, input_name=input_names[0], output_name=node.name, axis=axis) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_tensorarray_read(self, node): - # TensorArrayReadV3 slices an element from TensorArray, which in NNSSA is a list. - # This is equivalent to array gather - input_nodes, input_names, input_types = self._get_input_tensors(node) - - slice_output_name = node.name + '_slice_' - layer = self._get_builder().add_gather( - name=node.name + '_gather_', - input_names=input_names[::-1], - output_name=slice_output_name, - axis=0) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # tensorarray_read should generate only 1 slice, so adding a squeeze should be enough - layer = self._get_builder().add_squeeze( - name=node.name + '_squeeze_', - input_name=slice_output_name, - output_name=node.name, - axes=[0]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_tensorarray_write(self, node): - """def TensorArrayWrite(index, value, array): - array[index] = value - return array - """ - # node.inputs = ['index', 'value', 'array'] - input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 3) - - index_name, value_name, array_name = input_names - if 'dynamic_size' in input_nodes[-1].attr: - builder = self._get_builder() - layer = builder.add_get_shape( - name=array_name + '_full_shape', - input_name=array_name, - output_name=array_name + '_full_shape') - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = builder.add_slice_static( - name=array_name + '_length', - input_name=array_name + '_full_shape', - output_name=array_name + '_length', - begin_ids=[0], - end_ids=[1], - begin_masks=[False], - end_masks=[False], - strides=[1]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = builder.add_slice_static( - name=array_name + '_element_shape', - input_name=array_name + '_full_shape', - output_name=array_name + '_element_shape', - begin_ids=[1], - end_ids=[1], - begin_masks=[False], - end_masks=[True], - strides=[1]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = builder.add_greater_than( - 
name=array_name + "_is_growing", - input_names=[index_name, array_name + '_length'], - output_name=array_name + "_is_growing", - use_greater_than_equal=True - ) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = builder.add_branch( - name=array_name + "_condition", - input_name=array_name + "_is_growing") - - ifbranch = NeuralNetworkBuilder(nn_spec=layer.branch.ifBranch, - disable_rank5_shape_mapping=True) - - layer = ifbranch.add_fill_dynamic( - name=array_name + "_alloc", - input_name=array_name + '_element_shape', - output_name=array_name + "_alloc", - value=0.0) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = ifbranch.add_expand_dims( - name=array_name + "_new_element", - input_name=array_name + "_alloc", - output_name=array_name + "_new_element", - axes=[0]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = ifbranch.add_concat_nd( - name=array_name + "_updated", - input_names=[array_name, array_name + "_new_element"], - output_name=array_name + "_updated", - axis=0) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = ifbranch.add_copy( - name=array_name + '_assign', - input_name=array_name + "_updated", - output_name=array_name - ) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - values_name = node.name + '_expanded' - layer = self._get_builder().add_expand_dims( - name=values_name, input_name=value_name, output_name=values_name, axes=[0]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # 3 inputs: [Scatter target, indices, scatter source] - layer = self._get_builder().add_scatter( - name=node.name, - input_names=[array_name, index_name, values_name], - output_name=node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_addn(self, node): - - # TODO: Support single value addn - # Blocked by a bug in coremltools - if len(node.inputs) <= 1: - raise ValueError("Only supports two or more inputs for add_n operation.") - input_nodes, input_names, input_types = self._get_input_tensors(node) - prev_name = input_names[0] - for i in range(1,len(input_names)): - - node_name = node.name + '_' + str(i) - output_name = node.name if i == len(input_names) -1 else node_name - - layer = self._get_builder().add_elementwise( - name=node_name, input_names=[prev_name, input_names[i]], - output_name=output_name, mode='ADD') - shapes.propagate_single_layer(layer, self.tensor_shapes) - - prev_name = node_name - - def _convert_concat_nd(self, node): - assert len(node.inputs) > 1 - input_nodes, input_names, input_types = self._get_input_tensors(node) - axis = node.attr.get('axis') - if axis is None: - axis = input_nodes[-1].value.val if node.op == 'ConcatV2' else input_nodes[0].value.val - if axis is None: - raise NotImplementedError('[SSAConverter] Dynamic concatenation is not supported') - input_names = input_names[:-1] if node.op == 'ConcatV2' else input_names[1:] - input_types = input_types if node.op == 'ConcatV2' else input_types[1:] - input_names = [name for i, name in enumerate(input_names) if self._get_tensor_shape_from_type(input_types[i])[axis] != 0] - - if len(input_names) == 1: - self.op_tensor_map[node.name] = input_names - return - - if node.attr.get('data_format', None) == 'NHWC_format_inserted' and (axis == 1 or axis == -3): - layer = self._get_builder().add_elementwise(node.name, input_names, node.name, 'CONCAT') - else: - layer = self._get_builder().add_concat_nd( - name=node.name, input_names=input_names, output_name=node.name, axis=axis) - 
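# [Editor's note, not part of the original diff] Illustrative sketch: TF's
# Concat and ConcatV2 pass the axis as the first and last input respectively,
# which is why the surrounding code peels input_names differently per op.
# A numpy analogue, with hypothetical inputs:

import numpy as np

def tf_style_concat(op, inputs):
    # inputs mirrors the nodes' constant values; the axis position depends on op
    if op == 'ConcatV2':
        axis, tensors = inputs[-1], inputs[:-1]
    else:  # 'Concat'
        axis, tensors = inputs[0], inputs[1:]
    return np.concatenate(tensors, axis=int(axis))

a, b = np.zeros((2, 1)), np.ones((2, 2))
assert tf_style_concat('ConcatV2', [a, b, np.array(1)]).shape == (2, 3)
assert tf_style_concat('Concat', [np.array(1), a, b]).shape == (2, 3)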
shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_batched_mat_mul(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - - weight, bias = None, None - if len(input_names) == 1: - weight = node.attr.get('W', node.attr.get('W_const')) - bias = node.attr.get('bias') - elif len(input_names) == 2 and input_nodes[1].op == 'Const': - input_names = [input_names[0]] - weight = input_nodes[1].value.val - bias = node.attr.get('bias') - - transpose_a = node.attr.get('adj_x', False) or node.attr.get('transpose_a', False) - transpose_b = node.attr.get('adj_y', False) or node.attr.get('transpose_b', False) - if len(input_names) == 1 and transpose_b and weight is not None: - weight = weight.transpose((1, 0)) - - n_rows = 0 if weight is None else weight.shape[0] - n_cols = 0 if weight is None else weight.shape[1] - builder = self._get_builder() - layer = builder.add_batched_mat_mul( - name=node.name, - input_names=input_names, - output_name=node.name, - W=weight, # (batched_mat_mul requires Cin, Cout) - weight_matrix_rows=n_rows, - weight_matrix_columns=n_cols, - bias=bias, - transpose_a=transpose_a, - transpose_b=transpose_b) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_split(self, node): - # Only handles static splits - axis = node.attr['split_dim'] - split = node.attr['split'] - split = [size for size in split if size != 0] - num_splits = len(split) - - has_equal_splits = all([size == split[0] for size in split]) - input_nodes, input_names, input_types = self._get_input_tensors(node) - - if num_splits == 1: - if node.name in [feature.name for feature in self.get_spec().description.output]: - layer = self._get_builder().add_activation( - name=node.name, - non_linearity='LINEAR', - input_name=input_names[-1], - output_name=node.name, - params=(1.0, 0.0)) - shapes.propagate_single_layer(layer, self.tensor_shapes) - else: - self.op_tensor_map[node.name] = [input_names[-1]] - return - - # Split output is a tuple. We need to split them into a list of tensors - output_names = [(node.name + '_' + str(i)) for i in range(num_splits)] - if node.name in self.op_tensor_map: - raise ValueError( - '[SSAConverter] split node %s should not be visited twice.' 
-                % node.name)
-        self.op_tensor_map[node.name] = output_names
-
-        tensor_id = -1 if node.op == 'Split' else 0
-        if has_equal_splits:
-            layer = self._get_builder().add_split_nd(
-                name=node.name,
-                input_name=input_names[tensor_id],
-                output_names=output_names,
-                axis=axis,
-                num_splits=num_splits)
-        else:
-            layer = self._get_builder().add_split_nd(
-                name=node.name,
-                input_name=input_names[tensor_id],
-                output_names=output_names,
-                axis=axis,
-                split_sizes=list(split))
-
-        if not has_equal_splits:
-            for i, name in enumerate(output_names):
-                self.tensor_shapes[name] = self._get_tensor_shape_from_type(node.datatype.T[i])
-        else:
-            shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_identity(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        if node.name in [feature.name for feature in self.get_spec().description.output]:
-            layer = self._get_builder().add_activation(
-                name=node.name,
-                non_linearity='LINEAR',
-                input_name=input_names[0],
-                output_name=node.name,
-                params=(1.0, 0.0))
-            shapes.propagate_single_layer(layer, self.tensor_shapes)
-        else:
-            self.op_tensor_map[node.name] = [input_names[-1]]
-
-    def _convert_tensorarray_size(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        assert (len(input_names) == 1)
-
-        builder = self._get_builder()
-        layer = builder.add_get_shape(
-            name=node.name + '_full_shape',
-            input_name=input_names[0],
-            output_name=node.name + '_full_shape')
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_slice_static(
-            name=node.name,
-            input_name=node.name + '_full_shape',
-            output_name=node.name,
-            begin_ids=[0],
-            end_ids=[1],
-            begin_masks=[False],
-            end_masks=[False],
-            strides=[1])
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_tensorarray_gather(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        assert (len(input_names) == 2)
-
-        layer = self._get_builder().add_gather(
-            name=node.name, input_names=input_names[::-1], output_name=node.name, axis=0)
-
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_pack(self, node):
-        axis = node.attr.get('axis')
-        axis = axis if axis else 0
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        if len(input_names) == 1:
-            if _is_scalar(input_types[0]):  # skip / identity op in this case
-                self.op_tensor_map[node.name] = input_names
-            else:
-                layer = self._get_builder().add_expand_dims(
-                    name=node.name, input_name=input_names[0], output_name=node.name, axes=[axis])
-        else:
-            if all([_is_scalar(input_type) for input_type in input_types]):
-                layer = self._get_builder().add_concat_nd(
-                    name=node.name, input_names=input_names, output_name=node.name, axis=axis)
-            else:
-                if axis == -1:
-                    axis = len(input_types[0].get_shape())
-                layer = self._get_builder().add_stack(
-                    name=node.name, input_names=input_names, output_name=node.name, axis=axis)
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unpack(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        output_names = [(node.name + '_' + str(i) + '_') for i in range(len(node.datatype.T))]
-        self.op_tensor_map[node.name] = output_names
-        num_splits = node.attr['num']
-        axis = int(node.attr['axis'])
-        interm_output_names = [name + '_unsqueezed_' for name in output_names]
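# [Editor's note, not part of the original diff] Illustrative sketch: unpack
# along an axis is lowered as a split into single-slice tensors followed by a
# squeeze, which is the pattern the layers below emit. In numpy terms:

import numpy as np

def unpack(x, axis):
    pieces = np.split(x, x.shape[axis], axis=axis)     # the add_split_nd step
    return [np.squeeze(p, axis=axis) for p in pieces]  # the add_squeeze step

x = np.arange(6).reshape(2, 3)
parts = unpack(x, axis=0)
assert len(parts) == 2 and parts[0].shape == (3,)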
-        layer = self._get_builder().add_split_nd(
-            name=node.name, input_name=input_names[0], output_names=interm_output_names,
-            axis=axis,
-            num_splits=num_splits)
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        for in_name, out_name in zip(interm_output_names, output_names):
-            layer = self._get_builder().add_squeeze(
-                name=out_name, input_name=in_name, output_name=out_name, axes=[0])
-            shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_gather(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        # NNSSA: [u'encoder/Variable/read', u'Placeholder', u'encoder/embedding_lookup/axis']
-        # Core ML: given two inputs, 'data' and 'indices', gather the slices of 'data'
-        axis = node.attr['axis']
-        layer = self._get_builder().add_gather(
-            name=node.name, input_names=input_names[0:2], output_name=node.name, axis=axis)
-
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_gather_nd(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        layer = self._get_builder().add_gather_nd(
-            name=node.name,
-            input_names=input_names,
-            output_name=node.name
-        )
-        self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype)
-
-    def _convert_scatter_nd(self, node):
-        assert len(node.inputs) == 3
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        indices, updates, shape = input_names
-
-        if input_nodes[2].value:
-            output_shape = input_nodes[2].value.val
-            layer = self._get_builder().add_fill_static(
-                name=node.name + '_tmp',
-                output_name=node.name + '_tmp',
-                output_shape=output_shape,
-            )
-        else:
-            layer = self._get_builder().add_fill_dynamic(
-                name=node.name + '_tmp',
-                input_name=shape,
-                output_name=node.name + '_tmp'
-            )
-
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = self._get_builder().add_scatter_nd(
-            name=node.name,
-            input_names=[node.name + '_tmp', indices, updates],
-            output_name=node.name,
-            mode='ADD'
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_square(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        layer = self._get_builder().add_elementwise(
-            name=node.name, input_names=input_names * 2, output_name=node.name, mode='MULTIPLY')
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_neg(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        layer = self._get_builder().add_elementwise(
-            name=node.name, input_names=[input_names[0]], output_name=node.name, mode='MULTIPLY', alpha=-1.0)
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_conv2d(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        weight = None
-        bias = None
-        if len(input_names) == 1:
-            weight = node.attr.get('W', node.attr.get('W_const'))
-            bias = node.attr.get('bias')
-        elif len(input_names) == 2:
-            input_names = [input_names[0]]
-            if input_nodes[1].op == 'Const':
-                weight = input_nodes[1].value.val
-            bias = node.attr.get('bias')
-
-        if weight is None:
-            raise NotImplementedError(
-                '[SSAConverter] Dynamic weights in convolution not implemented')
-
-        dilation_factors = node.attr.get('dilations', [1, 1, 1, 1])
-        assert len(weight.shape) == 4, 'Conv2d: weight parameter not rank 4'
-
-        data_format = node.attr.get('data_format', 'NHWC')
-
-        conv_input_name = input_names[0]
-        conv_output_name = node.name
-        builder = self._get_builder()
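# [Editor's note, not part of the original diff] Illustrative sketch: TF stores
# conv strides as a length-4 vector whose spatial entries depend on layout, and
# DepthwiseConv2dNative weights as (H, W, Cin, multiplier). The hypothetical
# helpers below mirror the extraction and reshape done in the surrounding code.

import numpy as np

def spatial_strides(strides, data_format):
    if data_format in ('NHWC', 'NHWC_format_inserted'):
        return strides[1], strides[2]   # N, H, W, C
    return strides[-2], strides[-1]     # N, C, H, W

def depthwise_to_grouped(weight):
    # (H, W, Cin, mult) -> grouped conv with groups=Cin and Cin*mult outputs
    h, w, cin, mult = weight.shape
    return np.reshape(weight, (h, w, 1, cin * mult)), cin

assert spatial_strides([1, 2, 3, 1], 'NHWC') == (2, 3)
w, groups = depthwise_to_grouped(np.zeros((3, 3, 8, 2)))
assert w.shape == (3, 3, 1, 16) and groups == 8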
-        if data_format == 'NHWC' or data_format == 'NHWC_format_inserted':
-            stride_height = node.attr.get('strides', [1, 1, 1, 1])[1]
-            stride_width = node.attr.get('strides', [1, 1, 1, 1])[2]
-        else:
-            stride_height = node.attr.get('strides', [1, 1, 1, 1])[-2]
-            stride_width = node.attr.get('strides', [1, 1, 1, 1])[-1]
-
-        border_mode = node.attr.get('padding').lower()
-
-        groups = 1
-        kernel_height, kernel_width, kernel_channels, output_channels = weight.shape
-        if node.op == 'DepthwiseConv2dNative':
-            depth_multiplier = weight.shape[3]
-            weight = np.reshape(weight,
-                                (kernel_height, kernel_width, 1, kernel_channels * depth_multiplier))
-            output_channels = kernel_channels * depth_multiplier
-            groups = kernel_channels
-            kernel_channels = 1
-
-        pad_h = node.attr.get('pad_h', [0, 0])
-        pad_w = node.attr.get('pad_w', [0, 0])
-
-        paddings_before = node.attr.get('_paddings_before', None)
-        if paddings_before:
-            layer = builder.add_padding(
-                name=node.name + '_paddings_before',
-                left=paddings_before[0],
-                right=paddings_before[1],
-                top=paddings_before[2],
-                bottom=paddings_before[3],
-                value=0,
-                input_name=conv_input_name,
-                output_name=node.name + '_paddings_before'
-            )
-            shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        builder.add_convolution(
-            name=conv_output_name,
-            kernel_channels=kernel_channels,
-            output_channels=output_channels,
-            height=kernel_height,
-            width=kernel_width,
-            stride_height=stride_height,
-            stride_width=stride_width,
-            border_mode=border_mode,
-            groups=groups,
-            W=weight,
-            b=bias,
-            has_bias=(bias is not None),
-            is_deconv=False,
-            output_shape=None,
-            input_name=conv_input_name if not paddings_before else node.name + '_paddings_before',
-            output_name=conv_output_name,
-            dilation_factors=dilation_factors,
-            padding_bottom=pad_h[0],
-            padding_top=pad_h[1],
-            padding_left=pad_w[0],
-            padding_right=pad_w[1]
-        )
-
-        self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype)
-
-    def _convert_pool(self, node, layer_type):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        data_format = node.attr.get('data_format', 'NHWC')
-        kernel_sizes = node.attr.get('ksize', [1, 1, 1, 1])
-        stride_sizes = node.attr.get('strides', [1, 1, 1, 1])
-        padding_type = node.attr.get('padding')
-        global_pooling = node.attr.get('global_pooling', False)
-
-        if data_format == 'NHWC' or data_format == 'NHWC_format_inserted':
-            kernel_height = kernel_sizes[1]
-            kernel_width = kernel_sizes[2]
-            stride_height = stride_sizes[1]
-            stride_width = stride_sizes[2]
-        else:
-            kernel_height = kernel_sizes[-2]
-            kernel_width = kernel_sizes[-1]
-            stride_height = stride_sizes[-2]
-            stride_width = stride_sizes[-1]
-
-        self._get_builder().add_pooling(
-            name=node.name,
-            height=kernel_height,
-            width=kernel_width,
-            stride_height=stride_height,
-            stride_width=stride_width,
-            layer_type=layer_type,
-            padding_type=padding_type,
-            input_name=input_names[0],
-            output_name=node.name,
-            exclude_pad_area=True,
-            is_global=global_pooling
-        )
-
-        self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype)
-
-    def _convert_maxpool(self, node):
-        self._convert_pool(node, 'MAX')
-
-    def _convert_avgpool(self, node):
-        self._convert_pool(node, 'AVERAGE')
-
-    def _convert_reshape(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        if _is_scalar(node.datatype) and self._get_tensor_shape_from_type(input_types[0]) == (1,):  # skip / identity op in that case
-            self.op_tensor_map[node.name] = [input_names[0]]
-        elif self._get_tensor_shape_from_type(input_types[0]) ==
self._get_tensor_shape_from_type(node.datatype) \ - and sum([i < 0 for i in self._get_tensor_shape_from_type(node.datatype)]) <= 1: - # in this case reshape is not changing the shape - self.op_tensor_map[node.name] = [input_names[0]] - elif (builtins.is_tensor(node.datatype) and - sum([i < 0 for i in self._get_tensor_shape_from_type(node.datatype)]) <= 1): - - output_shape = self._get_tensor_shape_from_type(node.datatype) - layer = self._get_builder().add_reshape_static( - name=node.name, - input_name=input_names[0], - output_name=node.name, - output_shape=output_shape) - shapes.propagate_single_layer(layer, self.tensor_shapes) - else: - layer = self._get_builder().add_reshape_dynamic( - name=node.name, input_names=input_names, output_name=node.name) - - self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype) - - def _convert_matrix_band_part(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - assert (len(input_names) == 3) - assert all([x.op == 'Const' for x in input_nodes[-2:]]) - - lower = input_nodes[1].value.val - upper = input_nodes[2].value.val - - builder = self._get_builder() - builder.add_matrix_band_part( - name = node.name, - input_name= input_names[0], - output_name=node.name, - num_lower=lower, - num_upper=upper) - - self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype) - - def _convert_argmax(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - axis = node.attr['reduction_indices'][0] - layer = self._get_builder().add_argmax( - name=node.name, - input_name=input_names[0], - output_name=node.name, - axis=axis, - keepdims=node.attr.get("keep_dims", False)) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_argmin(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - axis = node.attr['reduction_indices'][0] - layer = self._get_builder().add_argmin( - name=node.name, - input_name=input_names[0], - output_name=node.name, - axis=axis, - keepdims=node.attr.get("keep_dims", False)) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_reverse(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - reverse_axes = input_nodes[1].value.val - rank = len(self.tensor_shapes[input_names[0]]) - reverse_dim = [False] * rank - for axis in reverse_axes: - reverse_dim[axis] = True - - layer = self._get_builder().add_reverse( - name=node.name, input_name=input_names[0], output_name=node.name, reverse_dim=reverse_dim) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_expand_dims(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - if _is_scalar(input_types[0]): # skip/identity op in that case - input_nodes[0].datatype = builtins.tensor(input_types[0], (1,)) - self.op_tensor_map[node.name] = [input_names[0]] - if len(input_names) == 2 and input_nodes[1].value.val is None: - raise NotImplementedError("[SSAConverter] Cannot handle dynamic expandDims") - - axes = input_nodes[1].value.val - axes = list(axes) if isinstance(axes, Iterable) else [axes] - layer = self._get_builder().add_expand_dims( - name=node.name, input_name=input_names[0], output_name=node.name, axes=axes) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_squeeze(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - - axes = node.attr["squeeze_dims"] - layer = self._get_builder().add_squeeze( 
- name=node.name, - input_name=input_names[0], - output_name=node.name, - axes=axes) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_cast(self, node): - assert len(node.inputs) == 1 - input_nodes, input_names, input_types = self._get_input_tensors(node) - - layer = self._get_builder().add_round( - name=node.name, - input_name=input_names[0], - output_name=node.name) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_reverse_sequence(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - batch_axis = node.attr['batch_dim'] - seq_axis = node.attr['seq_dim'] - - layer = self._get_builder().add_reverse_sequence( - name=node.name, - input_names=input_names, - output_name=node.name, - batch_axis=batch_axis, - seq_axis=seq_axis) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_embedding(self, node): - input_nodes, input_names, input_types = self._get_input_tensors(node) - weight = None - if len(input_names) == 1: - weight = node.attr.get('W') - elif len(input_names) == 2 and input_nodes[1].op == 'Const': - weight = input_nodes[1].value.val # (batch, depth, out_channels) - - if weight is None: - raise ValueError('[SSAConverter] Unable to handle dynamic embedding') - - out_channels = weight.shape[-1] - depth = node.attr['depth'] - weight = weight.reshape([depth, out_channels]).transpose((1, 0)) - - expanddim_name = node.name + '_expandim_' - - layer = self._get_builder().add_expand_dims( - name=expanddim_name, input_name=input_names[0], output_name=expanddim_name, axes=[-1]) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - layer = self._get_builder().add_embedding_nd( - name=node.name, - input_name=expanddim_name, - output_name=node.name, - vocab_size=depth, - embedding_size=out_channels, - W=weight) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_tile(self, node): - assert len(node.inputs) == 2 - input_nodes, input_names, input_types = self._get_input_tensors(node) - - reps = input_nodes[1].value.val - layer = self._get_builder().add_tile( - name=node.name, - input_name=input_names[0], - output_name=node.name, - reps=reps - ) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_lstm_block_cell(self, node): - assert len(node.inputs) == 5 - input_nodes, input_names, input_types = self._get_input_tensors(node) - - x, w_name, b_name, h_prev, c_prev = input_names - - weight = input_nodes[1].value.val - bias = input_nodes[2].value.val - - builder = self._get_builder() - - def igfo_to_ifog(data): - i, g, f, o = np.split(data, 4, axis=-1) - return np.concatenate([i, f, o, g], axis=-1) - - hidden_size = weight.shape[-1] // 4 - input_size = weight.shape[0] - hidden_size - - W_h_fw = weight[input_size:, :4 * hidden_size] - W_h_fw = igfo_to_ifog(W_h_fw) - W_h_fw = np.transpose(W_h_fw, [1, 0]) - W_h_fw = np.ascontiguousarray(W_h_fw) - W_h_fw = np.split(W_h_fw, 4, axis=0) - - W_x_fw = weight[:input_size, :4 * hidden_size] - W_x_fw = igfo_to_ifog(W_x_fw) - W_x_fw = np.transpose(W_x_fw, [1, 0]) - W_x_fw = np.ascontiguousarray(W_x_fw) - W_x_fw = np.split(W_x_fw, 4, axis=0) - - b_fw = bias[:4 * hidden_size] - b_fw = igfo_to_ifog(b_fw) - b_fw = np.split(b_fw, 4, axis=-1) - - forget_bias = node.attr.get('forget_bias') - has_forget_bias = forget_bias and forget_bias != 0.0 - if has_forget_bias: - b_fw[1] += forget_bias - - layer = builder.add_expand_dims( - name=node.name + '_in_expand', - input_name=x, - output_name=node.name + '_in_expand', - 
-            axes=[-1, -2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_expand_dims(
-            name=node.name + '_h_prev_expand',
-            input_name=h_prev,
-            output_name=node.name + '_h_prev_expand',
-            axes=[0, -1, -2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_expand_dims(
-            name=node.name + '_c_prev_expand',
-            input_name=c_prev,
-            output_name=node.name + '_c_prev_expand',
-            axes=[0, -1, -2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_unilstm(
-            name=node.name + '_lstm',
-            W_h=W_h_fw,
-            W_x=W_x_fw,
-            b=b_fw,
-            hidden_size=hidden_size,
-            input_size=input_size,
-            input_names=[
-                node.name + '_in_expand',
-                node.name + '_h_prev_expand',
-                node.name + '_c_prev_expand'
-            ],
-            output_names=[
-                node.name + '_lstm_out',
-                node.name + '_lstm_h',
-                node.name + '_lstm_c',
-            ],
-            forget_bias=has_forget_bias,
-            output_all=True,
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_squeeze(
-            name=node.name + '_out',
-            input_name=node.name + '_lstm_out',
-            output_name=node.name + '_out',
-            axes=[-1, -2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_copy(
-            name=node.name + '_temp_h',
-            input_name=node.name + '_lstm_out',
-            output_name=node.name + '_temp_h'
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        # Workaround: the Core ML LSTM layer only outputs the states of the last sequence step.
-        layer = builder.add_broadcast_to_like(
-            name=node.name + '_temp_c',
-            input_names=[node.name + '_lstm_c', node.name + '_lstm_out'],
-            output_name=node.name + '_temp_c',
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_squeeze(
-            name=node.name + '_h',
-            input_name=node.name + '_temp_h',
-            output_name=node.name + '_h',
-            axes=[-1, -2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_squeeze(
-            name=node.name + '_c',
-            input_name=node.name + '_temp_c',
-            output_name=node.name + '_c',
-            axes=[-1, -2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        self.op_tensor_map[node.name] = [
-            node.name + '_out', node.name + '_h', node.name + '_c'
-        ]
-
-    def _convert_constant_pad(self, node):
-        # Operator Pad has 2 inputs; PadV2 has 3 inputs.
-        assert len(node.inputs) == 2 or len(node.inputs) == 3
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        constant_value = 0
-        if len(node.inputs) == 3:
-            constant_value = input_nodes[2].value.val
-            if constant_value == -np.inf:
-                INT_MIN = -np.iinfo(np.int64).max - 1
-                constant_value = np.float(INT_MIN)
-
-            if constant_value == np.inf:
-                INT_MAX = np.iinfo(np.int64).max
-                constant_value = np.float(INT_MAX)
-
-        # This layer takes at most 2 inputs.
-        input_names = input_names[:2]
-        layer = self._get_builder().add_constant_pad(
-            name=node.name,
-            input_names=input_names,
-            output_name=node.name,
-            value=constant_value
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_mirror_pad(self, node):
-        assert len(node.inputs) == 2
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        paddings = input_nodes[1].value.val  # rank 4, NHWC
-        left, right = paddings[2][0], paddings[2][1]
-        top, bottom = paddings[1][0], paddings[1][1]
-
-        if node.attr.get('mode', '').lower() == 'symmetric':
-            warn('[SSAConverter] Warning: Symmetric MirrorPad is not supported '
-                 'but can be approximated with non-symmetric padding in some '
-                 'cases. Conversion will continue, but expect some loss '
-                 'of model accuracy.')
-        builder = self._get_builder()
-
-        layer = builder.add_padding(
-            name=node.name,
-            left=left,
-            right=right,
-            top=top,
-            bottom=bottom,
-            input_name=input_names[0],
-            output_name=node.name,
-            padding_type='reflection'
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_topk(self, node):
-        assert len(node.inputs) == 2
-        if node.attr.get('sorted') is False:
-            raise NotImplementedError('sorted should be set to True.')
-
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        k = input_nodes[1].value.val
-        output_names = [(node.name + '_' + str(i)) for i in range(2)]
-        layer = self._get_builder().add_topk(
-            name=node.name,
-            input_names=[input_names[0]],
-            output_names=output_names,
-            k=k,
-            axis=-1
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-        self.op_tensor_map[node.name] = output_names
-
-    def _convert_unary_log_softmax(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        axis = -1 if 'axis' not in node.attr else node.attr['axis']
-
-        layer = self._get_builder().add_reduce_logsumexp(
-            name=node.name + "_logsumexp",
-            input_name=input_names[0],
-            output_name=node.name + "_logsumexp",
-            axes=[axis],
-            keepdims=True,
-            reduce_all=False
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = self._get_builder().add_subtract_broadcastable(
-            name=node.name,
-            input_names=input_names + [node.name + "_logsumexp"],
-            output_name=node.name
-        )
-
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_inverse(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        layer = self._get_builder().add_unary(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            mode='inverse'
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_batchnorm(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        if 'gamma' not in node.attr or 'beta' not in node.attr:
-            raise ValueError('BatchNorm node must have attributes \'gamma\' and \'beta\'')
-        gamma = node.attr.get('gamma')
-        num_channels = len(gamma)
-        beta = node.attr.get('beta')
-        mean = node.attr.get('mean', np.zeros((num_channels,)))
-        variance = node.attr.get('variance', np.ones((num_channels,)))
-        layer = self._get_builder().add_batchnorm(
-            name=node.name,
-            channels=num_channels,
-            gamma=gamma,
-            beta=beta,
-            mean=mean,
-            variance=variance,
-            input_name=input_names[0],
-            output_name=node.name
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_common(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        op = node.op.lower()  # type of the unary operator
-        if op in ['sqrt', 'rsqrt', 'exp', 'log', 'abs']:
-            layer = self._get_builder().add_unary(
-                name=node.name, input_name=input_names[0], output_name=node.name, mode=op)
-        else:
-            # Same function name for TensorFlow and Core ML.
-            func = getattr(self._get_builder(), 'add_' + op)
-            layer = func(name=node.name, input_name=input_names[0], output_name=node.name)
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_trigonometric(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        op = node.op.lower()  # type of the unary operator
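# [Editor's note, not part of the original diff] Illustrative sketch: several
# converters here dispatch dynamically by deriving the builder method name
# from the op name, relying on TF and Core ML sharing names (Sin -> add_sin).
# A hypothetical stub showing the getattr pattern:

class StubBuilder:
    def add_sin(self, name, input_name, output_name):
        return ('sin', name, input_name, output_name)

def dispatch(builder, op, name, input_name, output_name):
    func = getattr(builder, 'add_' + op.lower())  # e.g. 'Sin' -> add_sin
    return func(name=name, input_name=input_name, output_name=output_name)

assert dispatch(StubBuilder(), 'Sin', 'n', 'x', 'n')[0] == 'sin'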
-        # Assumes TensorFlow and Core ML have the same op name.
-        func = getattr(self._get_builder(), 'add_' + op)
-        layer = func(name=node.name, input_name=input_names[0], output_name=node.name)
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_activation(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        op = node.op.upper()  # type of the unary operator
-        params = None
-        if op in ['LEAKYRELU']:
-            params = ([node.attr['alpha']])
-        elif op in ['ELU']:
-            params = 1.0
-        layer = self._get_builder().add_activation(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            non_linearity=op,
-            params=params
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_unary_activation_relu6(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        builder = self._get_builder()
-        layer = builder.add_activation(
-            name=node.name + '_relu',
-            input_name=input_names[0],
-            output_name=node.name + '_relu',
-            non_linearity='RELU',
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_clip(
-            name=node.name,
-            input_name=node.name + '_relu',
-            output_name=node.name,
-            max_value=6.0
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_gelu(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        # Core ML has 3 modes: EXACT, TANH_APPROXIMATION, SIGMOID_APPROXIMATION.
-        layer = self._get_builder().add_gelu(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            mode='TANH_APPROXIMATION')
-
-        output_shape = self._get_tensor_shape_from_type(node.datatype)
-        shapes.propagate_single_layer(layer, self.tensor_shapes,
-                                      output_shapes=[output_shape])
-
-    def _convert_reduction(self, node):
-        assert len(node.inputs) == 2
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        if len(input_names) == 2:
-            axes = np.array(input_nodes[1].value.val).flatten()
-            reduction_indices = list(axes) if isinstance(axes, Iterable) else [axes]
-        elif 'reduction_indices' in node.attr:
-            reduction_indices = node.attr['reduction_indices']
-        else:
-            reduction_indices = node.attr['axis']
-
-        if 'keep_dims' in node.attr:
-            keepdims = node.attr['keep_dims']
-        else:
-            keepdims = node.attr['keepdims']
-
-        op = node.op.lower()  # type of the reduction operator
-        if op in ['all', 'any']:
-            op = 'prod' if op == 'all' else 'sum'
-
-        func = getattr(self._get_builder(), 'add_reduce_' + op)
-        layer = func(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            axes=reduction_indices,
-            keepdims=keepdims,
-            reduce_all=not reduction_indices
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_resize_bilinear(self, node):
-        # In TF, ResizeBilinear requires channel-last image axis order.
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        if len(input_names) == 2 and input_nodes[1].op == 'Const':
-            target_size = input_nodes[1].value.val
-        else:
-            raise ValueError('[SSAConverter] Unable to determine target size '
-                             'for ResizeBilinear')
-
-        mode = 'STRICT_ALIGN_ENDPOINTS_MODE' if node.attr.get(
-            'align_corners', False) else 'UPSAMPLE_MODE'
-
-        builder = self._get_builder()
-        layer = builder.add_resize_bilinear(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            target_height=target_size[0],
-            target_width=target_size[1],
-            mode=mode)
-
-        output_shape = self._get_tensor_shape_from_type(node.datatype)
-        shapes.propagate_single_layer(layer, self.tensor_shapes,
-                                      output_shapes=[output_shape])
-
-    def _convert_resize_nearest_neighbor(self, node):
-        # In TF, ResizeNearestNeighbor requires channel-last image axis order.
-        # During conversion, NNSSA's output shape should have been modified
-        # to NCHW in transform_nhwc_to_nchw().
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        if len(input_names) == 2 and input_nodes[1].op == 'Const':
-            target_size = input_nodes[1].value.val
-        else:
-            raise ValueError('[SSAConverter] Unable to determine target size '
-                             'for ResizeNearestNeighbor')
-        try:
-            input_shape = self._get_tensor_shape_from_type(input_types[0])
-        except:
-            input_shape = None
-
-        if input_shape is None or len(input_shape) != 4:
-            raise ValueError('[SSAConverter] ResizeNearestNeighbor has invalid '
-                             'input shape {}'.format(input_shape))
-
-        if target_size[0] < input_shape[2] and target_size[1] < input_shape[3]:
-            self._convert_resize_bilinear(node)
-
-        elif target_size[0] > input_shape[2] and target_size[1] > input_shape[3]:
-            if (target_size[0] % input_shape[2] > 0 or
-                    target_size[1] % input_shape[3] > 0):
-                raise ValueError('[SSAConverter] Unsupported fractional '
-                                 'nearest-neighbor upsampling')
-
-            scaling_factor_h = int(target_size[0] / input_shape[2])
-            scaling_factor_w = int(target_size[1] / input_shape[3])
-
-            if scaling_factor_h <= 0 or scaling_factor_w <= 0:
-                raise ValueError('[SSAConverter] Invalid scaling factor.')
-
-            if node.attr.get('align_corners', False) is True:
-                raise ValueError('[SSAConverter] Core ML does not support '
-                                 'ResizeNearestNeighbor with align_corners.')
-
-            builder = self._get_builder()
-            layer = builder.add_upsample(
-                name=node.name,
-                scaling_factor_h=scaling_factor_h,
-                scaling_factor_w=scaling_factor_w,
-                input_name=input_names[0],
-                output_name=node.name,
-                mode='NN')
-
-            output_shape = self._get_tensor_shape_from_type(node.datatype)
-            shapes.propagate_single_layer(layer, self.tensor_shapes,
-                                          output_shapes=[output_shape])
-        else:
-            raise NotImplementedError("[SSAConverter] Unsupported resizing option.")
-
-    def _convert_layer_normalization(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        input_name = input_names[0]
-        builder = self._get_builder()
-        gamma = node.attr['gamma']
-        beta = node.attr['beta']
-        axes = node.attr['axes']
-        epsilon = node.attr['epsilon']
-        input_shape = list(input_types[0].get_shape())
-
-        if (len(input_shape) in [2, 3] and len(axes) == 1 and
-                axes[0] == len(input_shape) - 1):
-            # Performance enhancement for some models with layer norm: lower it
-            # to reshape -> MVN -> scale -> reshape (see the sketch further below).
-            builder.add_reshape_static(name=input_name + '_reshape',
-                                       input_name=input_name,
-                                       output_name=input_name + '_reshape',
-                                       output_shape=input_shape + [1, 1])
-
-            builder.add_mvn(name=input_name + '_mvn',
-                            input_name=input_name + '_reshape',
-                            output_name=input_name + '_mvn', across_channels=True,
-                            normalize_variance=True, epsilon=epsilon)
-
-            builder.add_scale(name=node.name + '_5d',
-                              input_name=input_name + '_mvn',
-                              output_name=node.name + '_5d', W=gamma, b=beta, has_bias=True,
-                              shape_scale=[len(gamma)], shape_bias=[len(beta)])
-
-            builder.add_reshape_static(name=node.name,
-                                       input_name=node.name + '_5d',
-                                       output_name=node.name,
-                                       output_shape=input_shape)
-
-        else:
-            # General implementation
-            input_shape = input_types[0].get_shape()
-            rdims = len(axes)
-            normalized_shape = node.datatype.get_shape()[-rdims:]
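# [Editor's note, not part of the original diff] Illustrative sketch: the
# rank-2/3 fast path above lowers layer norm to reshape -> MVN -> scale/shift.
# The numpy equivalence, normalizing over the last axis with epsilon eps:

import numpy as np

def layer_norm_last_axis(x, gamma, beta, eps):
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)   # what the MVN layer normalizes by
    return gamma * (x - mean) / np.sqrt(var + eps) + beta  # the scale layer

x = np.random.randn(4, 8)
y = layer_norm_last_axis(x, np.ones(8), np.zeros(8), 1e-5)
assert np.allclose(y.mean(axis=-1), 0, atol=1e-6)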
-            if gamma.shape != normalized_shape:
-                gamma = np.zeros(normalized_shape) + gamma
-            if beta.shape != normalized_shape:
-                beta = np.zeros(normalized_shape) + beta
-
-            # Use the node's epsilon rather than a hard-coded default.
-            builder.add_layer_normalization(node.name, input_name, node.name,
-                                            normalized_shape, gamma, beta, eps=epsilon)
-
-        self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(
-            node.datatype)
-
-    def _convert_binary(self, node):
-        """
-        Convert a binary operator.
-        - Attempts to add an elementwise operator if possible
-        - Otherwise, inserts a broadcastable operator
-        """
-        def _is_elementwise_scalar_check(input_type):
-            """
-            Checks if the element is scalar-like:
-            - a scalar
-            - a 0-D tensor
-            - a 1-D tensor with only one element
-            """
-            if _is_scalar(input_type):
-                return True
-            shape = input_type.get_shape()
-            if builtins.is_tensor(input_type) and len(shape) == 1 and shape[0] == 1:
-                return True
-            return False
-
-        # Core ML's elementwise operator has limited broadcastable support.
-        # Check if the first shape can be broadcast to the second shape.
-        def _is_broadcastable_shape(shape_0, shape_1):
-            assert (len(shape_0) > 0 and len(shape_1) > 0)
-            if shape_0[0] != 1 and shape_0[0] != shape_1[0]:
-                return False
-
-            if shape_0[1:] == [1] * (len(shape_0) - 1):
-                return True
-            return False
-
-        def _convert_binary_elementwise(node):
-            """
-            Adds a binary elementwise operator.
-            - Returns True if successful
-            - Otherwise returns False
-            """
-            assert len(node.inputs) == 2
-            input_nodes, input_names, input_types = self._get_input_tensors(node)
-            builder = self._get_builder()
-            elementwise_support = {'add', 'addv2', 'sub', 'mul', 'realdiv'}
-            op = node.op.lower()
-
-            if op not in elementwise_support:
-                return False
-
-            # If any input is dynamic, we cannot add an elementwise operator.
-            for _input in input_types:
-                if -1 in self._get_tensor_shape_from_type(_input):
-                    return False
-
-            alpha = None
-            inputs = []
-            if input_nodes[1].op == 'Const' and _is_elementwise_scalar_check(input_types[1]):
-                # The second input is a scalar constant; use it as alpha.
-                alpha = input_nodes[1].value.val
-                inputs = [input_names[0]]
-            elif input_nodes[0].op == 'Const' and _is_elementwise_scalar_check(input_types[0]):
-                # The first input is a scalar constant; use it as alpha.
-                alpha = input_nodes[0].value.val
-                inputs = [input_names[1]]
-            else:
-                # If neither input is a scalar, ensure the shapes match.
-                # If either input is not a tensor, add a broadcastable layer instead.
-                if not (builtins.is_tensor(input_types[0]) and builtins.is_tensor(input_types[1])):
-                    return False
-
-                shape_0 = list(input_types[0].get_shape())
-                shape_1 = list(input_types[1].get_shape())
-
-                # Make sure neither input is rank-0.
-                if len(shape_0) == 0 or len(shape_1) == 0:
-                    return False
-
-                if _is_broadcastable_shape(shape_0, shape_1) or _is_broadcastable_shape(shape_1, shape_0):
-                    pass
-
-                # NOTE: Special case, one of the inputs has multiple 1 dims and same shape
-                # e.g.
(1, 4, 5) and (4, 5): in this case, we can expand second - # input to make equivalent to (1, 4, 5) - elif abs(len(shape_0) - len(shape_1)) > 0: - small_index = 1 if len(shape_0) > len(shape_1) else 0 - - # Switch shape and make first shape smaller to infer axis information - if small_index == 1: - shape_0, shape_1 = shape_1, shape_0 - - same_shape_index = len(shape_1) - len(shape_0) - shape_temp = [1] * same_shape_index + shape_0 - if shape_temp != shape_1: - return False - - # Extend one of the input to allow use of elementwise operator - layer = builder.add_expand_dims(name=node.name+'_'+input_names[small_index]+'_'+'_expand_dims', - input_name=input_names[small_index], - output_name=input_names[small_index]+'_expanded', - axes=list(range(same_shape_index))) - shapes.propagate_single_layer(layer, self.tensor_shapes) - input_names[small_index] += '_expanded' - - elif shape_0 != shape_1: - return False - inputs = input_names - - # Div operation cannot be simulated with more than one input and - # without Alpha - if op == 'realdiv' and alpha is None: - return False - - if op == 'realdiv': - # Inverse Alpha to simulate DIV using MUL operator - if alpha is None: - raise ValueError("Incorrect configuration!! Alpha not provided for Elementwise Div operator") - alpha = 1 / float(alpha) - elif op == 'sub': - if alpha is not None and inputs[0] == input_names[0]: - alpha = -alpha - else: - neg_index = 1 - if alpha: - neg_index = 0 - layer = builder.add_elementwise(name=node.name+'_'+inputs[neg_index]+'_neg', - input_names=[inputs[neg_index]], - output_name=inputs[neg_index]+'_neg', - mode='MULTIPLY', - alpha=-1.0) - inputs[neg_index] += '_neg' - shapes.propagate_single_layer(layer, self.tensor_shapes) - - # map certain ops to different but equivalent ops - mapping_op = {'ADDV2':'ADD', 'SUB':'ADD', 'REALDIV':'MULTIPLY', 'MUL':'MULTIPLY'} - op = op.upper() - op = mapping_op.get(op, op) - layer = builder.add_elementwise(name=node.name, - input_names=inputs, - output_name=node.name, - mode=op, - alpha=alpha) - shapes.propagate_single_layer(layer, self.tensor_shapes) - return True - - # Try to add Elementwise operator if possible, - # If configuration not supported, insert broadcastable operator instead - if not _convert_binary_elementwise(node): - self._convert_binary_broadcastable(node) - - def _convert_binary_broadcastable(self, node): - assert len(node.inputs) == 2 - input_nodes, input_names, input_types = self._get_input_tensors(node) - builder = self._get_builder() - op = node.op.lower() # type of the unary operator - compare_greater_ops = {'greater', 'greaterequal'} - compare_equal_ops = {'equal', 'notequal'} - compare_less_ops = {'less', 'lessequal'} - logical_ops = {'logicaland': 'AND', 'logicalor': 'OR'} - math_ops = {'sub': 'subtract', 'mul': 'multiply', 'realdiv': 'divide', - 'floordiv': 'floor_div', 'maximum': 'max', 'minimum': 'min', - 'biasadd': 'add', 'pow': 'pow', 'addv2': 'add'} - if op in compare_greater_ops: - layer = builder.add_greater_than( - name=node.name, - input_names=input_names, - output_name=node.name, - use_greater_than_equal='equal' in op - ) - elif op in compare_equal_ops: - op = 'not_equal' if op == 'notequal' else op - func = getattr(builder, 'add_' + op) - layer = func( - name=node.name, - input_names=input_names, - output_name=node.name - ) - elif op in compare_less_ops: - layer = builder.add_less_than( - name=node.name, - input_names=input_names, - output_name=node.name, - use_less_than_equal='equal' in op - ) - elif op in logical_ops.keys(): - layer = 
-            self._get_builder().add_logical(
-                name=node.name,
-                input_names=input_names,
-                output_name=node.name,
-                mode=logical_ops[op]
-            )
-        elif op in math_ops.keys():
-            func = getattr(builder, 'add_' + math_ops[op] + '_broadcastable')
-            layer = func(
-                name=node.name,
-                input_names=input_names,
-                output_name=node.name
-            )
-        else:  # same function name for TensorFlow and Core ML
-            func = getattr(builder, 'add_' + op + '_broadcastable')
-            layer = func(
-                name=node.name,
-                input_names=input_names,
-                output_name=node.name
-            )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_fill(self, node):
-        assert len(node.inputs) == 2
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        value = input_nodes[1].value.val
-
-        layer = self._get_builder().add_fill_dynamic(name=node.name,
-                                                     input_name=input_names[0],
-                                                     output_name=node.name,
-                                                     value=value)
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_iff(self, node):
-        assert len(node.inputs) == 3
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        layer = self._get_builder().add_branch(name=node.name,
-                                               input_name=input_names[0])
-
-        ifbranch = NeuralNetworkBuilder(nn_spec=layer.branch.ifBranch,
-                                        disable_rank5_shape_mapping=True)
-
-        ifbranch.add_activation(name=node.name + "_if_",
-                                non_linearity='LINEAR',
-                                input_name=input_names[1],
-                                output_name=node.name,
-                                params=(1.0, 0.0))
-
-        elsebranch = NeuralNetworkBuilder(nn_spec=layer.branch.elseBranch,
-                                          disable_rank5_shape_mapping=True)
-
-        elsebranch.add_activation(name=node.name + "_else_",
-                                  non_linearity='LINEAR',
-                                  input_name=input_names[2],
-                                  output_name=node.name,
-                                  params=(1.0, 0.0))
-
-        self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype)
-
-    def _convert_reorganize_data(self, node):
-        assert len(node.inputs) == 1
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        block_size = node.attr.get('block_size', 2)
-        if node.op == 'SpaceToDepth':
-            mode = 'SPACE_TO_DEPTH'
-        else:  # node.op == 'DepthToSpace'
-            mode = 'DEPTH_TO_SPACE'
-
-        builder = self._get_builder()
-
-        layer = builder.add_reorganize_data(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            mode=mode,
-            block_size=block_size
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_space_to_batch_nd(self, node):
-        assert len(node.inputs) == 3
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        block_shape = input_nodes[1].value.val
-        if len(block_shape.flatten()) != 2 or block_shape[0] != block_shape[1]:
-            raise NotImplementedError('non-equal block shape is not yet supported')
-        paddings = input_nodes[2].value.val
-        needs_paddings = any(paddings.flatten())
-        builder = self._get_builder()
-
-        layer = builder.add_transpose(
-            name=node.name + '_transpose1',
-            input_name=input_names[0],
-            output_name=node.name + '_transpose1',
-            axes=[3, 0, 1, 2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        if needs_paddings:
-            left, right = paddings[1][0], paddings[1][1]
-            top, bottom = paddings[0][0], paddings[0][1]
-            layer = builder.add_padding(
-                name=node.name + '_padding',
-                left=left,
-                right=right,
-                top=top,
-                bottom=bottom,
-                input_name=node.name + '_transpose1',
-                output_name=node.name + '_padding'
-            )
-            shapes.propagate_single_layer(layer, self.tensor_shapes)
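# [Editor's note, not part of the original diff] Illustrative sketch: with zero
# padding, SpaceToBatchND on channel-last input is realized here by rotating
# the batch axis into the depth position (transpose [3, 0, 1, 2]), applying
# SPACE_TO_DEPTH, and rotating back ([1, 2, 3, 0]). A numpy SPACE_TO_DEPTH for
# N, C, H, W data, assuming equal block sizes:

import numpy as np

def space_to_depth(x, block):
    n, c, h, w = x.shape
    x = x.reshape(n, c, h // block, block, w // block, block)
    x = x.transpose(0, 3, 5, 1, 2, 4)  # move the block offsets into depth
    return x.reshape(n, c * block * block, h // block, w // block)

x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
assert space_to_depth(x, 2).shape == (1, 4, 2, 2)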
-        layer = builder.add_reorganize_data(
-            name=node.name + '_reorganize',
-            input_name=node.name + '_transpose1' if not needs_paddings else node.name + '_padding',
-            output_name=node.name + '_reorganize',
-            mode='SPACE_TO_DEPTH',
-            block_size=block_shape[0]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_transpose(
-            name=node.name,
-            input_name=node.name + '_reorganize',
-            output_name=node.name,
-            axes=[1, 2, 3, 0]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_batch_to_space_nd(self, node):
-        assert len(node.inputs) == 3
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        block_shape = input_nodes[1].value.val
-        if block_shape[0] != block_shape[1]:
-            raise NotImplementedError('non-equal block shape is not yet supported')
-        crops = input_nodes[2].value.val
-        needs_cropping = any(crops.flatten())
-
-        builder = self._get_builder()
-
-        layer = builder.add_transpose(
-            name=node.name + '_transpose1',
-            input_name=input_names[0],
-            output_name=node.name + '_transpose1',
-            axes=[3, 0, 1, 2]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_reorganize_data(
-            name=node.name + '_reorganize',
-            input_name=node.name + '_transpose1',
-            output_name=node.name + '_reorganize',
-            mode='DEPTH_TO_SPACE',
-            block_size=block_shape[0]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        if needs_cropping:
-            left, right = crops[1][0], crops[1][1]
-            top, bottom = crops[0][0], crops[0][1]
-            layer = builder.add_crop(
-                name=node.name + '_cropping',
-                left=left,
-                right=right,
-                top=top,
-                bottom=bottom,
-                offset=0,
-                input_names=[node.name + '_reorganize'],
-                output_name=node.name + '_cropping'
-            )
-            shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-        layer = builder.add_transpose(
-            name=node.name,
-            input_name=node.name + '_reorganize' if not needs_cropping else node.name + '_cropping',
-            output_name=node.name,
-            axes=[1, 2, 3, 0]
-        )
-        shapes.propagate_single_layer(layer, self.tensor_shapes)
-
-    def _convert_conv2d_transpose(self, node):
-        assert len(node.inputs) == 3
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-
-        input_name = input_names[2]
-        weight = input_nodes[1].value.val
-        bias = node.attr.get('bias')
-
-        strides = node.attr.get('strides')
-        border_mode = node.attr.get('padding').lower()
-        stride_height = strides[1]
-        stride_width = strides[2]
-        kernel_channels = input_types[-1].get_shape()[1]
-        output_channels = node.datatype.get_shape()[1]
-
-        self._get_builder().add_convolution(
-            name=node.name,
-            kernel_channels=kernel_channels,
-            output_channels=output_channels,
-            height=weight.shape[0],
-            width=weight.shape[1],
-            stride_height=stride_height,
-            stride_width=stride_width,
-            border_mode=border_mode,
-            groups=1,
-            W=np.transpose(weight, (0, 1, 3, 2)),
-            b=bias,
-            has_bias=(bias is not None),
-            is_deconv=True,
-            output_shape=None,
-            input_name=input_name,
-            output_name=node.name
-        )
-
-        self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype)
-
-    def _convert_lrn(self, node):
-        input_nodes, input_names, input_types = self._get_input_tensors(node)
-        alpha = node.attr.get('alpha')
-        beta = node.attr.get('beta')
-        bias = node.attr.get('bias')
-        depth_radius = node.attr.get('depth_radius')
-        n_channels = self._get_tensor_shape_from_type(input_types[-1])[-1]
-        if node.attr.get('data_format') == 'NHWC_format_inserted':
-            n_channels = self._get_tensor_shape_from_type(input_types[-1])[1]
-        layer = self._get_builder().add_lrn(
-            name=node.name,
-            input_name=input_names[0],
-            output_name=node.name,
-            alpha=alpha * n_channels,
-            beta=beta,
local_size=depth_radius, - k=bias - ) - shapes.propagate_single_layer(layer, self.tensor_shapes) - - def _convert_clip(self, node): - - input_nodes, input_names, input_types = self._get_input_tensors(node) - - min_value = input_nodes[1].value.val - max_value = input_nodes[2].value.val - - layer = self._get_builder().add_clip(name=node.name, - input_name=input_names[0], - output_name=node.name, - min_value=min_value, - max_value=max_value) - - self.tensor_shapes[node.name] = self._get_tensor_shape_from_type(node.datatype) - - def _convert_zeros_like(self, node): - """ Convert a ZerosLike node. - """ - input_nodes, input_names, input_types = self._get_input_tensors(node) - - shape = input_types[0].get_shape() - builder = self._get_builder() - if -1 not in shape: - # We can use fill static or load constant as shape is known - val = np.zeros(shape) - if len(shape) == 0: - val = np.array([0]) - layer = builder.add_load_constant_nd( - name=node.name, output_name=node.name, constant_value=val, shape=val.shape) - else: - # Insert dynamic zeros like - layer = builder.add_fill_like( - name=node.name, input_name=input_names[0], output_name=node.name, value=0.0) - shapes.propagate_single_layer(layer, self.tensor_shapes) diff --git a/coremltools/converters/nnssa/frontend/common_pass.py b/coremltools/converters/nnssa/frontend/common_pass.py deleted file mode 100644 index cad5ca871..000000000 --- a/coremltools/converters/nnssa/frontend/common_pass.py +++ /dev/null @@ -1,43 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -import traceback - -from .graph_pass import * - - -def common_pass(ssa, resume_on_errors=False, **kwargs): - passes = [ - trace_constants, - shift_get_global_to_set_global, - type_inference_pass, - common_symbolic_value_elimination, - delete_unnecessary_constant_nodes, - remove_identities, - delete_unnecessary_constant_nodes, - add_identity_outputs - ] - - omit_symbolic_pass = kwargs.get("omit_symbolic_pass", False) - if omit_symbolic_pass: - passes = [i for i in passes if i is not common_symbolic_value_elimination] - - omit_shift_global_pass = kwargs.get("omit_shift_global_pass", True) - if omit_shift_global_pass: - passes = [i for i in passes if i is not shift_get_global_to_set_global] - - if resume_on_errors is False: - for p in passes: - p(ssa) - else: - for p in passes: - try: - p(ssa) - except: - tb = traceback.format_exc() - print("Exception in pass " + str(p)) - print(tb) - print("Ignoring and continuing to next pass") - - return ssa diff --git a/coremltools/converters/nnssa/frontend/graph_pass/__init__.py b/coremltools/converters/nnssa/frontend/graph_pass/__init__.py deleted file mode 100644 index 17fa68608..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -from .type_inference import type_inference_pass -from .delete_constant import delete_unnecessary_constant_nodes -from .identity_outputs import add_identity_outputs -from .trace_constants import trace_constants -from .remove_identities import remove_identities -from .common_symbolic_value_elimination import common_symbolic_value_elimination -from .shift_global import shift_get_global_to_set_global - -from .remove_unused_nodes import remove_unused_nodes diff --git 
a/coremltools/converters/nnssa/frontend/graph_pass/common_symbolic_value_elimination.py b/coremltools/converters/nnssa/frontend/graph_pass/common_symbolic_value_elimination.py deleted file mode 100644 index 0deb10b5e..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/common_symbolic_value_elimination.py +++ /dev/null @@ -1,108 +0,0 @@ -import sympy as sm -import numpy as np -from ...commons.symbolic import * -from ...commons.basic_graph_ops import topsort, replace_source, disconnect_edge, connect_edge - - -def make_hashable(v): - if is_symbolic(v): - return str(v), True - elif hasattr(v, '__iter__'): - z = [make_hashable(h) for h in v] - return tuple(i[0] for i in z), any(i[1] for i in z) - else: - return v, False - - -def compute_roots(gdict, topsort_order): - # for each node, compute the list of initial inputs (inrank=0) i - # that lead up to the node - roots = {k: set() for k in gdict} - for t in topsort_order: - if len(gdict[t].inputs) == 0: - # roots have themselves as roots - roots[t].add(t) - continue - for i in gdict[t].inputs: - roots[t] = roots[t].union(roots[i]) - return roots - - -def common_symbolic_value_elimination_impl(gdict): - order = topsort(gdict) - roots = compute_roots(gdict, order) - values = {} - for k in order: - n = gdict[k] - nodeval = n.attr.get('symbolic_value') - if nodeval is None: - continue - - nodeval_list = nodeval if isinstance(nodeval, list) else [nodeval] - - for anodeval in nodeval_list: - val = anodeval.val - if isscalar(val) and val == -1: - continue - if (not isscalar(val)) and -1 in val: - continue - if isinstance(val, np.ndarray) and np.issctype(val.dtype) and val.size > 100: - continue - - hashable_val, any_symbolic = make_hashable(val) - if any_symbolic: - if hashable_val in values: - # rewrite graph - othernodes = values[hashable_val] - for othernode in othernodes: - if len(roots[othernode].intersection(roots[n.name])) > 0: - outputs = list(n.outputs) - for outnode in outputs: - replace_source(gdict, n.name, outnode, othernode) - else: - values[hashable_val] = values.get(hashable_val, []) + [k] - - -def common_symbolic_value_elimination_impl2(gdict): - order = topsort(gdict) - roots = compute_roots(gdict, order) - values = {} - node_values = {} - for k in order: - n = gdict[k] - nodeval = n.attr.get('symbolic_value') - build_val = False - try: - if nodeval is None: - build_val = True - elif isscalar(nodeval.val) and nodeval.val == -1: - build_val = True - elif (not isscalar(nodeval.val)) and -1 in nodeval.val: - build_val = True - except: - build_val = True - - if build_val == False: - hashable_val, _ = make_hashable(nodeval.val) - else: - effective_val = [n.op, sorted(list(n.attr)), [node_values[v] for v in n.inputs]] - hashable_val, _ = make_hashable(effective_val) - - hashable_val = hash(hashable_val) - node_values[n.name] = hashable_val - if hashable_val in values: - # rewrite graph - othernodes = values[hashable_val] - for othernode in othernodes: - if len(roots[othernode].intersection(roots[n.name])) > 0: - outputs = list(n.outputs) - for outnode in outputs: - replace_source(gdict, n.name, outnode, othernode) - else: - values[hashable_val] = values.get(hashable_val, []) + [k] - - -def common_symbolic_value_elimination(nnssa): - for i in nnssa.functions: - common_symbolic_value_elimination_impl(nnssa.functions[i].graph) - #common_symbolic_value_elimination_impl2(nnssa.functions[i].graph) diff --git a/coremltools/converters/nnssa/frontend/graph_pass/identity_outputs.py 
b/coremltools/converters/nnssa/frontend/graph_pass/identity_outputs.py deleted file mode 100644 index b7ea87b97..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/identity_outputs.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from ...nnssa import ParsedNode -from ...commons.basic_graph_ops import check_connections - - -def add_identity_outputs(ssa, main_function='main'): - """ - This pass changes the output nodes of the main function to always be - an "Identity" op. Essentially: - - someop(name:Result) - - gets transformed to - - someop(name:Result_orig) --> Identity(name:Result) - - This simplifies operation movement since the main function is kinda - special in that it is the only one that is not actually a function - with a single entry and exit point. - """ - main = ssa.functions[main_function] - vnames = list(main.graph.keys())[:] - for v in vnames: - node = main.graph[v] - if (len(node.outputs) == 0 and node.op != 'Identity' - and node.op != "NoOp" and node.op != 'set_global'): - # current node is appended with _orig, new node takes current name - name = ssa._find_free_name(node.name + '_orig') - original_name = node.name - # rename node - node.name = name - main.graph[node.name] = node - # create new Identity node - new_node = ParsedNode() - new_node.op = 'Identity' - new_node.name = original_name - new_node.datatype = node.datatype - new_node.value = node.value - main.graph[new_node.name] = new_node - - # modify input, control_input nodes to point to the modified name - for i in node.inputs: - main.graph[i].outputs = [ - o if o != original_name else name for o in main.graph[i].outputs - ] - for i in node.control_inputs: - main.graph[i].control_outputs = [ - o if o != original_name else name for o in main.graph[i].control_outputs - ] - # We maintain control outputs coming from the new node. - # Since the new node takes the original name, we don't need to modify - # the rest of the graph - new_node.control_outputs = node.control_outputs - node.control_outputs = [] - - # connect up old node and new node - node.outputs = [new_node.name] - new_node.inputs = [node.name] - - check_connections(main.graph)
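For orientation, here is the rewrite this pass performs, sketched on a toy dict-of-dicts graph (a hypothetical structure, not the real ParsedNode API):

```python
# A terminal node "Result" is renamed "Result_orig", and a pass-through
# Identity node takes over the original name, so every main-function
# output becomes an Identity op.
graph = {"Result": {"op": "someop", "inputs": ["x"], "outputs": []}}

node = graph.pop("Result")
node["outputs"] = ["Result"]
graph["Result_orig"] = node
graph["Result"] = {"op": "Identity", "inputs": ["Result_orig"], "outputs": []}

assert graph["Result"]["op"] == "Identity"
```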
diff --git a/coremltools/converters/nnssa/frontend/graph_pass/remove_identities.py b/coremltools/converters/nnssa/frontend/graph_pass/remove_identities.py deleted file mode 100644 index b07cb1ee9..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/remove_identities.py +++ /dev/null @@ -1,35 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from ...commons.basic_graph_ops import replace_source, delete_node -from ...commons import builtins - - -class RemoveIdentities(object): - def __init__(self, whole_graph): - self.whole_graph = whole_graph - - def trace(self): - # process every function in the ensemble - for fname, fn in self.whole_graph.functions.items(): - self._trace_function(fname) - - def _trace_function(self, fname): - fn = self.whole_graph.functions[fname] - nodenames = list(fn.graph.keys()) - for nodename in nodenames: - node = fn.graph[nodename] - if node.op == 'Identity': - # the main function is a little special. - # An identity node might be an output - if fname == 'main' and len(node.outputs) == 0: - continue - value = node.inputs[0] - for o in node.outputs: - replace_source(fn.graph, nodename, o, value) - delete_node(fn.graph, nodename) - - -def remove_identities(ssa): - RemoveIdentities(ssa).trace()
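Removing one Identity node amounts to re-pointing every consumer at the Identity's input and deleting the node; a minimal sketch with hypothetical node records (replace_source does this bookkeeping in the real pass):

```python
graph = {
    "x":  {"op": "Const",    "inputs": [],     "outputs": ["id"]},
    "id": {"op": "Identity", "inputs": ["x"],  "outputs": ["y"]},
    "y":  {"op": "someop",   "inputs": ["id"], "outputs": []},
}

# bypass the Identity node, then delete it
graph["y"]["inputs"] = ["x" if i == "id" else i for i in graph["y"]["inputs"]]
graph["x"]["outputs"] = ["y"]
del graph["id"]

assert graph["y"]["inputs"] == ["x"]
```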
diff --git a/coremltools/converters/nnssa/frontend/graph_pass/remove_unused_nodes.py b/coremltools/converters/nnssa/frontend/graph_pass/remove_unused_nodes.py deleted file mode 100644 index e0ed477bd..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/remove_unused_nodes.py +++ /dev/null @@ -1,95 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from ...commons.basic_graph_ops import replace_source, delete_node, disconnect_edge -from ...commons import builtins - - -def remove_unused_nodes(ssa): - # Very inefficient. Could be improved, but not necessary at current graph sizes. - removed = True - while removed: - removed = False - removed = sub_removal(ssa) - - -def sub_removal(ssa): - removed = False - for f in ssa.functions.keys(): - if not f.startswith("body_function_"): - continue - body_name = f - cond_name = f.replace('body', 'cond') - body = ssa.functions[body_name] - cond = ssa.functions[cond_name] - - for bk, bv in body.graph.items(): - if bv.op == 'function': - body_start = bk - if bv.op == 'return': - body_end = bk - for ck, cv in cond.graph.items(): - if cv.op == 'function': - cond_start = ck - if cv.op == 'return': - cond_end = ck - - inputs = [False] * len(body.graph[body_name].datatype.T) - for gt in body.graph[body_name].outputs: - assert (body.graph[gt].op == 'get_tuple') - inputs[body.graph[gt].attr['index']] = True - for gt in cond.graph[cond_name].outputs: - assert (cond.graph[gt].op == 'get_tuple') - inputs[cond.graph[gt].attr['index']] = True - - remove = [] - for idx, used in enumerate(inputs): - if not used: - remove.append(idx) - if len(remove) == 0: - continue - else: - removed = True - - gd = body.graph - mt = gd[body_end].inputs[0] - for idx in remove: - disconnect_edge(gd, gd[mt].inputs[idx], mt) - - for gt in gd[body_name].outputs: - # Need to do it the stupid way; get_tuple nodes are not sorted by index. - rm_idx = 0 - idx = gd[gt].attr['index'] - while rm_idx < len(remove) and remove[rm_idx] < idx: - rm_idx += 1 - # shift the index down by the number of removed slots below it - gd[gt].attr['index'] -= rm_idx - for gt in cond.graph[cond_name].outputs: - # Need to do it the stupid way; get_tuple nodes are not sorted by index. - rm_idx = 0 - idx = cond.graph[gt].attr['index'] - while rm_idx < len(remove) and remove[rm_idx] < idx: - rm_idx += 1 - cond.graph[gt].attr['index'] -= rm_idx - - rm_nodes = [] - _, rm_nodes = DFS_check(gd, body_name) - for node in rm_nodes: - delete_node(gd, node) - - -def DFS_check(gd, node): - if gd[node].op == 'return': - return True, [] - if len(gd[node].outputs) == 0: - return False, [node] - valid = False - rm_nodes = [] - for out in gd[node].outputs: - v, rm = DFS_check(gd, out) - if v: - valid = True - rm_nodes += rm - if not valid: - return False, rm_nodes + [node] - return True, rm_nodes
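A toy version of the reachability test DFS_check implements: a node survives only if some directed path from it reaches the 'return' node (toy dicts, not real ParsedNodes):

```python
toy = {
    "a": {"op": "Const",  "outputs": ["b"]},
    "b": {"op": "someop", "outputs": []},   # dead end: never reaches 'return'
    "r": {"op": "return", "outputs": []},
}

def reaches_return(g, n):
    if g[n]["op"] == "return":
        return True
    return any(reaches_return(g, o) for o in g[n]["outputs"])

assert not reaches_return(toy, "a")  # so 'a' and 'b' would be deleted
assert reaches_return(toy, "r")
```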
diff --git a/coremltools/converters/nnssa/frontend/graph_pass/shift_global.py b/coremltools/converters/nnssa/frontend/graph_pass/shift_global.py deleted file mode 100644 index 275291497..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/shift_global.py +++ /dev/null @@ -1,109 +0,0 @@ -from ...commons.basic_graph_ops import topsort, delete_node, replace_source, connect_edge, check_connections -from ...nnssa import ParsedNode - - -def shift_get_global_to_set_global(nnssa): - # For every set_global with only 1 get_global, shift all computations - # which only depend on the result of the get_global to be at the set_global - # instead, even if they are across different functions - delete_count = 0 - global_get_count = {} - global_set_count = {} - for fname in nnssa.functions: - f = nnssa.functions[fname] - for n in f.graph: - if f.graph[n].op == 'get_global': - v = f.graph[n].attr['variable'] - global_get_count[v] = global_get_count.get(v, []) + [(fname, n)] - elif f.graph[n].op == 'set_global': - v = f.graph[n].attr['variable'] - global_set_count[v] = global_set_count.get(v, []) + [(fname, n)] - - for v in global_set_count: - varname = v - if len(global_set_count[v]) == 1 and len(global_get_count.get(v, [])) == 1: - set_function_name, set_node = global_set_count[v][0] - get_function_name, get_node = global_get_count[v][0] - get_function = nnssa.functions[get_function_name] - set_function = nnssa.functions[set_function_name] - - get_fn_node_inputs = _trace_inputs(get_function.graph) - nodes_to_transplant = [ - i for i, v in get_fn_node_inputs.items() if len(v) == 1 and v[0] == get_node - ] - nodes_to_transplant = _find_upstream_nodes(get_function.graph, nodes_to_transplant) - nodes_to_transplant_set = set(nodes_to_transplant) - if len(nodes_to_transplant_set) == 1: - continue - transplant_output_nodes = [ - i for i in nodes_to_transplant - if len(set(get_function.graph[i].outputs) - nodes_to_transplant_set) > 0 - ] - - # create new nodes - new_get_globals = [ParsedNode() for i in range(len(transplant_output_nodes))] - new_set_globals = [ParsedNode() for i in range(len(transplant_output_nodes))] - for i in range(len(new_get_globals)): - new_get_globals[i].name = varname + '_get_global_shift_' + str(i) - new_get_globals[i].op = 'get_global' - new_get_globals[i].attr['variable'] = varname + '_get_global_shift_' + str(i) - new_set_globals[i].name = varname + '_set_global_shift_' + str(i) - new_set_globals[i].op = 'set_global' - new_set_globals[i].attr['variable'] = varname + '_get_global_shift_' + str(i) - get_function.graph[new_get_globals[i].name] = new_get_globals[i] - set_function.graph[new_set_globals[i].name] = new_set_globals[i] - for ctr, i in enumerate(transplant_output_nodes): - onodes = list(get_function.graph[i].outputs[:]) - for o in onodes: - if o not in nodes_to_transplant_set: - replace_source(get_function.graph, i, o, new_get_globals[ctr].name) - - # transplant - for d in nodes_to_transplant: - n = get_function.graph[d] - del get_function.graph[d] - set_function.graph[d] = n - - for ctr, i in enumerate(transplant_output_nodes): - connect_edge(set_function.graph, i, new_set_globals[ctr].name) - - connect_edge(set_function.graph, set_node, get_node) - set_function.graph[set_node].op = 'Identity' - set_function.graph[get_node].op = 'Identity' - del set_function.graph[set_node].attr['variable'] - del set_function.graph[get_node].attr['variable'] - # update variables - del nnssa.variables[varname] - # unknown type and value - for i in new_get_globals: - nnssa.variables[i.name] = None - check_connections(get_function.graph) - check_connections(set_function.graph) - - -def _trace_inputs(graph): - t = topsort(graph) - inputs = {} - for n in t: - if graph[n].op == 'Const': - inputs[n] = [] - elif len(graph[n].inputs) == 0: - inputs[n] = [n] - else: - s = set() - for i in graph[n].inputs: - s |= set(inputs[i]) - inputs[n] = list(s) - return inputs - - -def _find_upstream_nodes(graph, nodes): - queue = nodes[:] - visited = {} - while len(queue) > 0: - n = queue.pop() - if n in visited: - continue - visited[n] = True - queue = queue + graph[n].inputs - return visited.keys() diff --git a/coremltools/converters/nnssa/frontend/graph_pass/trace_constants.py b/coremltools/converters/nnssa/frontend/graph_pass/trace_constants.py deleted file mode 100644 index 864c4e0e8..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/trace_constants.py +++ /dev/null @@ -1,288 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from ...commons.basic_graph_ops import topsort, delete_node, disconnect_edge, connect_edge -from ...commons import builtins - - -class ConstantTracing(object): - def __init__(self, whole_graph): - self.whole_graph = whole_graph - self.node_value_trace = {} - - def trace(self): - # check that every function is only called in one place - callcount = {} - for fname, fn in self.whole_graph.functions.items(): - for nodename, node in fn.graph.items(): - if node.op == 'while': - fname = node.attr['cond_function'] - callcount[fname] = callcount.get(fname, 0) + 1 - fname = node.attr['body_function'] - callcount[fname] = callcount.get(fname, 0) + 1 - for v in callcount.values(): - assert (v == 1) - self._trace_function('main') - # now trace all functions which use a constant C. - # function F uses C if there exists some node in F which - # takes C as an input, and the node is not one of get_tuple, make_tuple and while - const_usages = {} - for (fname, nodename), v in self.node_value_trace.items(): - if type(v) is tuple and self.whole_graph.functions[v[0]].graph[v[1]].value is not None: - fn = self.whole_graph.functions[fname] - node = fn.graph[nodename] - for o in node.outputs: - if fn.graph[o].op not in ['make_tuple', 'while']: - if v not in const_usages: - const_usages[v] = set() - const_usages[v].add(fname) - # while loops are special because the inputs and outputs are bound, - # so I have to line up the corresponding input/output terms for each - # while loop. Both terms have to be in const_usages for it to be - # actually removable.
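# To illustrate the pairing below with toy traces: suppose a while node's
# input tuple traces to [C, x] and its output tuple traces to [C, y].
# Slot 0 pairs constant C with itself, so C stays a removal candidate;
# had C appeared on only one side, the loop below would drop it from
# const_usages, since moving it would change the loop-carried value.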
- for fname, fn in self.whole_graph.functions.items(): - for nodename, node in fn.graph.items(): - if node.op == 'while': - for i, o in zip(self.node_value_trace[(fname, node.inputs[0])], - self.node_value_trace[(fname, nodename)]): - if i in const_usages and o not in const_usages: - del const_usages[i] - if o in const_usages and i not in const_usages: - del const_usages[o] - - remap_candidates = {} - for k, v in const_usages.items(): - if k[0] not in v and len(v) == 1: - remap_candidates[k] = list(v)[0] - print(k, list(v)[0]) - - # remap_candidates[(fn,const_node,)] -> target_function - # means that we can move this const_node into that particular function - for (fn, const_node), target_function in remap_candidates.items(): - self._move_constant(fn, const_node, target_function) - - def _trace_function(self, fname): - fn = self.whole_graph.functions[fname] - - nodenames = topsort(fn.graph) - for nodename in nodenames: - node = fn.graph[nodename] - if ( - fname, - nodename, - ) in self.node_value_trace: - continue - if node.value is not None: - self.node_value_trace[( - fname, - nodename, - )] = ( - fname, - nodename, - ) - elif node.op == 'make_tuple': - val = [self.node_value_trace.get(( - fname, - i, - ), None) for i in node.inputs] - self.node_value_trace[( - fname, - nodename, - )] = val - elif node.op == 'get_tuple': - inval = self.node_value_trace[( - fname, - node.inputs[0], - )] - if type(inval) is list: - # input is a tuple we traced - self.node_value_trace[( - fname, - nodename, - )] = inval[node.attr['index']] - else: - # input is a tuple type we don't recognize. give it a unique key - self.node_value_trace[( - fname, - nodename, - )] = ( - fname, - nodename, - ) - elif node.op == 'Identity' or node.op == 'return': - self.node_value_trace[( - fname, - nodename, - )] = self.node_value_trace[( - fname, - node.inputs[0], - )][:] - elif node.op == 'while': - # check that the cond and body function have only 1 input and one output - cond_function = node.attr['cond_function'] - body_function = node.attr['body_function'] - cond_entry = self.whole_graph.functions[cond_function].inputs - body_entry = self.whole_graph.functions[body_function].inputs - cond_exit = self.whole_graph.functions[cond_function].outputs - body_exit = self.whole_graph.functions[body_function].outputs - assert (len(cond_entry) == 1) - assert (len(body_entry) == 1) - assert (len(cond_exit) == 1) - assert (len(body_exit) == 1) - cond_entry = cond_entry[0] - body_entry = body_entry[0] - cond_exit = cond_exit[0] - body_exit = body_exit[0] - - inval = self.node_value_trace[( - fname, - node.inputs[0], - )] - if ( - cond_function, - cond_entry, - ) not in self.node_value_trace: - self.node_value_trace[( - cond_function, - cond_entry, - )] = inval[:] - self._trace_function(cond_function) - if ( - body_function, - body_entry, - ) not in self.node_value_trace: - self.node_value_trace[( - body_function, - body_entry, - )] = inval[:] - self._trace_function(body_function) - # unify exits - body_outval = self.node_value_trace[( - body_function, - body_exit, - )] - self.node_value_trace[( - fname, - nodename, - )] = body_outval[:] - else: - self.node_value_trace[( - fname, - nodename, - )] = ( - fname, - nodename, - ) - - def _move_constant(self, source_fname, source_node_name, target_fname): - # now. this is the painful part. 
quite a lot of rewriting has to happen here - - source_fn = self.whole_graph.functions[source_fname] - target_fn = self.whole_graph.functions[target_fname] - source_const_key = ( - source_fname, - source_node_name, - ) - source_const_node = source_fn.graph[source_node_name] - - # move the nodes. - - # make sure the new const node is in the trace - self.node_value_trace[( - target_fname, - source_const_node.name, - )] = ( - target_fname, - source_const_node.name, - ) - - # We first remove references to the source node from all functions - fnames = list(self.whole_graph.functions.keys()) - fnames.pop(fnames.index(source_fname)) - fnames.pop(fnames.index(target_fname)) - fnames = [source_fname, target_fname] + fnames - for fname in fnames: - - fn = self.whole_graph.functions[fname] - # remove the const node everywhere including from the target function - # This might seem awkward, but it isn't really. - # - # Essentially, we are removing the source constant node - # entirely and re-associating everything which uses it in the target - # function with the moved constant. This means that even the - # entry point and exit points of the target function have to be - # rewritten. Thus it is simpler to just think about it - # as a complete deletion of the source constant node. - # - # The only care we have to take is that the get_tuple in the target graph - # has to be modified, and not deleted. - delete_nodes = [] - nodenames = topsort(fn.graph) - for nodename in nodenames: - node = fn.graph[nodename] - cur_node_key = ( - fname, - nodename, - ) - if type(self.node_value_trace[cur_node_key]) is list and \ - self.node_value_trace[cur_node_key].count(source_const_key) > 0: - # remove the input - idx = self.node_value_trace[cur_node_key].index(source_const_key) - if node.op == 'make_tuple': - disconnect_edge(fn.graph, node.inputs[idx], nodename) - # update type - if node.datatype is not None: - newtype = list(node.datatype.T)[:] - newtype.pop(idx) - node.datatype = builtins.tuple(newtype) - # update the trace. maintain invariants - self.node_value_trace[cur_node_key].pop(idx) - elif type(self.node_value_trace[cur_node_key]) is tuple and \ - self.node_value_trace[cur_node_key] == source_const_key: - delete_nodes.append(nodename) - elif node.op == 'get_tuple': - my_trace = self.node_value_trace[( - fname, - nodename, - )] - parent_trace = self.node_value_trace[( - fname, - node.inputs[0], - )] - if type(parent_trace) is list: - node.attr['index'] = parent_trace.index(my_trace) - - if fname == source_fname: - delete_node(source_fn.graph, source_node_name) - target_fn.graph[source_node_name] = source_const_node - for d in delete_nodes: - if d != source_node_name: - delete_node(fn.graph, d) - elif fname == target_fname: - # if this is the target function, we rewrite. - for d in delete_nodes: - assert (len(fn.graph[d].inputs) == 1) - fn.graph[d].op = 'Identity' - disconnect_edge(fn.graph, fn.graph[d].inputs[0], d) - connect_edge(fn.graph, source_const_node.name, d) - self.node_value_trace[( - fname, - d, - )] = ( - fname, - source_const_node.name, - ) - else: - for d in delete_nodes: - delete_node(fn.graph, d) - - # cleanup.
delete the old node from the trace - del self.node_value_trace[( - source_fname, - source_const_node.name, - )] - - -def trace_constants(ssa): - ConstantTracing(ssa).trace() diff --git a/coremltools/converters/nnssa/frontend/graph_pass/type_inference.py b/coremltools/converters/nnssa/frontend/graph_pass/type_inference.py deleted file mode 100644 index eb792951a..000000000 --- a/coremltools/converters/nnssa/frontend/graph_pass/type_inference.py +++ /dev/null @@ -1,2839 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import collections -import logging -import math -import numpy as np -import operator -import sympy as sm -import sys -import six - -PY3 = False -if sys.version_info >= (3, 0): - PY3 = True - -from ...commons import builtins -from ...commons.builtins.utils import promote_types -from ...commons.symbolic import * # pylint: disable=wildcard-import - -short_var_name_cache = {} -"""Mapping of operator names to a function to evaluate its symbolic_value.""" -_SYMBOL_MAP_OPS = { - 'Add': operator.add, - 'Equal': operator.eq, - 'FloorDiv': operator.floordiv, - 'FloorMod': operator.mod, - 'Greater': operator.gt, - 'GreaterEqual': operator.ge, - 'Less': operator.lt, - 'LessEqual': operator.le, - 'Mul': operator.mul, - 'NotEqual': operator.ne, - 'RealDiv': operator.truediv, - 'Sub': operator.sub -} - -_SYMBOL_REDUCE_OPS = { - 'ArgMax': np.argmax, - 'Max': np.amax, - 'Mean': np.mean, - 'Min': np.amin, - 'Prod': np.prod, - 'Sum': np.sum -} - - -def get_conv_outdim(in_dim, ks, stride, dl, padding_type): - try: - if padding_type == 'VALID': - ks_dilated = (ks - 1) * dl + 1 - return (in_dim - ks_dilated) / stride + 1 - elif padding_type == 'SAME': - return math.ceil(in_dim * 1.0 / stride) - else: - raise ValueError('[TypeInference] Unrecognized padding type.') - except Exception as e: - raise ValueError('[TypeInference] Error fetching padding values: {}'.format(e)) - - -def get_short_var_name(name): - if name in short_var_name_cache: - return short_var_name_cache[name] - else: - shortname = 's_' + str(len(short_var_name_cache)) - short_var_name_cache[name] = shortname - return shortname - - -def replace_neg_1_with_symbolic(val, name): - for i in range(len(val)): - if np.isscalar(val[i]) and val[i] == -1: - val[i] = sm.Symbol(get_short_var_name(name + '_' + str(i)), positive=True) - return val - - -def make_symbol(name): - return sm.Symbol(get_short_var_name(name), positive=True) - - -def to_int(ls): - for i in range(len(ls)): - if is_symbolic_or_unknown(ls[i]): - continue - ls[i] = int(ls[i]) - return ls - - -def try_to_np_type(v): - """ - np types are easier to handle than python primitive. (e.g., calling - reshape on np.int32 is valid, but not on int.) - """ - if isinstance(v, int): - return np.int32(v) - if isinstance(v, float): - return np.float32(v) - return v - - -def reshape_with_symbol(v, shape): - """ - Perform basic reshape if v is symbolic (not array of symbols). - """ - if is_symbolic_or_unknown(v): - return np.array(v).reshape(shape) - shape = [int(s) for s in shape] - return v.reshape(shape) - - -def try_get_non_sym_val(node): - """ - node: ssa node name - Return None if `node` doesn't have fully materialized value, else the - value. 
- """ - if "symbolic_value" in node.attr and \ - node.attr["symbolic_value"] is not None and \ - node.attr["symbolic_value"].val is not None and \ - not is_symbolic_or_unknown(node.attr["symbolic_value"].val): - return node.attr["symbolic_value"].val - return None - - -class TypeInferenceVisitor(object): - def __init__(self, graph, whole_ssa, pedantic=False): - """ - Args: - graph (dict): A mapping of node names to TFNodes representing the function - to type check. - whole_ssa (NetworkEnsemble): The program being compiled. - pedantic (bool): If true, require consistency in tensor primitive types. When - possible, this should be enabled. This is a more stringent standard than - can currently be met by some our frontends. Therefore, we leave it off by - default. - """ - # the whole ssa is needed to propagation function calls - self.op_rules = {} - self.gdict = graph - self.whole_ssa = whole_ssa - self.visited = {} - self.pedantic = pedantic - - def visit(self, node): - # make sure node is a ParsedNode - from ...nnssa import ParsedNode - if not isinstance(node, ParsedNode): - node = self.gdict[node] - - # do we already know the answer? - if node.datatype is not None and not node.op.startswith('TensorArray'): - # if it is a fully specified type, we just return it - # if we seen it this round, we return it - # otherwise we recurse - if not builtins.is_tensor(node.datatype) or \ - builtins.tensor_has_complete_shape(node.datatype) or \ - node.name in self.visited: - return node.datatype - # look for the op's visit method - method = 'visit_' + node.op - visitor = getattr(self, method, None) - if visitor is None: - logging.warning('WARNING [TypeInference]: Op {} not implemented. Inferring shape from node attribute!'.format(node.op)) - visitor = self._get_type_from_attr - - # find the type of the node - ret = None - try: - ret = visitor(node) - except Exception as e: # pylint: disable=broad-except - logging.exception("[TypeInference] Failed to infer type of %s:%s", node.name, node.op) - raise - - if ret is not None: - self.visited[node.name] = 1 - node.datatype = ret - else: - logging.error("[TypeInference] Unable to infer type of node %s (%s)", node.name, node.op) - return ret - - def visit_all(self): - for i in self.gdict: - self.visit(self.gdict[i]) - - def _get_type_from_attr(self, node): - if node.datatype is not None: - return node.datatype - - node.parse_from_attr() - if builtins.is_tensor(node.datatype): - s = list(node.datatype.get_shape()) - for i in range(len(s)): - if s[i] == -1: - s[i] = make_symbol(node.name + '_' + str(i)) - node.datatype = builtins.tensor(node.datatype.get_primitive(), tuple(s)) - return node.datatype - - def match_shape(self, shapea, shapeb): - if len(shapea) != len(shapeb): - return False - for idx in range(len(shapea)): - if shapea[idx] != shapeb[idx] and shapea[idx] > 0 and shapeb[idx] > 0: - return False - return True - - def strict_shape(self, typea, typeb): - shape = list(typea.get_shape()) - shapeb = typeb.get_shape() - for idx in range(len(shape)): - if is_symbolic_or_unknown(shape[idx]): - shape[idx] = shapeb[idx] - return builtins.tensor(typea.T[0], tuple(shape)) - - def _shape_as_ints(self, shape): - """Convert a list of dimensions to ints and symbols""" - return [s if is_symbolic(s) else int(s) for s in shape] - - def all_inputs_have_values(self, node): - return all(self.gdict[i].attr['symbolic_value'] is not None for i in node.inputs) - - def any_inputs_have_values(self, node): - return any(self.gdict[i].attr['symbolic_value'] is not None for i in 
- def _get_type_from_attr(self, node): - if node.datatype is not None: - return node.datatype - - node.parse_from_attr() - if builtins.is_tensor(node.datatype): - s = list(node.datatype.get_shape()) - for i in range(len(s)): - if s[i] == -1: - s[i] = make_symbol(node.name + '_' + str(i)) - node.datatype = builtins.tensor(node.datatype.get_primitive(), tuple(s)) - return node.datatype - - def match_shape(self, shapea, shapeb): - if len(shapea) != len(shapeb): - return False - for idx in range(len(shapea)): - if shapea[idx] != shapeb[idx] and shapea[idx] > 0 and shapeb[idx] > 0: - return False - return True - - def strict_shape(self, typea, typeb): - shape = list(typea.get_shape()) - shapeb = typeb.get_shape() - for idx in range(len(shape)): - if is_symbolic_or_unknown(shape[idx]): - shape[idx] = shapeb[idx] - return builtins.tensor(typea.T[0], tuple(shape)) - - def _shape_as_ints(self, shape): - """Convert a list of dimensions to ints and symbols""" - return [s if is_symbolic(s) else int(s) for s in shape] - - def all_inputs_have_values(self, node): - return all(self.gdict[i].attr['symbolic_value'] is not None for i in node.inputs) - - def any_inputs_have_values(self, node): - return any(self.gdict[i].attr['symbolic_value'] is not None for i in node.inputs) - - def all_inputs_have_non_sym_values(self, node): - return all( - self.gdict[i].attr['symbolic_value'] is not None - and not any_symbolic_or_unknown(self.gdict[i].attr['symbolic_value'].val) - for i in node.inputs) - - def get_all_input_values(self, node): - ret = [] - for i in node.inputs: - if self.gdict[i].attr['symbolic_value'] is not None: - ret.append(self.gdict[i].attr['symbolic_value'].val) - else: - ret.append(None) - return ret - - def resolve_to_non_sym_val_or_die(self, node_name): - """ - Requires node_name to resolve to a non-symbolic value. - """ - self.visit(node_name) - val = try_get_non_sym_val(self.gdict[node_name]) - assert val is not None, "%s has to have non-symbolic value" % node_name - return val - - def _promoted_primitive_type(self, type1, type2): - """ - Given a pair of tensor or primitive types, find the smallest type that can store an instance - of their primitive type. - """ - ptype1 = type1.get_primitive() if builtins.is_tensor(type1) else type1 - ptype2 = type2.get_primitive() if builtins.is_tensor(type2) else type2 - return promote_types(ptype1, ptype2) - - def _get_node(self, node): - if isinstance(node, six.string_types): - node = self.gdict.get(node, None) - return node - - # - # Symbolic evaluation - # - - def _get_symbolic_value(self, node): - return self._get_node(node).attr.get('symbolic_value', None) - - def _set_symbolic_value(self, node, datatype, value): - v = datatype() - v.val = value - self._get_node(node).attr['symbolic_value'] = v - - def _eval_symbolic_value_map(self, node, value_type): - """ - Set a node's symbolic value by applying a binary operator. - - See _SYMBOL_MAP_OPS for the definition of op name to function. - - Args: - node (ParsedNode): The node whose symbolic value to set. - value_type (nitro_builtin): The symbol's type. - """ - input0 = self._get_symbolic_value(node.inputs[0]) - input1 = self._get_symbolic_value(node.inputs[1]) - if input0 is None or input1 is None: - return - - binary_op = _SYMBOL_MAP_OPS.get(node.op, None) - if binary_op is None: - logging.warning('Symbolic evaluation of operator %s not implemented', node.op) - return - - input0 = input0.val - input1 = input1.val - value = binary_op(input0, input1) - self._set_symbolic_value(node, value_type, value)
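Because shapes and values flow through these evaluators as sympy expressions, the op table can fold values whether or not they are concrete; a standalone sketch (a trimmed copy of the table, not the real one):

```python
import operator
import sympy as sm

symbol_map_ops = {'Add': operator.add, 'Mul': operator.mul}

s = sm.Symbol('s_0', positive=True)
print(symbol_map_ops['Add'](s, 1))   # s_0 + 1 : stays symbolic
print(symbol_map_ops['Mul'](2, 3))   # 6       : folds to a concrete value
```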
- """ - input0 = self._get_symbolic_value(node.inputs[0]) - input1 = self._get_symbolic_value(node.inputs[1]) - if input0 is None or input1 is None: - return - - reduce_op = _SYMBOL_REDUCE_OPS.get(node.op, None) - if reduce_op is None: - logging.warning('Symbolic evaluation of operator %s not implemented', node.op) - return - - values = input0.val - axis = input1.val - if not np.isscalar(axis) and len(axis) == 1: - axis = axis[0] - - val = reduce_op(values, axis=axis) - if builtins.is_tensor(value_type) and np.isscalar(val): - val = np.array([val]) - - self._set_symbolic_value(node, value_type, val) - - # - # Common patterns - # - - def _visit_unary(self, node): - if len(node.inputs) != 1: - raise ValueError('Expected 1 input to {} node {}'.format(node.op, node.name)) - return self.visit(node.inputs[0]) - - def _visit_reduce(self, node): - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - typea = self.visit(node.inputs[0]) - typeb = self.visit(node.inputs[1]) - reduction_indices = self.gdict[node.inputs[1]].attr['symbolic_value'] - if typea is None: - return None - if reduction_indices is None: - raise TypeError( - "Cannot infer shape of {} because we cannot infer the value of reduction_indices". - format(node.op)) - reduction_indices = reduction_indices.val - # the reduction_idx node can be a scalar - if not builtins.is_tensor(typeb): - reduction_indices = [reduction_indices] - keepdims = node.attr.get('keep_dims', False) - reduced_shape = list(typea.get_shape()) - if len(reduction_indices) == 0: - reduction_indices = list(range(len(reduced_shape))) - if keepdims: - for i in reduction_indices: - reduced_shape[i] = 1 - else: - # sort reverse so we can delete shape elements it back to front - reduction_indices = sorted(reduction_indices)[::-1] - for i in reduction_indices: - reduced_shape.pop(i) - if len(reduced_shape) == 0: - rettype = typea.get_primitive() - else: - rettype = builtins.tensor(typea.get_primitive(), reduced_shape) - node.attr['reduction_indices'] = reduction_indices - node.attr['keep_dims'] = keepdims - - self._eval_symbolic_value_reduce(node, rettype) - - return rettype - - def _broadcast_shape(self, node, shapea, shapeb): - """ - Determine the shape of a broadcast of two shapes. - - Args: - node (ParsedNode): The node bring processed (used for exception messages). 
- shapea (Iterable[int]): A shape - shapeb (Iterable[int]): Another shape - """ - shapea = list(shapea) - shapeb = list(shapeb) - if len(shapea) < len(shapeb): - shapea = ([1] * (len(shapeb) - len(shapea))) + shapea - if len(shapeb) < len(shapea): - shapeb = ([1] * (len(shapea) - len(shapeb))) + shapeb - # get loosest shape - retshape = [] - for i in range(len(shapea)): - a_unknown = is_symbolic_or_unknown(shapea[i]) - b_unknown = is_symbolic_or_unknown(shapeb[i]) - if shapea[i] == 1: - retshape.append(shapeb[i]) - elif shapeb[i] == 1: - retshape.append(shapea[i]) - elif not b_unknown and shapeb[i] > 1: - if not a_unknown and shapea[i] != shapeb[i]: - raise ValueError( - 'Incompatible dimension {} in {} operation {}'.format( - i, node.op, node.name)) - retshape.append(shapeb[i]) - elif not a_unknown and shapea[i] > 1: - if not b_unknown and shapea[i] != shapeb[i]: - raise ValueError( - 'Incompatible dimension {} in {} operation {}'.format( - i, node.op, node.name)) - retshape.append(shapea[i]) - elif a_unknown or b_unknown: - retshape.append(sm.functions.Max(shapea[i], shapeb[i])) - else: - assert (shapea[i] == shapeb[i]) - retshape.append(shapea[i]) - return retshape - - def _visit_broadcast(self, node, is_predicate=False): - # this is broadcast mul - assert (len(node.inputs) == 2) - typea = self.visit(node.inputs[0]) - typeb = self.visit(node.inputs[1]) - if typea is not None and typeb is not None: - primitive_type = builtins.bool if is_predicate else self._promoted_primitive_type( - typea, typeb) - if primitive_type is None: - raise ValueError('Incompatible primitive types in broadcast operation') - if builtins.is_tensor(typea): - if builtins.is_tensor(typeb): - retshape = self._broadcast_shape(node, typea.get_shape(), typeb.get_shape()) - retval = builtins.tensor(primitive_type, retshape) - else: - # a is tensor, b is not - retval = builtins.tensor(primitive_type, typea.get_shape()) - elif builtins.is_tensor(typeb): - # b is tensor, a is not - retval = builtins.tensor(primitive_type, typeb.get_shape()) - else: - # both typea and typeb are not tensors - retval = primitive_type - self._eval_symbolic_value_map(node, retval) - return retval - else: - # we have no idea what a and b are. Maybe Tensorflow does. 
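# For intuition, the resolution above mirrors numpy broadcasting with
# symbolic fallbacks: right-align (3, 1, 5) against (4, 5) to get
# (3, 1, 5) vs (1, 4, 5), then take the non-1 size per axis -> (3, 4, 5);
# when either side is symbolic/unknown, the result falls back to
# sm.functions.Max of the two candidate sizes.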
- return self._get_type_from_attr(node) - - # The main visitors - - def visit_get_tuple(self, node): # DO NOT PROPAGATE TYPE INFERENCE ACROSS FUNCTIONS - assert (len(node.inputs) == 1) - parent_type = self.visit(node.inputs[0]) - self.propagate_tensor_array(node) - # parent_type should be an instance of tuple - if parent_type is None: - return None - assert (builtins.is_tuple(parent_type)) - parent_val = self.gdict[node.inputs[0]].attr['symbolic_value'] - rettype = parent_type.T[node.attr["index"]] - if parent_val is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = parent_val[node.attr['index']].val - - return rettype - - def visit_Identity(self, node): - ret = self._visit_unary(node) - node.attr['symbolic_value'] = self.gdict[node.inputs[0]].attr['symbolic_value'] - if 'tensorarray_source' in self.gdict[node.inputs[0]].attr: - node.attr['tensorarray_source'] = self.gdict[node.inputs[0]].attr['tensorarray_source'] - return ret - - def visit_ZerosLike(self, node): - return self._visit_unary(node) - - def visit_Print(self, node): - # this is just identity - node.op = 'Identity' - return self.visit(node.inputs[0]) - - def visit_Log(self, node): - ret = self._visit_unary(node) - return ret - - def visit_Add(self, node): - return self._visit_broadcast(node) - - def visit_AddN(self, node): - return self._get_type_from_attr(node) - - def visit_AddV2(self, node): - return self._visit_broadcast(node) - - def visit_Maximum(self, node): - return self._visit_broadcast(node) - - def visit_Minimum(self, node): - return self._visit_broadcast(node) - - def visit_LogicalOr(self, node): - return self._visit_broadcast(node) - - def visit_LogicalAnd(self, node): - return self._visit_broadcast(node) - - def visit_LogicalNot(self, node): - return self.visit(node.inputs[0]) - - def visit_All(self, node): - return self._visit_reduce(node) - - def visit_Any(self, node): - return self._visit_reduce(node) - - def visit_ArgMax(self, node): - return self._visit_reduce(node) - - def visit_ArgMin(self, node): - return self._visit_reduce(node) - - def visit_Prod(self, node): - return self._visit_reduce(node) - - def visit_Assign(self, node): - assert (len(node.inputs) == 2) - return self.visit(node.inputs[1]) - - def visit_Assert(self, node): - pass - - def visit_BiasAdd(self, node): - return self._visit_broadcast(node) - - def visit_Cast(self, node): - assert (len(node.inputs) == 1) - input_type = self.visit(node.inputs[0]) - dst_type = node.attr.get('DstT', None) - if not builtins.is_primitive(dst_type): - raise ValueError('Invalid destination type for Cast operation') - if builtins.is_tensor(input_type): - rettype = builtins.tensor(dst_type, input_type.get_shape()) - else: - rettype = dst_type - - value = self._get_symbolic_value(node.inputs[0]) - if value is not None and not any_symbolic_or_unknown(value.val): - self._set_symbolic_value(node, rettype, value.val.astype(builtins.utils.nptype_from_builtin(dst_type))) - return rettype - - def visit_Concat(self, node): - return self.visit_ConcatV2(node, is_v2=False) - - def visit_ConcatV2(self, node, is_v2=True): - # Concat takes several tensors and an axis along which to concatenate; - # get the most specific type of all the concatenated variables - def axis_dim_len(input_types, concat_axis): - """Compute the length of the axis dimension""" - new_axis_shape = 0 - for t in input_types: - if builtins.is_tensor(t): - if len(t.get_shape()) > concat_axis: - taxis = t.get_shape()[concat_axis] - if taxis == -1: - new_axis_shape = make_symbol(node.name +
'_new_axis') - break - else: - new_axis_shape += taxis - else: - new_axis_shape = make_symbol(node.name + '_new_axis') - break - else: - new_axis_shape = make_symbol(node.name + '_new_axis') - break - return new_axis_shape - - if len(node.inputs) < 2: - raise ValueError('Expected at least 2 inputs to {} node {}'.format(node.op, node.name)) - - # Axis arg must be a scalar - if is_v2: - axis_node = node.inputs[-1] - else: - axis_node = node.inputs[0] - - axis_type = self.visit(axis_node) - - if not builtins.is_primitive(axis_type): - raise ValueError( - 'Unexpected non-primitive axis argument to {} op {}'.format(node.op, node.name)) - - # Non-axis args must be tensors - input_names = [inp for inp in node.inputs if inp != axis_node] - input_types = [self.visit(inp) for inp in input_names] - - # If unable to infer the type of any one input, return None - if None in input_types: - return None - - if not all([builtins.is_tensor(it) for it in input_types]): - raise ValueError( - 'Unexpected non-tensor argument to {} op {}'.format(node.op, node.name)) - rank = len(input_types[0].get_shape()) - - # Axis must be computable at compile time - concat_axis = self._get_symbolic_value(axis_node) - if concat_axis is None: - return None - concat_axis = int(concat_axis.val) - - if concat_axis < 0: - concat_axis += rank - - if concat_axis >= rank: - raise ValueError('Axis out of bounds in {} op {}'.format(node.op, node.name)) - - # Output shape has the same rank as the inputs and the same size - # along all non-axis dimensions - if any([len(it.get_shape()) != rank for it in input_types[1:]]): - raise ValueError('Inputs to {} op {} are not of same rank'.format(node.op, node.name)) - - # Validate primitive types match and non-axis dimensions match - retshape = list(input_types[0].get_shape()) - retprim = input_types[0].get_primitive() - for it in input_types[1:]: - if self.pedantic and it.get_primitive() != retprim: - raise ValueError('Primitive type mismatch in {} op {}'.format(node.op, node.name)) - it_shape = it.get_shape() - for i in range(rank): - if i != concat_axis and retshape[i] != it_shape[i]: - if is_symbolic_or_unknown(retshape[i]) or is_symbolic_or_unknown(it_shape[i]): - continue - raise ValueError('Dimension mismatch in {} op {}'.format(node.op, node.name)) - - retshape[concat_axis] = axis_dim_len(input_types, concat_axis) - rettype = builtins.tensor(retprim, retshape) - - # Construct a symbolic_value only if every input that lacks a - # symbolic_value has exactly 1 entry.
- create_symbolic = True - for t, n in zip(input_types, input_names): - if self._get_symbolic_value(n) is None and \ - builtins.is_tensor(t) and t.get_shape() != (1,): - create_symbolic = False - break - if create_symbolic: - inputs = self.get_all_input_values(node) - inputs = inputs[:-1] if is_v2 else inputs[1:] - for i in range(len(inputs)): - if inputs[i] is None: - if builtins.is_tensor(input_types[i]): - # input_types[i] must be of shape [1,] - inputs[i] = np.array([make_symbol(node.name + '%d' % i)]) - else: - inputs[i] = make_symbol(node.name + '_%d' % i) - if isscalar(inputs[i]): - inputs[i] = np.array(inputs[i]) - val = np.concatenate(inputs, axis=concat_axis) - self._set_symbolic_value(node, rettype, val) - return rettype - - def visit_Const(self, node): - assert (len(node.inputs) == 0) - node.attr['symbolic_value'] = node.value - if node.datatype is not None: - return node.datatype - return self._get_type_from_attr(node) - - def _conv2d_strides_or_dilations(self, name, value, data_format, default_value): - if value is None: - value = default_value - if not isinstance(value, (int, list)): - raise ValueError('{} must be an int or list'.format(name)) - - if isinstance(value, int): - return [value] * 2 - - if len(value) == 1: - return value * 2 - if len(value) == 2: - return value - if len(value) != 4: - raise ValueError('{} must have length 1, 2, or 4'.format(name)) - - if data_format == "NHWC": - # Only support stride/dilation along N, C == 1 - if not (value[0] == value[3] == 1): - raise ValueError('{} along N and C other than 1 not implemented'.format(name)) - return value[1:3] - - # "NCHW" - if not (value[0] == value[1] == 1): - raise ValueError('{} along N and C other than 1 not implemented'.format(name)) - return value[2:] - - def _conv2d_pad(self, algorithm, custom_pad, filter_hw): - # pad = [t+b, l+r] - if algorithm == 'VALID': - return [0] * 2 - if algorithm == 'SAME': - return [(f // 2) * 2 for f in filter_hw] - if algorithm == 'CUSTOM': - if not isinstance(custom_pad, list) or len(custom_pad) != 4: - raise ValueError('Invalid custom padding; expected (t, b, l, r)') - return [custom_pad[0] + custom_pad[1], custom_pad[2] + custom_pad[3]] - raise ValueError('Invalid padding algorithm "{}"'.format(algorithm))
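A quick sanity check of the SAME/VALID total-padding rule above (a standalone restatement of the same arithmetic, not the class method):

```python
def pad_total(algorithm, filter_hw):
    if algorithm == 'VALID':
        return [0, 0]
    if algorithm == 'SAME':
        # total padding (top+bottom, left+right) for odd filter sizes
        return [(f // 2) * 2 for f in filter_hw]
    raise ValueError(algorithm)

assert pad_total('SAME', [3, 3]) == [2, 2]   # 3x3 filter: one pixel per side
assert pad_total('SAME', [5, 5]) == [4, 4]
assert pad_total('VALID', [3, 3]) == [0, 0]
```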
- """ - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - input_type = self.visit(node.inputs[0]) - filter_type = self.visit(node.inputs[1]) - - for idx, input in enumerate(node.inputs): - node_type = self._get_node(input).datatype - if not builtins.is_tensor(node_type) or len(node_type.get_shape()) != 4: - raise ValueError( - 'Input {} to {} node {} is not a 4D tensor'.format(idx, node.op, node.name)) - for s in node_type.get_shape(): - if not isinstance(s, (six.integer_types, np.generic, sm.Basic)): - raise ValueError( - 'Input and filter shapes must be int or symbolic in {} node {}'.format( - node.op, node.name)) - - in_format = node.attr.get('data_format') - if in_format not in ["NHWC", "NCHW"]: - raise ValueError( - 'Invalid data_format "{}" in {} node {}'.format(in_format, node.op, node.name)) - - padding = node.attr.get('padding') - if padding not in ['VALID', 'SAME', 'CUSTOM']: - raise ValueError( - 'Invalid padding algorithm "{}" in {} node {}'.format(padding, node.op, node.name)) - - inshape = input_type.get_shape() - filtshape = filter_type.get_shape() - - HW_strides = self._conv2d_strides_or_dilations( - 'strides', node.attr.get('strides'), in_format, 1) - HW_dilations = self._conv2d_strides_or_dilations( - 'dilations', node.attr.get('dilations'), in_format, 1) - pad = self._conv2d_pad( - node.attr.get('padding'), node.attr.get('pad'), - filtshape[:2]) # filtshape[:2] is kH, kW - - # TODO(daviddai): Dilation can be handled analogously as strides, but - # SSA shouldn't overfit to TF's terrible strides / dilations specs (we - # should disallow strides/dilation along N, C dimension). We should - # also allow spatial dimensions beyond 2D - #if not all(d == 1 for d in HW_dilations): - # raise NotImplementedError('Dilations other than 1 not implemented') - - N = inshape[0] - C = inshape[3] if in_format == "NHWC" else inshape[1] # "NCHW" - HW_in = inshape[1:3] if in_format == "NHWC" else inshape[2:] # "NCHW" - filtershape = [HW_dilations[r] * (filtshape[r] - 1) + 1 for r in range(2)] - HW_out_shape = [ - (HW_in[r] + pad[r] - filtershape[r]) // HW_strides[r] + 1 - for r in range(2) - ] - HW_out_shape = self._shape_as_ints(HW_out_shape) - - if node.op == 'DepthwiseConv2dNative': - out_channels = filtshape[2] * filtshape[3] - else: - out_channels = filtshape[3] - - if in_format.startswith('NH'): - retshape = [N] + HW_out_shape + [out_channels] - else: - retshape = [N, out_channels] + HW_out_shape - return builtins.tensor(input_type.get_primitive(), tuple(retshape)) - - - def visit_DepthwiseConv2dNative(self, node): - return self.visit_Conv2D(node) - - def visit_Conv2DBackpropInput(self, node): - attr_output_type = self._get_type_from_attr(node) - - if attr_output_type is not None: - return attr_output_type - else: - raise ValueError("[Type Inference] Conv2DBackpropInput type " - "inference case not handled") - - def visit_ResizeBilinear(self, node): - return self._get_type_from_attr(node) - - def visit_ResizeNearestNeighbor(self, node): - return self._get_type_from_attr(node) - - def _get_window_shape(self, ksize, height_idx): - if not isinstance(ksize, collections.Sized): - ksize = [ksize] - if len(ksize) == 1: - return (ksize[0], ksize[0]) - elif len(ksize) == 2: - return tuple(ksize) - elif len(ksize) == 4: - return list(ksize[height_idx:height_idx + 2]) - raise ValueError("Expected ksize to be scalar or length 1, 2, or 4") - - # The documentation for tf.nn.convolution has a good description - # of the proper output size. 
- # The documentation for tf.nn.convolution has a good description - # of the proper output size. - # https://www.tensorflow.org/api_docs/python/tf/nn/convolution - def _get_window_reduced_size(self, algorithm, input_size, window_size, stride): - if algorithm == 'VALID': - return sm.ceiling((input_size - (window_size - 1)) / stride) - if algorithm == 'SAME': - return sm.ceiling((input_size / stride)) - raise ValueError( - 'Invalid padding algorithm "{}"; expected "SAME" or "VALID"'.format(algorithm)) - - def _visit_pooling(self, node): - if len(node.inputs) != 1: - raise ValueError('Expected 1 input to {} node {}'.format(node.op, node.name)) - input_type = self.visit(node.inputs[0]) - if input_type is None: - return self._get_type_from_attr(node) - - data_format = node.attr.get('data_format') - if data_format not in ["NHWC", "NCHW"]: - raise ValueError( - 'Invalid data_format "{}" in {} node {}'.format(data_format, node.op, node.name)) - - padding = node.attr.get('padding') - if padding not in ['VALID', 'SAME']: - raise ValueError( - 'Invalid padding algorithm "{}" in {} node {}'.format(padding, node.op, node.name)) - - strides = self._conv2d_strides_or_dilations( - 'strides', node.attr.get('strides'), data_format, 1) - - height_idx = 1 if data_format.startswith('NH') else 2 # NHWC, NCHW, or NCHW_VECT_C - ksize = node.attr.get('ksize', [1]) - (window_height, window_width) = self._get_window_shape(ksize, height_idx) - - inshape = input_type.get_shape() - filtshape = list(inshape) - filtshape[height_idx] = self._get_window_reduced_size( - padding, inshape[height_idx], window_height, strides[0]) - filtshape[height_idx + 1] = self._get_window_reduced_size( - padding, inshape[height_idx + 1], window_width, strides[1]) - return builtins.tensor(input_type.get_primitive(), tuple(filtshape)) - - def visit_MaxPool(self, node): - return self._visit_pooling(node) - - def visit_AvgPool(self, node): - return self._visit_pooling(node) - - def visit_Equal(self, node): - return self._visit_broadcast(node, is_predicate=True) - - def visit_NotEqual(self, node): - return self._visit_broadcast(node, is_predicate=True) - - def visit_ExpandDims(self, node): - """ - Inputs: - 0 (str): The name of a tensor or scalar. - 1 (str): The name of an int indicating the dimension index to expand. Must be in - range [-rank(input) - 1, rank(input)] and able to be determined at compile - time. - """ - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - typea = self.visit(node.inputs[0]) - if not builtins.is_tensor(typea): - typea = builtins.tensor(typea, (1, )) - shape = [] - else: - shape = list(typea.get_shape()) # input[0] should be a tensor.
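# Worked example of the axis arithmetic below: for input shape [2, 3],
# axis = -1 gives cut = 2 + (-1) + 1 = 2, so the result shape is
# [2, 3, 1]; axis = 0 gives cut = 0 and result shape [1, 2, 3].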
- - axis_type = self.visit(node.inputs[1]) - axis_value = None - if builtins.is_tensor(axis_type): - axis_shape = axis_type.get_shape() - size = 1 - for s in axis_shape: - size *= s - if size != 1: - raise ValueError( - 'Unexpected value for axis specified for {} node {}'.format(node.op, node.name)) - axis_value = self._get_symbolic_value(node.inputs[1]).val[0] - elif builtins.is_int(axis_type): - axis_value = self._get_symbolic_value(node.inputs[1]).val - else: - raise ValueError( - 'Unexpected non-int axis specified for {} node {}'.format(node.op, node.name)) - - if axis_value < -len(typea.get_shape()) - 1 or axis_value > len(typea.get_shape()): - raise IndexError( - 'Axis value {} is out of bounds for {} node {}'.format( - axis_value, node.op, node.name)) - - if axis_value < 0: - cut = len(shape) + axis_value + 1 - else: - cut = axis_value - shape = shape[0:cut] + [1] + shape[cut:] - - rettype = builtins.tensor(typea.get_primitive(), tuple(shape)) - input_val = self._get_symbolic_value(node.inputs[0]) - if input_val is not None: - input_val = np.array(input_val.val).reshape(shape) - self._set_symbolic_value(node, rettype, input_val) - return rettype - - def visit_Fill(self, node): - """ - Inputs: - 0 (str): The name of a tensor describing the size of tensor to create. - 1 (str): The name of a scalar value to fill the new tensor with. - """ - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - typea = self.visit(node.inputs[0]) - typeb = self.visit(node.inputs[1]) - - shape_value = self._get_symbolic_value(node.inputs[0]) - if shape_value is not None: - shape_value = shape_value.val.flatten() - shape = tuple([int(s) if not is_symbolic(s) else s for s in shape_value]) - rettype = builtins.tensor(typeb, shape) - - fill_value = self._get_symbolic_value(node.inputs[1]) - if fill_value is not None and not any_symbolic_or_unknown(shape): - value = np.ones(shape, dtype=builtins.utils.nptype_from_builtin(typeb)) * fill_value.val - self._set_symbolic_value(node, rettype, value) - else: - # shape unknown. - # we should be able to derive a rank - shape = tuple(make_symbol(node.name + str(i)) for i in range(typea.get_shape()[0])) - rettype = builtins.tensor(typeb, shape) - return rettype - - def visit_RandomUniform(self, node): - """ - Input: - 0 (str): The name of a 1-D tensor indicating output shape - Attributes: - dtype (builtin): The scalar type to generate - """ - if len(node.inputs) != 1: - raise ValueError('Expected 1 input to {} node {}'.format(node.op, node.name)) - - # input[0] is the shape - # the value that would be in the tensor(shape=input[0]) - shape_type = self.visit(node.inputs[0]) - if not builtins.is_tensor(shape_type) or len(shape_type.get_shape()) != 1: - raise ValueError('Input must be a 1-D tensor to {} node {}'.format(node.op, node.name)) - - dtype = node.attr.get('dtype') - if dtype is None: - raise ValueError( - 'dtype is a required attribute in {} node {}'.format(node.op, node.name)) - if not builtins.is_scalar(dtype): - raise ValueError('dtype must be a scalar type in {} node {}'.format(node.op, node.name)) - - shape_value = self._get_symbolic_value(node.inputs[0]) - if shape_value is not None: - shape = tuple(shape_value.val.flatten()) - rettype = builtins.tensor(dtype, shape) - return rettype - - # shape unknown. 
- # I should be able to derive a rank - shape = tuple(make_symbol(node.name + str(i)) for i in range(len(shape_type.get_shape()))) - rettype = builtins.tensor(dtype, shape) - return rettype - - def visit_FloorMod(self, node): - return self._visit_broadcast(node) - - def visit_Pow(self, node): - return self._visit_broadcast(node) - - def visit_function(self, node): - pass - - def visit_function_entry(self, node): - pass - - def visit_Gather(self, node): - params_type = self.visit(node.inputs[0]) - indices_type = self.visit(node.inputs[1]) - axis_value = 0 - if len(node.inputs) == 3: - axis = self.visit(node.inputs[2]) - axis_value = self.gdict[node.inputs[2]].attr['symbolic_value'].val - node.attr['axis'] = axis_value - if params_type is None or indices_type is None: - return None - if not builtins.is_tensor(indices_type): - indices_shape = [] - else: - indices_shape = list(indices_type.get_shape()) - params_shape = list(params_type.get_shape()) - retshape = params_shape[:axis_value] + indices_shape + params_shape[axis_value + 1:] - if len(indices_shape) == 0 and len(params_shape) == 1: - # For scalar indices, rank(output) == rank(params) - 1 is the only - # possibility for gather to return non-tensor. - rettype = params_type.get_primitive() - else: - rettype = builtins.tensor(params_type.get_primitive(), retshape) - - if self.gdict[node.inputs[0]].attr['symbolic_value'] is not None and \ - self.gdict[node.inputs[1]].attr['symbolic_value'] is not None and \ - axis_value is not None: - params_val = self.gdict[node.inputs[0]].attr['symbolic_value'].val - indices_val = self.gdict[node.inputs[1]].attr['symbolic_value'].val - retval = np.take(params_val, indices_val, axis=axis_value) - retval = try_to_np_type(retval) - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = retval - return rettype - - def visit_GatherV2(self, node): - node.op = 'Gather' - return self.visit_Gather(node) - - def visit_GatherNd(self, node): - params_type = self.visit(node.inputs[0]) - indices_type = self.visit(node.inputs[1]) - if params_type is None or indices_type is None: - return None - - indices_shape = [] - if not builtins.is_tensor(indices_type): - indices_shape = [] - else: - indices_shape = list(indices_type.get_shape()) - params_shape = list(params_type.get_shape()) - retshape = indices_shape[:-1] + params_shape[indices_shape[-1]:] - rettype = builtins.tensor(params_type.get_primitive(), retshape) - - return rettype - - def visit_ScatterNd(self, node): - indices_type = self.visit(node.inputs[0]) - updates_type = self.visit(node.inputs[1]) - shapes_type = self.visit(node.inputs[2]) - if updates_type is None or shapes_type is None: - return None - - retshape = [] - if 'symbolic_value' in self.gdict[node.inputs[2]].attr: - size = list(self.gdict[node.inputs[2]].attr['symbolic_value'].val) - for i in range(len(size)): - if is_symbolic_or_unknown(size[i]): - retshape.append(make_symbol(node.name + '_' + str(i))) - else: - retshape.append(size[i]) - if len(retshape) == 0: - rettype = updates_type.get_primitive() - else: - rettype = builtins.tensor(updates_type.get_primitive(), retshape) - - rettype = builtins.tensor(updates_type.get_primitive(), retshape) - - return rettype - - def visit_GatherTree(self, node): - # TODO: To implement? 
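-        # Note (assumed fallback): until GatherTree is implemented, the type
-        # comes from whatever output shape TensorFlow recorded in the node's
-        # attributes, via _get_type_from_attr below.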
- return self._get_type_from_attr(node) - - def visit_GreaterEqual(self, node): - return self._visit_broadcast(node, is_predicate=True) - - def visit_Greater(self, node): - return self._visit_broadcast(node, is_predicate=True) - - def visit_Less(self, node): - return self._visit_broadcast(node, is_predicate=True) - - def visit_LessEqual(self, node): - return self._visit_broadcast(node, is_predicate=True) - - def visit_make_tuple(self, node): - types = [self.visit(i) for i in node.inputs] - self.propagate_tensor_array(node) - - if any([t is None for t in types]): - logging.warning("make_tuple at %s has an unknown type %s", node.name, str(types)) - types = [t if t is not None else builtins.unknown for t in types] - return builtins.tuple(types) - - def visit_BatchMatMul(self, node): - # node.op = "MatMul" - # Batch matmul was deprecated, we implement this in MatMul - return self.visit_MatMul(node) - - def visit_BatchMatMulV2(self, node): - node.op = 'BatchMatMul' - return self.visit_BatchMatMul(node) - - def _shape_transposed(self, shape): - shape = list(shape) - shape[-1], shape[-2] = shape[-2], shape[-1] - return tuple(shape) - - def visit_Einsum(self, node): - - if len(node.inputs) > 2: - raise ValueError('No support for more than 2 inputs to {} node {} now.'.format(node.op, node.name)) - output_shape = node.attr.get("_output_shapes") - if not len(output_shape) == 1: - raise ValueError('Expect only one output for Einsum.') - equation = node.attr.get('equation') - if not '->' in equation: - raise ValueError('current einsum does not support matrix diagonal operations.') - - input_tensor_types = [self.visit(input) for input in node.inputs] - input_shapes = [input.get_shape() for input in input_tensor_types] - input_types = [input.get_primitive() for input in input_tensor_types] - - # Parse equation - prefix = equation.split('->')[0] - suffix = equation.split('->')[1] - - # Pattern matching - inference_shape = [] - if not ',' in prefix: - assert(len(input_shapes) == 1) - input_shape = input_shapes[0] - map = dict(zip(prefix, input_shape)) - inference_shape = [map[axis] for axis in suffix] - else: - a_shape = input_shapes[0] - b_shape = input_shapes[1] - map = {} - a, b = prefix.split(',') - for i, axis in enumerate(a): - map[axis] = a_shape[i] - for i, axis in enumerate(b): - map[axis] = b_shape[i] - inference_shape = [map[axis] for axis in suffix] - - # Make inference type symbolic if it contains symbol - inference_shape = [axis if axis != -1 else make_symbol(node.name + "_" + str(i)) - for i, axis in enumerate(inference_shape)] - inference_type = input_types[0] if len(input_types) == 1 else promote_types(*input_types) - - return builtins.tensor(inference_type, inference_shape) - - def visit_MatMul(self, node): - """ - Inputs: - 0 (str): Name of a tensor with rank >= 2 after any transpositions - 1 (str): Name of a tensor with rank >= 2 after any transpositions - Attributes: - transpose_a (bool): If True, transpose the first input before multiplying - transpose_b (bool): If True, transpose the second input before multiplying - adj_x (bool): If True, adjoint the first input before multiplying - adj_y (bool): If True, adjoint the second input before multiplying - """ - # - # Validate inputs - # - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - typea = self.visit(node.inputs[0]) - typeb = self.visit(node.inputs[1]) - if typea is None or typeb is None: - return self._get_type_from_attr(node) - - if not builtins.is_tensor(typea) or not 
builtins.is_tensor(typeb): - raise ValueError('Inputs must be tensors in {} node {}'.format(node.op, node.name)) - - mata_shape = typea.get_shape() - matb_shape = typeb.get_shape() - if len(mata_shape) < 2 or len(matb_shape) < 2: - raise ValueError( - 'Inputs must have rank 2 or greater in {} node {}'.format(node.op, node.name)) - - # - # Validate attributes - # this handles the parameters from both MatMul and BatchMatMul - # - transpose_a = node.attr.get('transpose_a', False) - transpose_b = node.attr.get('transpose_b', False) - adj_x = node.attr.get('adj_x', False) - adj_y = node.attr.get('adj_y', False) - if (transpose_a and adj_x) or (transpose_b and adj_y): - raise ValueError('transpose and adjoint are mutually exclusive a given input') - transpose_a = adj_x or transpose_a - transpose_b = adj_y or transpose_b - - # Apply transpositions - if transpose_a: - mata_shape = self._shape_transposed(mata_shape) - if transpose_b: - matb_shape = self._shape_transposed(matb_shape) - - # Check shape compatibility - if not all(is_symbolic_or_unknown(s) for s in [mata_shape[-1], matb_shape[-2]]): - if mata_shape[-1] != matb_shape[-2]: - raise ValueError('Incompatible dimensions in {} op {}'.format(node.op, node.name)) - - # Figure out the resulting shape. Outer dimensions are broadcastable. - outera = mata_shape[0:-2] - outerb = matb_shape[0:-2] - outer_shape = self._broadcast_shape(node, outera, outerb) - shape = outer_shape + [mata_shape[-2], matb_shape[-1]] - - if len(shape) > 2: - node.op = 'BatchMatMul' - - primitive = self._promoted_primitive_type(typea, typeb) - return builtins.tensor(primitive, tuple(shape)) - - def visit_LSTMBlock(self, node): - intype = self.visit(node.inputs[0]) - W_type = self.visit(node.inputs[1]) - - mode = node.attr["mode"] - shape = list(intype.get_shape()) - W_shape = list(W_type.get_shape()) - hidden_shape = W_shape[-1] / 4 - if node.attr.get("bidirectional", False): - hidden_shape /= 2 - input_shape = W_shape[0] - hidden_shape - assert shape[-1] == input_shape, "Input size doesn't match" - shape[-1] = hidden_shape - if mode == "cell": - # returns output/cell state/hidden state - types = [builtins.tensor(intype.get_primitive(), tuple(shape)) for _ in range(3)] - elif mode == "encoder": - hidden_shape = shape[:] - output_shape = shape[:] - if not node.attr["output_all_states"]: - if node.attr["time_major"]: - output_shape[0] = 1 - else: - output_shape[1] = 1 - - if node.attr.get("bidirectional", False): - output_shape[-1] *= 2 - output_type = builtins.tensor(intype.get_primitive(), tuple(output_shape)) - hidden_type = builtins.tensor(intype.get_primitive(), tuple(hidden_shape)) - types = [output_type] + [hidden_type] * 4 - else: - output_type = builtins.tensor(intype.get_primitive(), tuple(output_shape)) - hidden_type = builtins.tensor(intype.get_primitive(), tuple(hidden_shape)) - types = [output_type] + [hidden_type] * 2 - else: - raise ValueError("Unknown mode type for LSTMBlock") - - return builtins.tuple(types) - - def visit_Mul(self, node): - return self._visit_broadcast(node) - - def visit_Neg(self, node): - return self._visit_unary(node) - - def visit_NoOp(self, node): - return builtins.void - - def visit_Pack(self, node): - input_values = [] - intype = None - rettype = None - for i in node.inputs: - intype = self.visit(i) - input_values.append(self.gdict[i].attr['symbolic_value']) - if all(i is not None for i in input_values): - # we can force the value! 
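-            # Illustrative example (not from the original source): two known
-            # inputs [1, 2] and [3, 4] with axis=0 fold to
-            # np.stack([[1, 2], [3, 4]], axis=0), a constant of shape (2, 2).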
-            for i in range(len(input_values)):
-                input_values[i] = input_values[i].val
-                input_values[i] = np.array(input_values[i])
-            val = np.stack(arrays=input_values, axis=node.attr['axis'])
-            primitive = intype
-            if builtins.is_tensor(intype):
-                primitive = intype.get_primitive()
-            rettype = builtins.tensor(primitive, tuple(val.shape))
-            node.attr['symbolic_value'] = rettype()
-            node.attr['symbolic_value'].val = val
-        if rettype is not None:
-            return rettype
-        else:
-            output_shapes = node.attr['_output_shapes']
-            if len(output_shapes[0]) > 0:
-                inference_shape = []
-                if builtins.is_tensor(intype):
-                    inference_shape = list(intype.get_shape())
-                axis = node.attr['axis'] if node.attr['axis'] != -1 else len(inference_shape)
-                inference_shape.insert(axis, len(node.inputs))
-                return builtins.tensor(node.attr['T'], tuple(inference_shape))
-            elif 'N' in node.attr:
-                return builtins.tensor(node.attr['T'], (node.attr['N'],))
-            return None
-
-    def visit_Pad(self, node):
-        lefttype = self.visit(node.inputs[0])
-        self.visit(node.inputs[1])
-        s = self.gdict[node.inputs[1]].attr['symbolic_value']
-        if not s:
-            attr_type = self._get_type_from_attr(node)
-            if not attr_type and self.gdict[node.inputs[1]].datatype and not any_symbolic_or_unknown(
-                    self.gdict[node.inputs[1]].datatype.T[1]):
-                # at least we can get a rank
-                rank = self.gdict[node.inputs[1]].datatype.T[1][0]
-                ret_shape = [make_symbol(node.name + "_" + str(i)) for i in range(rank)]
-                return builtins.tensor(lefttype.get_primitive(), ret_shape)
-            else:
-                return attr_type
-        s = s.val
-        assert len(s.shape) == 2, "padding specs must be of shape [r, 2]" \
-            + " where r is the rank of the input tensor"
-        if not builtins.is_tensor(lefttype):
-            raise RuntimeError("Pad only operates on tensor type, but got " + str(lefttype))
-        retshape = list(lefttype.get_shape())
-        for i in range(len(retshape)):
-            retshape[i] = retshape[i] + s[i][0] + s[i][1]
-        rettype = builtins.tensor(lefttype.get_primitive(), retshape)
-        left_sym_val = self.gdict[node.inputs[0]].attr["symbolic_value"]
-        if left_sym_val:
-            node.attr["symbolic_value"] = rettype()
-            node.attr["symbolic_value"].val = np.pad(
-                left_sym_val.val, s, "constant", constant_values=node.attr['constant_values'])
-        return rettype
-
-    def visit_PadV2(self, node):
-        return self.visit_Pad(node)
-
-    def visit_MirrorPad(self, node):
-        return self.visit_Pad(node)
-
-    def visit_Placeholder(self, node):
-        return self._get_type_from_attr(node)
-
-    def visit_PlaceholderWithDefault(self, node):
-        return self._get_type_from_attr(node)
-
-    def visit_Range(self, node):
-        """
-        Inputs:
-            All must be int32, int64, float32, float64 or a single-value tensor thereof.
-
-            If len(inputs) == 2:
-                0: limit
-                1: delta
-            elif len(inputs) == 3:
-                0: start
-                1: limit
-                2: delta
-        """
-        if len(node.inputs) not in (2, 3):
-            raise ValueError('Expected 2 or 3 inputs to {} node {}'.format(node.op, node.name))
-
-        # Ensure all inputs have valid types
-        input_types = [self.visit(input) for input in node.inputs]
-        if any([input_type is None for input_type in input_types]):
-            # Non-const propagation.
-            return None
-
-        # Figure out the primitive return type.
-        # We use the highest-ranked type among our inputs.
-        dtypes = [builtins.int32, builtins.int64, builtins.fp32, builtins.fp64]
-        dtype_ranks = dict(zip(dtypes, range(0, len(dtypes))))
-
-        datatype = dtypes[0]
-        for dt in input_types:
-            if builtins.is_tensor(dt):
-                dt_shape = dt.get_shape()
-                if dt_shape and (len(dt_shape) != 1 or dt_shape[0] != 1):
-                    raise ValueError(
-                        'Invalid tensor input with more than one value in {} node {}'.format(
-                            node.op, node.name))
-                dt = dt.get_primitive()
-            dt_rank = dtype_ranks.get(dt)
-            if dt_rank is None:
-                raise ValueError('Invalid input datatype to {} node {}'.format(node.op, node.name))
-            if dt_rank > dtype_ranks[datatype]:
-                datatype = dt
-
-        # Ensure all inputs have symbolic values
-        input_values = [self._get_symbolic_value(input) for input in node.inputs]
-        if any([input_value is None for input_value in input_values]):
-            # Non-fixed value propagation (e.g. TensorArray)
-            return builtins.tensor(datatype, [make_symbol(node.name + '_range')])
-
-        # Extract parameters from symbolic values
-        input_values = [
-            iv.val[0] if isinstance(iv.val, np.ndarray) else iv.val for iv in input_values
-        ]
-
-        # Interpret positional arguments
-        if len(node.inputs) == 2:
-            limit_type, delta_type = input_types
-            start = 0
-            limit, delta = input_values
-        elif len(node.inputs) == 3:
-            start_type, limit_type, delta_type = input_types
-            start, limit, delta = input_values
-        else:
-            assert False, "logic error"
-
-        # Figure out the node type
-        shape = (limit - start) / delta
-        shape = shape if is_symbolic(shape) else int(math.ceil(shape))
-        rettype = builtins.tensor(datatype, [shape])
-
-        # Evaluate the symbolic value
-        if not any_symbolic_or_unknown([start, limit, delta]):
-            nptype = builtins.utils.nptype_from_builtin(datatype)
-            self._set_symbolic_value(
-                node, rettype, np.arange(start=start, stop=limit, step=delta, dtype=nptype))
-        elif delta == 1:
-            self._set_symbolic_value(node, rettype, sm.Interval(start, limit))
-        return rettype
-
-    def visit_Rank(self, node):
-        # This is also interesting. TensorFlow returns a 0-D tensor here,
-        # while we transform 0-D tensors to scalars during parsing.
-        input_type = self.visit(node.inputs[0])
-        input_shape = input_type.get_shape()
-        node.attr['symbolic_value'] = builtins.int32()
-        node.attr['symbolic_value'].val = len(input_shape)
-        return builtins.int32
-
-    def visit_Relu(self, node):
-        return self._visit_unary(node)
-
-    def visit_PRelu(self, node):
-        ret = self._visit_unary(node)
-
-        # If alpha is specified as a node, see if we can get its value now.
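-        # (illustrative: node.attr['alpha'] may hold the *name* of a const
-        # node, e.g. 'prelu/alpha' (a hypothetical name), which is resolved
-        # below to a Python scalar)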
- alpha_node = node.attr.get('alpha', None) - if isinstance(alpha_node, six.string_types): - alpha_type = self.visit(alpha_node) - alpha = self._get_symbolic_value(alpha_node) - if alpha is None: - raise ValueError('PRelu alpha node could not be evaluated') - alpha = alpha.val - if isinstance(alpha, np.ndarray): - if alpha.size != 1: - raise ValueError('PRelu alpha must be a single value') - alpha = np.asscalar(alpha) - node.attr['alpha'] = alpha - return ret - - def visit_Relu6(self, node): - return self._visit_unary(node) - - def visit_LeakyRelu(self, node): - return self._visit_unary(node) - - def visit_Selu(self, node): - return self._visit_unary(node) - - def visit_Reshape(self, node): - def check_volumetric_constraint(left_volume, inshape): - right_volume = 1 - left_symbols = set() - right_symbols = set() - try: - left_symbols = left_volume.free_symbols - except: - pass - try: - right_symbols = right_volume.free_symbols - except: - pass - # Generally, we want to solve for right in terms of left. But this - # is kinda annoying actually. - shape = list(inshape) - for i in shape: - right_volume = right_volume * i - if is_symbolic(right_volume): - constraints = [left_volume - right_volume] - solve_for = [s for s in shape if is_symbolic(s)] - - for rightsym in solve_for: - sol = sm.solve(constraints, [rightsym], dict=True) - if not isinstance(sol, list): - sol = [sol] - # look for an acceptable solution - for s in sol: - if 0 in s.values(): - continue - for i in range(len(shape)): - if shape[i] in s: - v = s[shape[i]] - if len(v.free_symbols - left_symbols) > 0: - continue - try: - shape[i] = int(v) - except: - shape[i] = v - return shape - - assert (len(node.inputs) == 2) - lefttype = self.visit(node.inputs[0]) - if builtins.is_tensor(lefttype): - left_primitive = lefttype.get_primitive() - left_shape = lefttype.get_shape() - left_volume = 1 - for i in left_shape: - left_volume = left_volume * i - else: - left_primitive = lefttype - left_volume = 1 - if lefttype is None: - return None - self.visit(node.inputs[1]) - if self.gdict[node.inputs[1]].attr['symbolic_value'] is not None: - shape = list(self.gdict[node.inputs[1]].attr['symbolic_value'].val) - replace_neg_1_with_symbolic(shape, node.name + '_shape') - shape = check_volumetric_constraint(left_volume, shape) - r = builtins.tensor(left_primitive, shape) - if self.gdict[node.inputs[0]].attr['symbolic_value'] is not None \ - and all(isscalar(a) for a in shape): - node.attr['symbolic_value'] = r() - node.attr['symbolic_value'].val = reshape_with_symbol( - self.gdict[node.inputs[0]].attr['symbolic_value'].val, shape) - return r - - # check if we have answer from attributes. 
- # Otherwise the final fall back is just [-1] * rank - try: - attr_type = self._get_type_from_attr(node) - except: - attr_type = None - if attr_type is not None: - shape = check_volumetric_constraint(left_volume, attr_type.get_shape()) - return builtins.tensor(attr_type.get_primitive(), shape) - elif self.gdict[node.inputs[1]].datatype is not None and not any_symbolic_or_unknown( - self.gdict[node.inputs[1]].datatype.T[1]): - # at least we can get a rank - rank = self.gdict[node.inputs[1]].datatype.T[1][0] - ret_shape = [make_symbol(node.name + "_" + str(i)) for i in range(rank)] - return builtins.tensor(left_primitive, ret_shape) - - def visit_return(self, node): - return self._visit_unary(node) - - def visit_ReverseSequence(self, node): - assert (len(node.inputs) == 2) - return self.visit(node.inputs[0]) - - def visit_ReverseV2(self, node): - assert (len(node.inputs) == 2) - return self.visit(node.inputs[0]) - - def visit_Sin(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.sin(input.attr['symbolic_value'].val) - return rettype - - def visit_Cos(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.cos(input.attr['symbolic_value'].val) - return rettype - - def visit_Tan(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.tan(input.attr['symbolic_value'].val) - return rettype - - def visit_Tanh(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.tanh(input.attr['symbolic_value'].val) - return rettype - - def visit_Sqrt(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = input.attr['symbolic_value'].val ** 0.5 - return rettype - - def visit_Rsqrt(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = input.attr['symbolic_value'].val ** -0.5 - return rettype - - def visit_Square(self, node): - rettype = self._visit_unary(node) - input = self.gdict[node.inputs[0]] - if input.attr['symbolic_value'] is not None: - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = input.attr['symbolic_value'].val ** 2 - return rettype - - def visit_Exp(self, node): - return self._visit_unary(node) - - def visit_Shape(self, node): - # need to parse node itself. 
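-        # Illustrative note: Shape yields a 1-D int32 tensor of length rank(input);
-        # e.g. an input of type tensor(fp32, (2, 3, 5)) gives tensor(int32, (3,))
-        # with symbolic value [2, 3, 5].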
- parent_type = self.visit(node.inputs[0]) - shape = [] - if parent_type is None or not builtins.is_tensor(parent_type): - return builtins.tensor(builtins.int32, [make_symbol(node.name + '_shape')]) - if parent_type is not None: - shape = parent_type.get_shape() - rettype = builtins.tensor(builtins.int32, [len(shape)]) - else: - rettype = builtins.tensor(builtins.int32, [make_symbol(node.name + '_shape')]) - if len(shape) > 0: - # we have the true value - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.array(shape) - return rettype - - def visit_Select(self, node): - assert len(node.inputs) == 3 - typecond = self.visit(node.inputs[0]) - - if builtins.is_tensor(typecond): - # this is a masking op. - # change the name - node.op = 'SelectMask' - - typea = self.visit(node.inputs[1]) - typeb = self.visit(node.inputs[2]) - - if all([builtins.is_tensor(atype) for atype in [typecond, typea, typeb]]): - rankcond = len(typecond.get_shape()) - ranka = len(typea.get_shape()) - rankb = len(typeb.get_shape()) - - assert (ranka == rankb) - if rankcond == 1 and ranka > 1: - node.attr['expand_dims'] = [-i - 1 for i in range(ranka - rankcond)] - - if typea is not None and typeb is not None: - compatible, restype = builtins.is_tensor_and_is_compatible_general_shape(typea, typeb) - if compatible: - return restype - elif typea == typeb: - return typea - else: - logging.error( - "%s != %s", builtins.get_type_info(typea), builtins.get_type_info(typeb)) - - if typea is not None: - return typea - else: - return typeb - - def visit_SelectMask(self, node): - return self.visit_Select(node) - - def visit_SelectV2(self, node): - return self.visit_Select(node) - - def visit_iff(self, node): - # an op we inserted. equivalent to the functional IF - # IF cond: true: false - assert (len(node.inputs) == 3) - typecond = self.visit(node.inputs[0]) - # assert (builtins.is_tensor(typecond) == False) - - typea = self.visit(node.inputs[1]) - typeb = self.visit(node.inputs[2]) - if typea is not None and typeb is not None: - - compatible, restype = builtins.is_tensor_and_is_compatible_general_shape(typea, typeb) - if compatible: - return restype - elif typea == typeb: - return typea - else: - logging.warning( - "In an IFF node %s != %s", builtins.get_type_info(typea), - builtins.get_type_info(typeb)) - return typea - - if typea is not None: - return typea - else: - return typeb - - def visit_Where(self, node): - input_type = self.visit(node.inputs[0]) - if len(node.inputs) == 3 and builtins.is_tensor(input_type): - return self.visit_Select(node) - elif len(node.inputs) == 3: - return self.visit_iff(node) - else: - assert (len(node.inputs) == 1) - rank = len(self.gdict[node.inputs[0]].datatype.get_shape()) - ret_shape = [make_symbol(node.name + "_" + str(0)), rank] - return builtins.tensor(builtins.int32, ret_shape) - - def visit_Sigmoid(self, node): - return self._visit_unary(node) - - def visit_Elu(self, node): - return self._visit_unary(node) - - def visit_Slice(self, node): - for i in node.inputs: - self.visit(i) - input_type = self.visit(node.inputs[0]) - input_shape = input_type.get_shape() - input_value = self.gdict[node.inputs[0]].attr['symbolic_value'] - try: - begin = list(self.gdict[node.inputs[1]].attr['symbolic_value'].val) - size = list(self.gdict[node.inputs[2]].attr['symbolic_value'].val) - end = [ - int(begin[i] + size[i]) if size[i] != -1 else 2147483647 for i in range(len(begin)) - ] - assert builtins.is_tensor(input_type) - input_shape = input_type.get_shape() - end = [min(i, j) for 
i, j in zip(end, input_shape)] - size = [min(s, e - b) for s, b, e in zip(size, begin, end)] - slices = [[int(begin[i]), int(end[i]), 1] for i in range(len(begin))] - node.attr['slice'] = slices - node.attr['begin_masks'] = [idx for idx, value in enumerate(begin) if value == 0] - node.attr['end_masks'] = [idx for idx, value in enumerate(end) if value == 2147483647] - node.attr['squeeze'] = [] - output_value = None - if input_value is not None: - slices = [slice(*i) for i in slices] - slices = tuple(slices) - res = input_value.val[slices] - - if isscalar(res): - rettype = input_type.get_primitive() - output_value = rettype - output_value.val = res - elif not isscalar(res): - rettype = builtins.tensor(input_type.get_primitive(), res.shape) - output_value = rettype() - output_value.val = res - else: - retshape = [] - for i in range(len(begin)): - if is_symbolic_or_unknown(size[i]): - if is_symbolic_or_known(input_shape[i]) and is_symbolic_or_known(begin[i]): - retshape.append(input_shape[i] - begin[i]) - else: - retshape.append(make_symbol(node.name + '_' + str(i))) - else: - retshape.append(size[i]) - if len(retshape) == 0: - rettype = input_type.get_primitive() - else: - rettype = builtins.tensor(input_type.get_primitive(), retshape) - node.attr['symbolic_value'] = output_value - except: - # unable to infer shape - if 'slice' in node.attr: - del node.attr['slice'] - node.attr['squeeze'] = [] - try: - size = list(self.gdict[node.inputs[2]].attr['symbolic_value'].val) - try: - begin = list(self.gdict[node.inputs[1]].attr['symbolic_value'].val) - begin = to_int(begin) - size = [ - input_shape[i] - begin[i] if s in (-1, 2147483647) else s - for i, s in enumerate(size) - ] - except: - # Adjust size if begin is available, otherwise trust the - # materialized size assuming it's reasonable. 
- if max(size) == 2147483647: - raise RuntimeError() - size = to_int(size) - - if len(size) == 1 and size[0] == 1: - rettype = input_type.get_primitive() - else: - rettype = builtins.tensor(input_type.get_primitive(), size) - node.attr['generic_slice'] = True - node.attr['size'] = size - except: - retshape = [] - for i in range(len(input_shape)): - retshape.append(make_symbol(node.name + '_' + str(i))) - if len(retshape) == 0: - rettype = input_type.get_primitive() - else: - rettype = builtins.tensor(input_type.get_primitive(), retshape) - return rettype - - def visit_Softmax(self, node): - return self._visit_unary(node) - - def visit_Softplus(self, node): - return self._visit_unary(node) - - def visit_LogSoftmax(self, node): - return self._visit_unary(node) - - def visit_Split(self, node, mode='Split'): - datatype = None - if 'T' in node.attr and node.attr['T'] is not None: - datatype = node.attr['T'] - elif 'dtype' in node.attr and node.attr['dtype'] is not None: - datatype = node.attr['dtype'] - # try to fill unknown output shapes from the input - shapes = None - num_split = None - if 'num_split' in node.attr: - num_split = node.attr['num_split'] - if '_output_shapes' in node.attr: - shapes = node.attr['_output_shapes'] - split_dim_idx = 2 if mode == 'SplitV' else 0 - value_idx = 0 if mode == 'SplitV' else 1 - self.visit(node.inputs[split_dim_idx]) - if mode == 'SplitV': - self.visit(node.inputs[1]) - if self.gdict[node.inputs[1]].attr['symbolic_value'] is not None: - split_size_type = self.gdict[node.inputs[1]].datatype - split_size = self.gdict[node.inputs[1]].attr['symbolic_value'].val - if not builtins.is_tensor(split_size_type): - mode = 'Split' - else: - num_split = split_size.shape[0] - node.attr['num_split'] = num_split - try_materialize = False - # this *must!* be constant - if self.gdict[node.inputs[split_dim_idx]].attr['symbolic_value'] is not None: - split_dim = self.gdict[node.inputs[split_dim_idx]].attr['symbolic_value'].val - input_type = self.visit(node.inputs[value_idx]) - if datatype is None: - datatype = input_type.get_primitive() - node.attr['split_dim'] = int(split_dim) - if input_type is not None: - input_shape = input_type.get_shape() - from_shapes_ok = False - try: - if shapes is not None: - # use the type infered shapes as much as possible - for s in shapes: - for k in range(len(input_shape)): - if k != split_dim and is_symbolic_or_unknown(s[k]): - s[k] = input_shape[k] - elif k == split_dim and is_symbolic_or_unknown(s[k]): - s[k] = input_shape[k] // num_split - node.attr['split'] = [s[split_dim] for s in shapes] - from_shapes_ok = True - except: - pass - if not from_shapes_ok: - output_shape = list(input_shape[:]) - idim = input_shape[split_dim] - if mode == 'Split': - assert (idim % num_split == 0) - if is_symbolic_or_known(idim): - node.attr['split'] = [idim // num_split] * num_split - output_shape[split_dim] = idim // num_split - else: - node.attr['split'] = [-1] * num_split - output_shape[split_dim] = -1 - shapes = [output_shape] * num_split - try_materialize = True - else: - assert (np.sum(split_size) == idim or is_symbolic_or_unknown(idim)) - node.attr['split'] = list(split_size) - shapes = [output_shape[:] for _ in range(len(split_size))] - for idx, s in enumerate(split_size): - shapes[idx][split_dim] = s - - types = [builtins.tensor(datatype, tuple(shape)) for shape in shapes] - else: - types = [ - builtins.tensor(datatype, tuple(shape)) for shape in node.attr['_output_shapes'] - ] - rettype = builtins.tuple(types) - if try_materialize: - value = 
try_get_non_sym_val(self.gdict[node.inputs[value_idx]])
-            if value is not None:
-                node.attr["symbolic_value"] = rettype()
-                node.attr["symbolic_value"].val = np.split(value, num_split, axis=split_dim)
-        return rettype
-
-    def visit_SplitV(self, node):
-        # like Split, but with explicit split sizes; implemented in visit_Split
-        return self.visit_Split(node, mode='SplitV')
-
-    def visit_MatrixBandPart(self, node):
-        assert (len(node.inputs) == 3)
-        return self.visit(node.inputs[0])
-
-    def visit_Unpack(self, node):
-        input_type = self.visit(node.inputs[0])
-        input_shape = input_type.get_shape()
-        axis = node.attr['axis']
-        assert all(dim > 0 for dim in input_shape[:axis])
-        length = input_shape[axis]
-        retshape = input_shape[:axis] + input_shape[axis + 1:]
-        return builtins.tuple([builtins.tensor(input_type.get_primitive(), tuple(retshape))] *
-                              length)
-
-    def visit_StopGradient(self, node):
-        # this is just identity
-        node.op = 'Identity'
-        return self._visit_unary(node)
-
-    def visit_Mean(self, node):
-        return self._visit_reduce(node)
-
-    def visit_Squeeze(self, node):
-        sourcetype = self.visit(node.inputs[0])
-        if sourcetype is not None:
-            assert builtins.is_tensor(sourcetype)  # only tensors are squeeze-able
-            squeezed_shape = list(sourcetype.T[1])
-            d = sorted(node.attr['squeeze_dims'])
-            if len(d) > 0:
-                d = d[::-1]
-                for i in d:
-                    squeezed_shape.pop(i)
-            else:
-                squeezed_shape = [s for s in squeezed_shape if s != 1]
-            rettype = builtins.tensor(sourcetype.get_primitive(), tuple(squeezed_shape))
-            if self.gdict[node.inputs[0]].attr['symbolic_value'] is not None:
-                val = self.gdict[node.inputs[0]].attr['symbolic_value'].val
-                # np.squeeze(val, axis=()) would squeeze nothing, so squeeze all
-                # unit dimensions when no explicit squeeze_dims were given.
-                retval = np.squeeze(val, axis=tuple(d) if d else None)
-                node.attr['symbolic_value'] = rettype()
-                node.attr['symbolic_value'].val = retval
-            return rettype
-        datatype = self._get_type_from_attr(node)
-        return datatype
-
-    def _bitstring_to_reverse_indices(self, i):
-        # returns the indices of set bits, lowest bit first
-        indices = []
-        ctr = 0
-        if isinstance(i, list):
-            return i
-        while i > 0:
-            if i % 2 == 1:
-                indices.append(ctr)
-            i = i // 2
-            ctr += 1
-        return indices
-
-    def _isKthBitSet(self, n, k):
-        return bool(n & (1 << k))
-
-    def visit_StridedSlice(self, node):
-        # this is massively complicated
-        # https://www.tensorflow.org/api_docs/python/tf/strided_slice
-        for i in node.inputs:
-            self.visit(i)
-        input_type = self.visit(node.inputs[0])
-        input_shape = input_type.get_shape()
-        # unknown input shape. not common. should not happen really.
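-        # (illustrative: an empty shape here means the input degenerated to a
-        # scalar during inference, so there is nothing to slice and the input
-        # type is passed through unchanged)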
- if len(input_shape) == 0: - return input_type - - input_value = self.gdict[node.inputs[0]].attr['symbolic_value'] - - begin_value = self.gdict[node.inputs[1]].attr['symbolic_value'] - end_value = self.gdict[node.inputs[2]].attr['symbolic_value'] - stride_value = self.gdict[node.inputs[3]].attr['symbolic_value'] - - # these masks here are really really complicated - assert node.attr.get('new_axis_mask', 0) == 0 - - if all([begin_value, end_value, stride_value]): - input_rank = len(input_shape) - num_spec = len(begin_value.val) - assert input_rank >= num_spec - - dim = 0 - begin_mask, end_mask, shrink_axes = [], [], [] - begin_ids, end_ids, strides = [], [], [] - for spec_id in range(num_spec): - if self._isKthBitSet(node.attr.get('ellipsis_mask', 0), spec_id): - num_ellipsis_dims = input_rank - num_spec + 1 - for _ in range(num_ellipsis_dims): - begin_mask.append(dim) - end_mask.append(dim) - begin_ids.append(0) - end_ids.append(0) - strides.append(1) - dim += 1 - elif self._isKthBitSet(node.attr.get('shrink_axis_mask', 0), spec_id): - shrink_axes.append(dim) - begin_ids.append(begin_value.val[spec_id]) - end_ids.append(end_value.val[spec_id]) - strides.append(stride_value.val[spec_id]) - dim += 1 - else: - if self._isKthBitSet(node.attr.get('begin_mask', 0), spec_id): - begin_mask.append(dim) - - if self._isKthBitSet(node.attr.get('end_mask', 0), spec_id): - end_mask.append(dim) - - begin_ids.append(begin_value.val[spec_id]) - end_ids.append(end_value.val[spec_id]) - strides.append(stride_value.val[spec_id]) - dim += 1 - - begin_value = builtins.tensor(begin_value.get_primitive(), (input_rank,))() - begin_value.val = np.array(begin_ids) - - end_value = builtins.tensor(end_value.get_primitive(), (input_rank,))() - end_value.val = np.array(end_ids) - - stride_value = builtins.tensor(stride_value.get_primitive(), (input_rank,))() - stride_value.val = np.array(strides) - else: - assert node.attr.get('ellipsis_mask', 0) == 0 - shrink_axes = self._bitstring_to_reverse_indices(node.attr.get('shrink_axis_mask', 0)) - begin_mask = self._bitstring_to_reverse_indices(node.attr.get('begin_mask', 0)) - end_mask = self._bitstring_to_reverse_indices(node.attr.get('end_mask', 0)) - - # try to solve for value if possible - output_value = None - rettype = None - if not None in [input_value, begin_value, end_value, stride_value]: - begin = [int(i) for i in list(begin_value.val[:])] - end = [int(i) for i in list(end_value.val[:])] - for i in begin_mask: - begin[i] = 0 - for i in end_mask: - end[i] = None - # Similar issue to https://github.com/tensorflow/tensorflow/issues/19260 - for i in shrink_axes: - if begin[i] is None: - end[i] = 1 - elif begin[i] == -1: - end[i] = None - else: - end[i] = begin[i] + 1 - slices = [slice(*i) for i in zip(begin, end, stride_value.val)] - # insert missing slices - for i in range(len(slices), len(input_shape)): - slices.append(slice(None, None, None)) - - slices = tuple(slices) - res = input_value.val[slices] - - # remove shrink axes - if len(shrink_axes) > 0: - if len(shrink_axes) == len(res.shape): - if len(res) == 0: - logging.warning("%s:%s seems to be a 0 sized tensor", node.name, node.op) - return builtins.tensor(input_type.get_primitive(), []) - res = res.tolist()[0] - else: - res = np.squeeze(res, axis=tuple(shrink_axes)) - # if we have a complete value, we can force it - - slicesv = [[begin[i], end[i], stride_value.val[i]] for i in range(len(begin))] - for idx, s in enumerate(slicesv): - if s[0] is None: - s[0] = 0 - begin_mask.append(idx) - if s[1] is None: - s[1] 
= 2147483647 - end_mask.append(idx) - if s[2] is None: - s[2] = 1 - s[0] = int(s[0]) - s[1] = int(s[1]) - s[2] = int(s[2]) - # insert missing slices - for i in range(len(slicesv), len(input_shape)): - slicesv.append([0, 2147483647, 1]) - if i not in begin_mask: - begin_mask.append(i) - if i not in end_mask: - end_mask.append(i) - node.attr['slice'] = slicesv - node.attr['squeeze'] = list(int(i) for i in shrink_axes) - node.attr['begin_masks'] = list(int(i) for i in begin_mask) - node.attr['end_masks'] = list(int(i) for i in end_mask) - if isscalar(res): - rettype = input_type.get_primitive() - output_value = rettype() - output_value.val = res - elif not isscalar(res): - rettype = builtins.tensor(input_type.get_primitive(), res.shape) - output_value = rettype() - output_value.val = res - - # solve for type - if rettype is None: - # try to derive entirely from input_shape - if (None in [begin_value, end_value, stride_value]): - if len(input_shape) == len(shrink_axes): - # we are removing all axes. i.e. we are indexing a - # specific element - rettype = input_type.get_primitive() - else: - new_shape = [ - make_symbol(node.name + "_s_" + str(i)) - for i in range(len(input_shape) - len(shrink_axes)) - ] - rettype = builtins.tensor(input_type.get_primitive(), new_shape) - # we have a non-constant shaped slice - # store the sqeeze - node.attr['squeeze'] = list(int(i) for i in shrink_axes) - node.attr['begin_masks'] = list(int(i) for i in begin_mask) - node.attr['end_masks'] = list(int(i) for i in end_mask) - else: - retshape = [] - begin = begin_value.val[:].tolist() - end = end_value.val[:].tolist() - begin = self._shape_as_ints(begin) - end = self._shape_as_ints(end) - for i in begin_mask: - begin[i] = None - for i in end_mask: - end[i] = None - for i in shrink_axes: - if begin[i] is None: - end[i] = 1 - elif begin[i] == -1: - end[i] = None - else: - end[i] = begin[i] + 1 - if stride_value is not None: - stride_value = list(stride_value.val[:].astype(np.int32)) - - for i in range(len(begin)): - if i in shrink_axes: - retshape.append(1) - elif is_symbolic_or_unknown(input_shape[i]): - if np.isscalar(begin[i]) and np.isscalar( - end[i]) and np.isscalar(stride_value): - retshape.append(len(list(range(begin[i], end[i], stride_value[i])))) - elif (is_symbolic_or_unknown(begin[i]) - or is_symbolic_or_unknown(end[i])) and stride_value[i] == 1: - if end[i] is None: - retshape.append(input_shape[i] - begin[i]) - else: - retshape.append(end[i] - begin[i]) - else: - retshape.append(make_symbol(node.name + '_' + str(i))) - else: - if begin[i] is not None and begin[i] < 0: - try: - begin[i] += input_shape[i] - except: - pass - if end[i] is None: - end[i] = None # used to be input_shape[i] - elif end[i] < 0: - try: - end[i] += input_shape[i] - except: - pass - thisslice = slice(begin[i], end[i], stride_value[i]) - thisslicelen = len(list(range(input_shape[i]))[thisslice]) - retshape.append(thisslicelen) - slices = [[begin[i], end[i], stride_value[i]] for i in range(len(begin))] - has_symbolic_slices = False - for idx, s in enumerate(slices): - if s[0] is None: - s[0] = 0 - begin_mask.append(idx) - if s[1] is None: - s[1] = 2147483647 - end_mask.append(idx) - if s[2] is None: - s[2] = 1 - try: - s[0] = int(s[0]) - except: - has_symbolic_slices = True - pass - try: - s[1] = int(s[1]) - except: - has_symbolic_slices = True - pass - try: - s[2] = int(s[2]) - except: - has_symbolic_slices = True - pass - # insert missing slices - for i in range(len(slices), len(input_shape)): - slices.append([0, 2147483647, 1]) - 
retshape.append(input_shape[i]) - if i not in begin_mask: - begin_mask.append(i) - if i not in end_mask: - end_mask.append(i) - - if not has_symbolic_slices: - node.attr['slice'] = slices - node.attr['squeeze'] = list(int(i) for i in shrink_axes) - node.attr['begin_masks'] = list(int(i) for i in begin_mask) - node.attr['end_masks'] = list(int(i) for i in end_mask) - # drop removed axes - for a in shrink_axes: - assert (retshape[a] == 1 or is_symbolic_or_unknown(retshape[a])) - retshape = [s for i, s in enumerate(retshape) if i not in shrink_axes] - if len(retshape) == 0: - rettype = input_type.get_primitive() - else: - rettype = builtins.tensor(input_type.get_primitive(), retshape) - node.attr['symbolic_value'] = output_value - return rettype - - def visit_Max(self, node): - return self._visit_reduce(node) - - def visit_Min(self, node): - return self._visit_reduce(node) - - def visit_Ceil(self, node): - return self._visit_unary(node) - - def visit_Round(self, node): - return self._visit_unary(node) - - def visit_Abs(self, node): - return self._visit_unary(node) - - def visit_Floor(self, node): - return self._visit_unary(node) - - def visit_Tile(self, node): - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - input_type = self.visit(node.inputs[0]) - tile_type = self.visit(node.inputs[1]) - - if not builtins.is_tensor(input_type): - raise ValueError('Expected tensor input to {} node {}'.format(node.op, node.name)) - - if not builtins.is_tensor(tile_type) or len(tile_type.get_shape()) != 1: - raise ValueError('Expected tensor tile input to {} node {}'.format(node.op, node.name)) - - if tile_type.get_shape()[0] != len(input_type.get_shape()): - raise ValueError( - 'Tile specification must be length of input rank to {} node {}'.format( - node.op, node.name)) - - input_shape = input_type.get_shape() - if len(input_shape) == 0: - return input_type - - input_value = self._get_symbolic_value(node.inputs[0]) - if input_value is not None: - input_value = input_value.val - - tile_value = self._get_symbolic_value(node.inputs[1]) - if tile_value is None: - ret_shape = [make_symbol(node.name + "_" + str(i)) for i in range(len(input_shape))] - return builtins.tensor(input_type.get_primitive(), ret_shape) - tile_value = tile_value.val - - if len(tile_value) != len(input_shape): - raise ValueError( - 'Tile specification value must be length of inpout rank to {} node {}'.format( - node.op, node.name)) - - rettype = builtins.tensor( - input_type.get_primitive(), - [input_shape[i] * tile_value[i] for i in range(len(tile_value))]) - if input_value is not None and tile_value is not None and not any_symbolic_or_unknown(tile_value): - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.tile(input_value, tile_value) - return rettype - - def visit_FloorDiv(self, node): - return self._visit_broadcast(node) - - def visit_RealDiv(self, node): - return self._visit_broadcast(node) - - def visit_OneHot(self, node): - """ - Inputs: - 0: (str) Name of value indicating indicies to be "on". - 1: (str) Name of value indicating depth of the one-hot dimension, i.e. the - number of values in the domain. - 2: (str) Name of value indicating the "on" value. - 3: (str) Name of value indicating the "off" value. - Attributes: - axis (Required int): axis to fill; -1 for a new inner-most axis. - dtype (Optional any): presence indicates T should be used. - T (Conditional builtin) primitive type of the output tensor. 
- """ - if len(node.inputs) != 4: - raise ValueError('Expected 4 inputs to {} node {}'.format(node.op, node.name)) - - indices_type = self.visit(node.inputs[0]) - depth_type = self.visit(node.inputs[1]) - on_type = self.visit(node.inputs[2]) - off_type = self.visit(node.inputs[3]) - - if not builtins.is_int(depth_type): - raise ValueError('depth must be integral in {} node {}'.format(node.op, node.name)) - - if not builtins.utils.is_primitive(on_type) or not builtins.utils.is_primitive(off_type): - raise ValueError( - 'On and off types must be primitive in {} node {}'.format(node.op, node.name)) - - if on_type != off_type: - raise ValueError( - 'On and off types must be the same in {} node {}'.format(node.op, node.name)) - - axis = node.attr.get('axis') - if not isinstance(axis, six.integer_types) or axis < -1: - raise ValueError('axis must be integer >= -1 in {} node {}'.format(node.op, node.name)) - - if builtins.is_tensor(indices_type): - indices_shape = list(indices_type.get_shape()) - else: - indices_shape = [1] - - depth_value = self._get_symbolic_value(node.inputs[1]).val - if depth_value is None: - depth_value = make_symbol(node.name + '_depth') - elif depth_value < 0: - raise ValueError('depth must be non-negative in {} node {}'.format(node.op, node.name)) - - if 'dtype' in node.attr: - ret_primitive = node.attr.get('T') - if ret_primitive is None or not builtins.is_primitive(ret_primitive): - raise ValueError( - 'Output tensor data type must be primitive in {} node {}'.format( - node.op, node.name)) - else: - ret_primitive = on_type - - if len(indices_shape) == 0: - return builtins.tensor(ret_primitive, tuple()) - retshape = indices_shape - if axis == -1: - retshape.append(depth_value) - else: - retshape.insert(axis, depth_value) - return builtins.tensor(ret_primitive, retshape) - - def visit_SquaredDifference(self, node): - return self._visit_broadcast(node) - - def visit_Sub(self, node): - return self._visit_broadcast(node) - - def visit_Sum(self, node): - return self._visit_reduce(node) - - def visit_Tanh(self, node): - return self._visit_unary(node) - - def find_tensor_array_source_node(self, node): - if 'tensorarray_source' in node.attr: - loc = node.attr['tensorarray_source'] - if loc in self.whole_ssa.global_resource: - return self.whole_ssa.global_resource[loc] - elif '_class' in node.attr: - loc = node.attr['_class'][0][5:] - if loc in self.whole_ssa.global_resource: - return self.whole_ssa.global_resource[loc] - - return None - - def propagate_tensor_array(self, node): - if node.op == 'make_tuple': - tensorarray_source = [ - self.gdict[i].attr.get('tensorarray_source', None) for i in node.inputs - ] - node.attr['tensorarray_source'] = tensorarray_source - elif node.op == 'get_tuple': - if 'tensorarray_source' in self.gdict[node.inputs[0]].attr: - tensorarray_source = self.gdict[node.inputs[0]].attr['tensorarray_source'][ - node.attr['index']] - node.attr['tensorarray_source'] = tensorarray_source - else: - self.visit(node.inputs[-1]) - if 'tensorarray_source' in self.gdict[node.inputs[-1]].attr: - node.attr['tensorarray_source'] = self.gdict[ - node.inputs[-1]].attr['tensorarray_source'] - - def visit_TensorArrayV3(self, node): - # input is size - assert (len(node.inputs) <= 1) - self.visit(node.inputs[0]) - # the input is an int32 which is the size of the tensor - sizeval = self.gdict[node.inputs[0]].attr['symbolic_value'] - - if sizeval is not None and not node.attr['dynamic_size']: - assert isscalar(sizeval.val) - node.attr['size'] = sizeval.val - - if 'infer_shape' 
in node.attr: - # We only support fix size of TensorArray. - assert (node.attr['infer_shape']) - - if isinstance(node.attr['element_shape'], six.string_types): - val = self.resolve_to_non_sym_val_or_die(node.attr["element_shape"]) - node.attr["element_shape"] = list(val) - - if isinstance(node.attr.get('element_shape', []), list): - shape = [] - if 'element_shape' in node.attr: - shape = node.attr['element_shape'] - node.attr['element_shape'] = builtins.tensor(node.attr['dtype'], shape) - self.whole_ssa.global_resource[node.name] = node - node.attr['tensorarray_source'] = node.name - - return builtins.list(node.attr['element_shape']) if node.attr['element_shape'] else None - - def visit_TensorArrayGatherV3(self, node): - # input is resource, indices, flow - assert (len(node.inputs) == 2) - indices_type = self.visit(node.inputs[0]) - - self.propagate_tensor_array(node) - tanode = self.find_tensor_array_source_node(node) - - if isinstance(node.attr['element_shape'], six.string_types): - val = self.resolve_to_non_sym_val_or_die(node.attr["element_shape"]) - node.attr["element_shape"] = list(val) - - if indices_type is None: - return builtins.tensor(tanode.attr['dtype'], [-1] + node.attr['element_shape']) - else: - indiceslen = indices_type.get_shape()[0] - return builtins.tensor(tanode.attr['dtype'], [indiceslen] + node.attr['element_shape']) - - def visit_TensorArrayReadV3(self, node): - # input is resource, idx, flow - assert (len(node.inputs) == 2) - - self.propagate_tensor_array(node) - tanode = self.find_tensor_array_source_node(node) - - ta_type = self.visit(node.inputs[1]) - - if tanode is not None: - ta_type = tanode.datatype - return ta_type.T[0] if ta_type else None - - def visit_TensorArrayScatterV3(self, node): - # input is resource, indices, values , flow - self.propagate_tensor_array(node) - tanode = self.find_tensor_array_source_node(node) - - tensor_put_type = self.visit(node.inputs[1]) - assert (builtins.is_tensor(tensor_put_type)) - tensor_put_type = builtins.tensor( - tensor_put_type.get_primitive(), - tensor_put_type.get_shape()[1:]) - - # Overide the shape in the node attributes - if len(tensor_put_type.get_shape()) > 0 and tanode is not None: - el_shape = tanode.attr.get('element_shape') - es = None if el_shape is None else el_shape.get_shape() - if (es is None or len(es) == 0 \ - or (-1 in es and -1 not in tensor_put_type.get_shape())): - tanode.attr['element_shape'] = tensor_put_type - - # output is flow - assert (len(node.inputs) == 3) - return self.visit(node.inputs[2]) - - def visit_TensorArraySizeV3(self, node): - - self.propagate_tensor_array(node) - tanode = self.find_tensor_array_source_node(node) - for inputnodes in node.inputs: - self.visit(inputnodes) - - if tanode is not None and 'size' in tanode.attr and not tanode.attr.get('dynamic_size', - True): - node.attr['symbolic_value'] = builtins.int32() - node.attr['symbolic_value'].val = tanode.attr['size'] - - # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/data_flow_ops.cc - return builtins.int32 - - def visit_TensorArrayWriteV3(self, node): - # input is resource, index, value, flow - # output is flow - # try to infer tensor array element type if possible - self.propagate_tensor_array(node) - tanode = self.find_tensor_array_source_node(node) - - tensor_put_type = self.visit(node.inputs[1]) - # Overide the shape in the node attributes - - if hasattr(tensor_put_type, 'get_shape') and \ - len(tensor_put_type.get_shape()) > 0 and tanode is not None: - el_shape = 
tanode.attr.get('element_shape') - es = None if el_shape is None else el_shape.get_shape() - if (es is None or len(es) == 0 \ - or (-1 in es and -1 not in tensor_put_type.get_shape())): - tanode.attr['element_shape'] = tensor_put_type - - assert (len(node.inputs) == 3) - return self.visit(node.inputs[2]) - - def visit_TopKV2(self, node): - """ - Inputs: - 0 (str): The name of a tensor - 1 (str): The name of an int32 - """ - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - input_type = self.visit(node.inputs[0]) - k_type = self.visit(node.inputs[1]) - k_val = self._get_symbolic_value(node.inputs[1]) - - if not builtins.is_tensor(input_type): - raise ValueError('Input must be a tensor in {} node {}'.format(node.op, node.name)) - - if not builtins.is_int(k_type): - raise ValueError('K must be an int in {} node {}'.format(node.op, node.name)) - - if k_val is not None: - k = k_val.val - if not is_symbolic(k) and k > input_type.get_shape()[-1]: - raise ValueError( - 'K greater than size of last dimension in {} node {}'.format( - node.op, node.name)) - else: - k = make_symbol(node.name + '_k') - - ret_shape = list(input_type.get_shape()) - ret_shape[-1] = k - return builtins.tuple(( - builtins.tensor(input_type.get_primitive(), - ret_shape), builtins.tensor(builtins.int32, ret_shape))) - - def visit_Transpose(self, node): - """ - Inputs: - 0 (str): Name of an input tensor. - 1 (str): Name of a 1-D tensor indicating how to permute the input. - """ - if len(node.inputs) != 2: - raise ValueError('Expected 2 inputs to {} node {}'.format(node.op, node.name)) - - inputtype = self.visit(node.inputs[0]) - permtype = self.visit(node.inputs[1]) - - if not builtins.is_tensor(inputtype): - raise ValueError('Input must be a tensor in {} node {}'.format(node.op, node.name)) - - shape = inputtype.get_shape() - primitive = inputtype.get_primitive() - - if not builtins.is_tensor(permtype) or len(shape) != permtype.get_shape()[0]: - raise ValueError( - 'Permutation must be a 1-D tensor as long as input rank in {} node {}'.format( - node.op, node.name)) - - transpose_axes = self._get_symbolic_value(node.inputs[1]) - - if transpose_axes is None or transpose_axes.val is None: - # We can't determine the output shape right now: figure it out at runtime - shape = tuple(make_symbol(node.name + str(i)) for i in range(len(shape))) - rettype = builtins.tensor(primitive, shape) - return rettype - - if len(transpose_axes.val) != len(shape): - raise ValueError( - 'Permutation symbolic value must be a 1-D tensor as long as input rank in {} node {}' - .format(node.op, node.name)) - - # Compute the output shape - new_shape = [] - for ax in transpose_axes.val: - new_shape.append(shape[ax]) - rettype = builtins.tensor(primitive, new_shape) - - # Compute a symbolic value for this node if possible - input0 = try_get_non_sym_val(self.gdict[node.inputs[0]]) - input1 = try_get_non_sym_val(self.gdict[node.inputs[1]]) - if input0 is not None and input1 is not None: - self._set_symbolic_value(node, rettype, np.transpose(input0, axes=input1)) - - return rettype - - def visit_VariableV2(self, node): - return None - - def visit_while(self, node): - assert ("cond_function" in node.attr) - assert ("body_function" in node.attr) - assert (len(node.inputs) == 1) - - mytype = self.visit(node.inputs[0]) - - functions_called = [node.attr[i] for i in ["cond_function", "body_function"]] - for f in functions_called: - if self.whole_ssa is not None and f in self.whole_ssa.functions: - # look 
for the function entry point - entrypoint = [ - n for n in self.whole_ssa.functions[f].graph.values() - if n.op == 'function_entry' - ] - entrypoint[0].datatype = mytype - if 'tensorarray_source' in self.gdict[node.inputs[0]].attr: - entrypoint[0].attr['tensorarray_source'] = self.gdict[ - node.inputs[0]].attr['tensorarray_source'] - if 'tensorarray_source' in self.gdict[node.inputs[0]].attr: - node.attr['tensorarray_source'] = self.gdict[node.inputs[0]].attr['tensorarray_source'] - - return mytype - - def visit_get_global(self, node): - assert ("variable" in node.attr) - assert (node.attr['variable'] in self.whole_ssa.variables) - return self.whole_ssa.variables[node.attr['variable']].__class__ - - def visit_set_global(self, node): - assert ("variable" in node.attr) - assert (node.attr['variable'] in self.whole_ssa.variables) - input_type = self.visit(node.inputs[0]) - variable_type = self.whole_ssa.variables[node.attr['variable']].__class__ - if input_type is not None: - if not (input_type is variable_type - or builtins.is_tensor_and_is_compatible_general_shape(input_type, - variable_type)[0]): - logging.warning( - "Possible incompatible type in set_global: %s. expected %s", - builtins.get_type_info(input_type), builtins.get_type_info(variable_type)) - return input_type - - def visit_Size(self, node): - self.visit(node.inputs[0]) - parenttype = self.gdict[node.inputs[0]].datatype - rettype = node.attr["out_type"] - if parenttype is not None: - input_shape = parenttype.get_shape() - node.attr['symbolic_value'] = rettype() - node.attr['symbolic_value'].val = np.prod(input_shape) - return rettype - - def visit_Sign(self, node): - input_type = self.visit(node.inputs[0]) - return input_type - - def visit_Cumsum(self, node): - assert (len(node.inputs) == 2) - return self.visit(node.inputs[0]) - - def visit_ClipByValue(self, node): - assert len(node.inputs) == 3 - - type_min = self.visit(node.inputs[1]) - type_max = self.visit(node.inputs[2]) - if not (builtins.is_tensor(type_max) or builtins.is_tensor(type_min)): - node.attr["min_value"] = self.gdict[node.inputs[1]].attr['value'].val - node.attr["max_value"] = self.gdict[node.inputs[2]].attr['value'].val - - return self.visit(node.inputs[0]) - - def visit_SpaceToDepth(self, node): - return self._get_type_from_attr(node) - - def visit_DepthToSpace(self, node): - return self._get_type_from_attr(node) - - def visit_SpaceToBatchND(self, node): - return self._get_type_from_attr(node) - - def visit_BatchToSpaceND(self, node): - return self._get_type_from_attr(node) - - def visit_LRN(self, node): - return self._visit_unary(node) - - def visit_Reciprocal(self, node): - return self._visit_unary(node) - - -def type_is_unknown(t): - if builtins.is_tuple(t): - return any(type_is_unknown(a) for a in t.T) - elif builtins.is_tensor(t): - return type_is_unknown(t.get_primitive()) or \ - t.get_shape() is None or \ - len(t.get_shape()) == 0 or \ - any_symbolic_or_unknown(t.get_shape()) - elif builtins.is_list(t): - return type_is_unknown(t.T[0]) - elif t is builtins.unknown: - return True - else: - return t is None - - -def type_inference_pass_impl(nnssa): - """ - Takes an NetworkEnsemble object and performs recursive type inference - on all the nodes in the graph - """ - function_names = list(nnssa.functions.keys()) - function_names = sorted(function_names) - # stick the main functions at the start - if "main" in function_names: - function_names = ["main"] + [i for i in function_names if i != "main"] - - import copy - # try to infer all the set_global types 
first - changed_variables = [] - for k in function_names: - graph = copy.copy(nnssa.functions[k].graph) - for v in graph.values(): - if v.op == 'set_global': - rettype = TypeInferenceVisitor(graph, nnssa).visit(v) - variable = v.attr['variable'] - validate_shape = v.attr.get('validate_shape', True) - if (variable in changed_variables) and validate_shape: - if builtins.get_type_info( - nnssa.variables[variable]) == builtins.get_type_info(rettype): - continue - else: - raise TypeError( - "Variable %s changes type several times from %s to %s" % ( - variable, builtins.get_type_info( - nnssa.variables[variable]), builtins.get_type_info(rettype))) - if rettype != type(nnssa.variables[variable]): - nnssa.variables[variable] = rettype() - if variable not in changed_variables: - changed_variables.append(variable) - logging.debug( - "Changing variable %s to type %s", variable, - builtins.get_type_info(rettype)) - nnssa.functions[k].find_inputs_and_outputs() - - # re-infer unknown shapes and types - for k in function_names: - graph = copy.copy(nnssa.functions[k].graph) - for v in graph.values(): - if type_is_unknown(v.datatype): - v.datatype = None - - # run it for real - for k in function_names: - TypeInferenceVisitor(nnssa.functions[k].graph, nnssa).visit_all() - - -def recursive_replace_symbols_in_type_with_unknown(dtype): - if builtins.is_list(dtype): - return builtins.list(recursive_replace_symbols_in_type_with_unknown(dtype.T[0])) - elif builtins.is_tuple(dtype): - return builtins.tuple( - tuple(recursive_replace_symbols_in_type_with_unknown(t) for t in dtype.T)) - elif builtins.is_tensor(dtype): - return builtins.tensor( - dtype.get_primitive(), - tuple(-1 if issubclass(type(t), sm.Basic) else int(t) for t in dtype.get_shape())) - else: - return dtype - - -def recursive_replace_symbols_in_values(val): - # try some things in sympy.core.numbers - if issubclass(type(val), sm.Basic): - return int(val) - elif isinstance(val, list): - return [recursive_replace_symbols_in_values(i) for i in val] - elif isinstance(val, tuple): - return tuple([recursive_replace_symbols_in_values(i) for i in val]) - elif isinstance(val, np.ndarray): - if np.issctype(val.dtype): - return val - else: - return np.array([recursive_replace_symbols_in_values(i) - for i in val.flatten()]).reshape(val.shape) - else: - return val - - -def graph_replace_symbolic_values(gdict): - for k in gdict: - v = gdict[k] - if v.value is None and v.attr['symbolic_value'] is not None and not any_symbolic_or_unknown( - v.attr['symbolic_value'].val): - v.value = v.attr['symbolic_value'] - v.value.val = recursive_replace_symbols_in_values(v.value.val) - v.attr['symbolic_datatype'] = v.datatype - v.datatype = recursive_replace_symbols_in_type_with_unknown(v.datatype) - - -def graph_make_symbolic_values(gdict): - for k in gdict: - gdict[k].attr['symbolic_value'] = gdict[k].value - - -def type_inference_pass(nnssa): - # repeat for as many times as there are functions - # this is the maximum number of times required for convergence - for i in nnssa.functions: - graph_make_symbolic_values(nnssa.functions[i].graph) - for i in range(len(nnssa.functions)): - type_inference_pass_impl(nnssa) - for i in nnssa.functions: - graph_replace_symbolic_values(nnssa.functions[i].graph) - for i in nnssa.variables: - nnssa.variables[i] = recursive_replace_symbols_in_type_with_unknown(nnssa.variables[i]) diff --git a/coremltools/converters/nnssa/frontend/tensorflow/__init__.py b/coremltools/converters/nnssa/frontend/tensorflow/__init__.py deleted file mode 100644 index
768f45d6f..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .load import load diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/cond_to_where.py b/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/cond_to_where.py deleted file mode 100644 index 5a8dc8e87..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/cond_to_where.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from ..parsed_tf_node import ParsedTFNode -from ....commons.basic_graph_ops import delete_node, disconnect_edge -from .functionalize_loops import * -from coremltools._deps import HAS_TF_2 - - -def compute_max_rank(graph): - # highly inefficient way to calculate the rank of every node - ret = {} - # begin at max rank - for v in graph.keys(): - if len(graph[v].inputs) == 0: - ret[v] = 0 - else: - ret[v] = len(graph) - - changes = True - while changes: - changes = False - for v in graph.keys(): - if len(graph[v].inputs) > 0: - rank = max(ret[i] for i in graph[v].inputs) + 1 - if ret[v] != rank: - changes = True - ret[v] = rank - return ret - - -class CondToWhere(object): - # this should run AFTER functionalize loops - def __init__(self): - self.switches = None - self.merge = '' - - def _search(self, g, node): - if not isinstance(node, ParsedTFNode): - node = g[node] - self.merge = node.name - # we look for Merge nodes - if node.op == "Merge": - print("Fixing cond at merge location: %s" % (node.name)) - self.switches = FindAllUpstreamTerminals(lambda x: x.op == 'Switch').visit( - g, node.name).get_result() - if len(self.switches) == 0: - self.switches = FindAllUpstreamTerminals( - lambda x: x.op == 'Switch' or x.attr.get('was_switch') is not None).visit( - g, node.name).get_result() - - def _fix_found_cond(self, g): - if g[self.switches[0]].op == 'Switch': - condition_input = g[self.switches[0]].inputs[1] - else: - condition_input = g[self.switches[0]].attr['was_switch'] - - # convert the merge to a select - # TensorFlow seems to ensure that the first - # merge input is the True branch and the second merge input - # is the False branch.
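For intuition, this Merge-to-select rewrite turns the data-flow pattern `Switch -> (true/false branches) -> Merge` into a single conditional selection. Below is a minimal NumPy sketch of the select semantics that the resulting `iff` node is expected to have (illustrative only; the operand ordering inside the real `iff` node is handled by the pass itself, and the values here are hypothetical):

```python
import numpy as np

def iff_select(condition, true_branch, false_branch):
    """Elementwise select: the semantics the rewritten 'iff' node computes."""
    return np.where(condition, true_branch, false_branch)

# Hypothetical values standing in for the two Switch branches.
cond = np.array([True, False, True])
print(iff_select(cond, np.ones(3), np.zeros(3)))  # -> [1. 0. 1.]
```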
- # - - # we convert switches to identity, detaching the switch condition - for s in self.switches: - if g[s].op == 'Switch': - g[s].op = 'Identity' - g[s].attr['was_switch'] = g[s].inputs[1] - # detach input 1: the switch condition - if g[s].inputs[0] == g[s].inputs[1]: - g[s].inputs.pop() - g[g[s].inputs[0]].outputs.pop() - else: - disconnect_edge(g, g[s].inputs[1], s) - - # build the final select - g[self.merge].op = 'iff' - if not HAS_TF_2: - # swap true branch with false branch to get the right semantics for IFF - g[self.merge].inputs[0], g[self.merge].inputs[1] = g[self.merge].inputs[1], g[self.merge].inputs[0] - - g[self.merge].inputs = [condition_input] + g[self.merge].inputs - g[condition_input].outputs.append(self.merge) - return True - - def cond_to_where(self, graph): - stuff_done = False - g = graph - ranks = compute_max_rank(graph) - merges = [a for a in g if g[a].op == 'Merge'] - merges = sorted(merges, key=lambda k: ranks[k]) - if len(merges) == 0: - return False - for m in merges: - self._search(g, m) - ret = self._fix_found_cond(g) - if ret: - stuff_done = True - # delete the extra switches that seem to just lead to identities - # which then lead nowhere but into control dependencies - extra_switches = [a for a in g if g[a].op == 'Switch'] - for s in extra_switches: - if all([g[o].op == 'Identity' and len(g[o].outputs) == 0 for o in g[s].outputs]): - nodes_to_delete = g[s].outputs + [s] - for d in nodes_to_delete: - delete_node(g, d) - stuff_done = True - return stuff_done - - -def cond_to_where(ssa): - for k, v in ssa.functions.items(): - while True: - stuff_done = CondToWhere().cond_to_where(v.graph) - if not stuff_done: - break diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/constant_propagation.py b/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/constant_propagation.py deleted file mode 100644 index 945dc6263..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/constant_propagation.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -import tensorflow as tf - -from ...graph_pass.delete_constant import delete_unnecessary_constant_nodes -from ....commons import builtins -from ....commons.parse import numpy_val_to_builtin_val -from ....commons.basic_graph_ops import const_determined_nodes - - -def constant_propagation(nnssa): - # we are going to rely on the tensorflow graph to perform constant - # propagation. We construct a new graph comprising only the - # constant nodes.
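As a standalone illustration of that approach (a toy graph, not the converter's own code; the node names are hypothetical), evaluating a constant-only subgraph in a TensorFlow session yields exactly the values that constant propagation needs:

```python
import tensorflow as tf

# A toy graph containing only constant-determined nodes; running it in a
# TF1-style session materializes the propagated values.
with tf.Graph().as_default() as g:
    a = tf.constant(2.0, name="a")
    b = tf.constant(3.0, name="b")
    c = tf.add(a, b, name="c")  # const-determined: both inputs are constants

with tf.compat.v1.Session(graph=g) as sess:
    print(sess.run("c:0"))  # -> 5.0
```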
- - from tensorflow.core.framework import graph_pb2 - from tensorflow.core.framework import node_def_pb2 - new_graph = graph_pb2.GraphDef() - constant_nodes = set() - constant_node_num_outputs = {} - for f in nnssa.functions.values(): - generated_nodes = [k for k, v in f.graph.items() if v.original_node is None] - const_nodes_in_this_graph = const_determined_nodes(f.graph, set(generated_nodes)) - # we can only run TF on nodes with outputs since we must evaluate - # tensors and not ops - const_nodes_in_this_graph = [ - i for i in const_nodes_in_this_graph if f.graph[i].op != "NoOp" - ] - constant_nodes = constant_nodes.union(set(const_nodes_in_this_graph)) - - # topological sort const nodes - topsort = [] - topsort_set = set() - while len(const_nodes_in_this_graph) > 0: - for n in const_nodes_in_this_graph: - if len(set(f.graph[n].inputs).difference(topsort_set)) == 0: - topsort.append(n) - topsort_set.add(n) - - const_nodes_in_this_graph = set(const_nodes_in_this_graph).difference(topsort_set) - - for node in topsort: - new_node = node_def_pb2.NodeDef() - new_node.CopyFrom(f.graph[node].original_node) - if '_class' in new_node.attr: - del new_node.attr['_class'] - del new_node.input[:] - new_node.input.extend(f.graph[node].inputs) - if '_output_shapes' in f.graph[node].attr: - constant_node_num_outputs[node] = len(f.graph[node].attr['_output_shapes']) - else: - constant_node_num_outputs[node] = 1 - new_graph.node.extend([new_node]) - constant_nodes = list(constant_nodes) - try: - if len(constant_nodes) > 0: - with tf.Graph().as_default() as graph: - tf.import_graph_def(new_graph, name="") - with tf.compat.v1.Session(graph=graph) as sess: - query_list = list() - for c in constant_nodes: - for j in range(constant_node_num_outputs[c]): - query_list.append(c + ':' + str(j)) - control_flow_ops = list() - for query in list(query_list): - op_name = query.lower() - if 'switch' in op_name or 'cond' in op_name: - control_flow_ops.append(query) - query_list.remove(query) - result_list = sess.run(query_list) - result = {query_list[i]: result_list[i] for i in range(len(query_list))} - # propagate switch one by one - for op in control_flow_ops: - try: - res = sess.run([op]) - result.update({op: res[0]}) - except: - print('[Constant Propagation] Skip "dead" tensor: {}'.format(op)) - result.update({op: None}) - for f in nnssa.functions.values(): - for k, v in f.graph.items(): - if k in constant_node_num_outputs: - if constant_node_num_outputs[k] == 1: - result_entry = k + ':0' - try: - v.value, v.datatype = numpy_val_to_builtin_val(result[result_entry]) - except: - print(result_entry) - print(result[result_entry]) - else: - values = [ - result[k + ':' + str(i)] - for i in range(constant_node_num_outputs[k]) - ] - try: - npval = [numpy_val_to_builtin_val(i) for i in values] - v.value = [val[0] for val in npval] - v.datatype = builtins.tuple(tuple([val[1] for val in npval])) - except: - print(values) - except Exception as e: - raise RuntimeError("Constant propagation failed: {}".format(e)) - - delete_unnecessary_constant_nodes(nnssa) diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/delete_asserts.py b/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/delete_asserts.py deleted file mode 100644 index a16ce249c..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/delete_asserts.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import 
absolute_import as _ -from ....commons.basic_graph_ops import delete_node - -import sys - -sys.setrecursionlimit(5000) # increase recursion limit to support convert large models - - -def all_assert_leaves(gdict, nodename, memo): - if nodename in memo: - return memo[nodename] - memo[nodename] = None - if len(gdict[nodename].outputs) == 0: - if gdict[nodename].op in ['Assert', 'CheckNumerics']: - memo[nodename] = True - else: - memo[nodename] = False - else: - memo[nodename] = all(all_assert_leaves(gdict, o, memo) for o in gdict[nodename].outputs) - - return memo[nodename] - - -def delete_asserts(nnssa): - # look for nodes which only end up at asserts - delete_count = 0 - for f in nnssa.functions.values(): - memo = {} - for n in f.graph: - all_assert_leaves(f.graph, n, memo) - for m in memo: - if memo[m] is True: - delete_count += 1 - delete_node(f.graph, m) - print(str(delete_count) + " assert nodes deleted") - return delete_count diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/delete_disconnected_nodes.py b/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/delete_disconnected_nodes.py deleted file mode 100644 index afa63bdd4..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/delete_disconnected_nodes.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - - -def delete_disconnected_nodes(gd): - # delete all nodes with no inputs and outputs - empty_nodes = [] - for k, v in gd.items(): - if len(gd[k].inputs) == 0 and \ - len(gd[k].outputs) == 0 and \ - len(gd[k].control_inputs) == 0 and \ - len(gd[k].control_outputs) == 0: - empty_nodes.append(k) - - for k in empty_nodes: - del gd[k] diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/fusedbatchnorm_rewrite.py b/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/fusedbatchnorm_rewrite.py deleted file mode 100644 index 4bbbe59b3..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/fusedbatchnorm_rewrite.py +++ /dev/null @@ -1,52 +0,0 @@ -import numpy as np -import copy - -from ....builder import GraphBuilder -from ....commons import builtins -from ....commons.basic_graph_ops import replace_node, delete_node - -from ..parsed_tf_node import ParsedTFNode - - -fused_batch_norm_ops = {'FusedBatchNorm', 'FusedBatchNormV3'} - - -def expand_fusedbatchnorm_cell(graph, node): - assert (node.op in fused_batch_norm_ops) - assert (len(node.inputs) == 5) - - x, scale, offset, estimated_mean, estimated_variance = node.inputs - - epsilon = node.attr.get('epsilon', 1e-4) - var_node = graph[estimated_variance] - if var_node.value is not None: - var_node.value.val += epsilon - - for o in node.outputs: - assert (graph[o].op == 'get_tuple') - original_node_outputs = list(node.outputs) - - delete_node(graph, node.name) - - builder = GraphBuilder(graph, node.name + '/', ParsedTFNode) - x_center = builder.add_elementwise("Sub", [x, estimated_mean]) - scaling_factor = builder.add_elementwise( - "Mul", [scale, builder.add_elementwise("Rsqrt", [estimated_variance])]) - x_scaled = builder.add_elementwise("Mul", [x_center, scaling_factor]) - x_shifted = builder.add_elementwise("Add", [x_scaled, offset]) - - x_final = GraphBuilder(graph, '', ParsedTFNode).add_identity(x_shifted, node.name) - - outputs = [x_final] - - for o in original_node_outputs: - replace_node(graph, o, outputs[graph[o].attr['index']]) - delete_node(graph, o) 
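The rewrite above decomposes fused batch norm into primitive elementwise ops. Here is a small NumPy sketch of the inference-time arithmetic those inserted Sub/Rsqrt/Mul/Add nodes compute (illustrative only; note that the pass folds epsilon into the variance constant before emitting the ops):

```python
import numpy as np

def fused_batch_norm_inference(x, scale, offset, mean, variance, epsilon=1e-4):
    variance = variance + epsilon                       # folded into the const above
    x_center = x - mean                                 # Sub
    scaling_factor = scale * (1.0 / np.sqrt(variance))  # Mul(scale, Rsqrt(var))
    return x_center * scaling_factor + offset           # Mul, then Add

x = np.random.rand(2, 3).astype(np.float32)
y = fused_batch_norm_inference(x, np.ones(3), np.zeros(3),
                               x.mean(axis=0), x.var(axis=0))
```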
- - -def fusedbatchnorm_rewrite(nnssa): - for i in list(nnssa.functions): - graph = nnssa.functions[i].graph - blockcells = [k for k, v in graph.items() if v.op in fused_batch_norm_ops] - for b in blockcells: - expand_fusedbatchnorm_cell(graph, graph[b]) diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/lstmblockcell_rewrite.py b/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/lstmblockcell_rewrite.py deleted file mode 100644 index ba1b97c55..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graph_pass/lstmblockcell_rewrite.py +++ /dev/null @@ -1,243 +0,0 @@ -from ....builder import GraphBuilder -from ....commons import builtins -from ....commons.basic_graph_ops import replace_node, delete_node - -from ..parsed_tf_node import ParsedTFNode - - -def linear(builder, mul1, mul2, add, name=None): - mul = builder.add_matmul([mul1, mul2], name=name) - return builder.add_elementwise('Add', [mul, add], name=name) - - -def expand_lstm_block_cell(graph, node): - assert ((node.op == 'LSTMBlockCell' and len(node.inputs) == 8) - or (node.op == 'BlockLSTM' and len(node.inputs) == 9)) - - peephole = node.attr['use_peephole'] - cell_clip = node.attr['cell_clip'] - forget_bias = node.attr['forget_bias'] - - if len(node.inputs) == 8: - x, cs_prev, h_prev, w, wci, wcf, wco, b = node.inputs - else: - _, x, cs_prev, h_prev, w, wci, wcf, wco, b = node.inputs - - """ - input_arg { - name: "x" - description: "The input to the LSTM cell, shape (batch_size, num_inputs)." - type_attr: "T" - } - input_arg { - name: "cs_prev" - description: "Value of the cell state at previous time step." - type_attr: "T" - } - input_arg { - name: "h_prev" - description: "Output of the previous cell at previous time step." - type_attr: "T" - } - input_arg { - name: "w" - description: "The weight matrix." - type_attr: "T" - } - input_arg { - name: "wci" - description: "The weight matrix for input gate peephole connection." - type_attr: "T" - } - input_arg { - name: "wcf" - description: "The weight matrix for forget gate peephole connection." - type_attr: "T" - } - input_arg { - name: "wco" - description: "The weight matrix for output gate peephole connection." - type_attr: "T" - } - input_arg { - name: "b" - description: "The bias vector." - type_attr: "T" - } - - output_arg { - name: "i" - description: "The input gate." - type_attr: "T" - } - output_arg { - name: "cs" - description: "The cell state before the tanh." - type_attr: "T" - } - output_arg { - name: "f" - description: "The forget gate." - type_attr: "T" - } - output_arg { - name: "o" - description: "The output gate." - type_attr: "T" - } - output_arg { - name: "ci" - description: "The cell input." - type_attr: "T" - } - output_arg { - name: "co" - description: "The cell after the tanh." - type_attr: "T" - } - output_arg { - name: "h" - description: "The output h vector." 
- type_attr: "T" - } - - xh = [x, h_prev] - [i, ci, f, o] = xh * w + b - f = f + forget_bias - if not use_peephole: - wci = wcf = wco = 0 - i = sigmoid(cs_prev * wci + i) - f = sigmoid(cs_prev * wcf + f) - ci = tanh(ci) - cs = ci .* i + cs_prev .* f - cs = clip(cs, cell_clip) - o = sigmoid(cs * wco + o) - co = tanh(cs) - h = co .* o - """ - builder = GraphBuilder(graph, node.name + '/', ParsedTFNode) - zero = builtins.int32() - zero.val = 0 - one = builtins.int32() - one.val = 1 - concat_axis = builder.add_const(one, name='concat_axis') - expand_axis = builder.add_const(zero, name='expand_axis') - h_prev_expand = builder.add_expanddims(h_prev, expand_axis) - xh = builder.add_concat([x, h_prev_expand], concat_axis) - icifo_presplit = linear(builder, xh, w, b) - icifo = builder.add_split(value=icifo_presplit, split_dim=concat_axis, num_split=4) - i = builder.add_get_tuple(icifo, index=0) - ci = builder.add_get_tuple(icifo, index=1) - f = builder.add_get_tuple(icifo, index=2) - o = builder.add_get_tuple(icifo, index=3) - if forget_bias is not None and forget_bias != 0.0: - fb = builtins.fp32() - fb.val = forget_bias - bias = builder.add_const(fb, name='forget_bias') - f = builder.add_elementwise("Add", [f, bias]) - if peephole: - i = builder.add_activation('Sigmoid', linear(builder, cs_prev, wci, i)) - f = builder.add_activation('Sigmoid', linear(builder, cs_prev, wcf, f)) - else: - i = builder.add_activation('Sigmoid', i) - f = builder.add_activation('Sigmoid', f) - ci = builder.add_activation('Tanh', ci) - cs = builder.add_elementwise( - "Add", - [builder.add_elementwise("Mul", [ci, i]), - builder.add_elementwise("Mul", [cs_prev, f])]) - if cell_clip is not None and cell_clip > 0.0: - cc = builtins.fp32() - cc.val = cell_clip - upper_clip = builder.add_const(cc, name='upper_clip') - neg_cc = builtins.fp32() - neg_cc.val = -cell_clip - lower_clip = builder.add_const(neg_cc, name='lower_clip') - cs = builder.add_elementwise('Maximum', [cs, lower_clip]) - cs = builder.add_elementwise('Minimum', [cs, upper_clip]) - if peephole: - o = builder.add_activation('Sigmoid', linear(builder, cs, wco, o)) - else: - o = builder.add_activation('Sigmoid', o) - co = builder.add_activation('Tanh', cs) - h = builder.add_elementwise("Mul", [co, o]) - - outputs = [i, cs, f, o, ci, co, h] - for o in node.outputs: - assert (graph[o].op == 'get_tuple') - - original_node_outputs = list(node.outputs) - for o in original_node_outputs: - replace_node(graph, o, outputs[graph[o].attr['index']]) - delete_node(graph, o) - delete_node(graph, node.name) - - -def rewrite_to_lstm_block(graph, node): - assert ((node.op == 'LSTMBlockCell' and len(node.inputs) == 8) - or (node.op == 'BlockLSTM' and len(node.inputs) == 9)) - - forget_bias = node.attr['forget_bias'] - if len(node.inputs) == 8: - x, cs_prev, h_prev, w, _, _, _, b = node.inputs - else: - _, x, cs_prev, h_prev, w, _, _, _, b = node.inputs - - builder = GraphBuilder(graph, node.name + '/', ParsedTFNode) - - lstm_cell = builder.add_LSTMBlock(x, w, b, - prev_h=h_prev, - prev_cs=cs_prev, - forget_bias=forget_bias) - - for o in node.outputs: - assert (graph[o].op == 'get_tuple') - - original_node_outputs = list(node.outputs) - h = None - cs = None - for o in original_node_outputs: - if graph[o].attr['index'] == 1: - if cs is None: - cs = builder.add_get_tuple(lstm_cell, index=2) - replace_node(graph, o, cs) - elif graph[o].attr['index'] == 6: - if h is None: - h = builder.add_get_tuple(lstm_cell, index=1) - replace_node(graph, o, h) - else: - raise ValueError('Output 
option for LSTMBlockCell unsupported') - delete_node(graph, o) - delete_node(graph, node.name) - - -def need_expand(graph, node): - # Check whether the node needs to be expanded or can stay as an LSTM block. - assert ((node.op == 'LSTMBlockCell' and len(node.inputs) == 8) - or (node.op == 'BlockLSTM' and len(node.inputs) == 9)) - - peephole = node.attr['use_peephole'] - cell_clip = node.attr['cell_clip'] - - if peephole: - return True - if cell_clip is not None and cell_clip > 0.0: - return True - - for out in node.outputs: - assert (graph[out].op == 'get_tuple') - if graph[out].attr['index'] != 1 and graph[out].attr['index'] != 6: - return True - - return False - - -def lstmblockcell_rewrite(nnssa): - for i in list(nnssa.functions): - graph = nnssa.functions[i].graph - block_cells = [k for k, v in graph.items() if v.op == 'LSTMBlockCell' or v.op == 'BlockLSTM'] - for b in block_cells: - if need_expand(graph, graph[b]): - expand_lstm_block_cell(graph, graph[b]) - else: - rewrite_to_lstm_block(graph, graph[b]) diff --git a/coremltools/converters/nnssa/frontend/tensorflow/graphdef_to_ssa.py b/coremltools/converters/nnssa/frontend/tensorflow/graphdef_to_ssa.py deleted file mode 100644 index c32f09b45..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/graphdef_to_ssa.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -import tensorflow as tf -from .parse import graphdef_to_dict -from ...commons.basic_graph_ops import * -from ...nnssa import * -from .graph_pass import insert_get_tuple -from .graph_pass import delete_disconnected_nodes -from .graph_pass import tensor_array_resource_removal - - -def load_tf_graph(graph_file): - """ - Given a graphdef file on disk, loads it, returning a pair of - graph_def and tf.Graph - """ - # We load the protobuf file from the disk and parse it to retrieve the - # unserialized graph_def - with tf.io.gfile.GFile(graph_file, "rb") as f: - graph_def = tf.compat.v1.GraphDef() - graph_def.ParseFromString(f.read()) - - # Then, we import the graph_def into a new Graph and return it - with tf.Graph().as_default() as graph: - # The name var will prefix every op/node in your graph - # Since we load everything in a new graph, this is not needed - tf.import_graph_def(graph_def, name="") - return graph.as_graph_def(add_shapes=True), graph - - -def graphdef_to_ssa(graphdef_or_file, main_method_name='main'): - """ - Loads a graphdef file and transforms it into a NetworkEnsemble.
- """ - if isinstance(graphdef_or_file, (bytes, str)): - gdorig, g = load_tf_graph(graphdef_or_file) - else: - gdorig = graphdef_or_file - with tf.Graph().as_default() as g: - tf.import_graph_def(gdorig, name="") - - gd = graphdef_to_dict(gdorig) - tensor_array_resource_removal(gd) - gd = insert_get_tuple(gd) - gd = fill_outputs(gd) - delete_disconnected_nodes(gd) - - ssa = NetworkEnsemble() - ssa.functions[main_method_name] = SSAFunction(gd) - return ssa diff --git a/coremltools/converters/nnssa/frontend/tensorflow/load.py b/coremltools/converters/nnssa/frontend/tensorflow/load.py deleted file mode 100644 index 047e39e0a..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/load.py +++ /dev/null @@ -1,86 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import traceback - -from .graphdef_to_ssa import graphdef_to_ssa -from .graph_pass import * -from ..common_pass import common_pass - - -def load(tfgraph, resume_on_errors=False, **kwargs): - """ - Loads a NetworkEnsemble from a TensorFlow frozen graph. - tfgraph should either be a TensorFlow Graph object, or a path to a - frozen graph. - - Parameters - ---------- - tfgraph: tf.Graph or str - Either a path to a frozen graph, or a TensorFlow Graph object - resume_on_errors : bool, optional. Default False. - This flag should generally be False except for debugging purposes - for diagnosing 'unconvertible' graphs. Setting this flag to True - will cause graph pass errors to be ignored, forcefully returning - a NetworkEnsemble object. - inputs: dict or None - Dictionary containing {name: shape} for each input. When not provided, - The converter assumes all Placeholder or PlaceholderWithDefault - as inputs. - outputs: list of str - A list of names of output TF nodes. 
- """ - if hasattr(tfgraph, 'as_graph_def'): - gd = tfgraph.as_graph_def(add_shapes=True) - else: - gd = tfgraph - - ssa = graphdef_to_ssa(gd) - - placeholder_shape = kwargs.get("inputs", {}) - - if placeholder_shape and len(placeholder_shape) > 0: - graph = ssa.functions['main'].graph - required_plhd_nodes = [node for node in graph if - graph[node].op == 'Placeholder'] - for name in required_plhd_nodes: - if name not in placeholder_shape: - raise ValueError('Shape of required input {} is not provided.'.format(name)) - graph[name].attr['_output_shapes'] = [placeholder_shape[name]] - - passes = [ - delete_asserts, - functionalize_loops, - constant_propagation, - cond_to_where, - remove_variable_nodes, - fusedbatchnorm_rewrite, - lstmblockcell_rewrite - ] - - if resume_on_errors is False: - for p in passes: - p(ssa) - else: - for p in passes: - try: - p(ssa) - except: - tb = traceback.format_exc() - print("Exception in pass " + str(p)) - print(tb) - print("Ignoring and continuing to next pass") - - common_pass(ssa, resume_on_errors) - - for f in ssa.functions.values(): - f.find_inputs_and_outputs() - - # make sure type inference is complete - if resume_on_errors is False: - for f in ssa.functions.values(): - for n in f.graph.values(): - assert n.datatype is not None - return ssa diff --git a/coremltools/converters/nnssa/frontend/tensorflow/parse.py b/coremltools/converters/nnssa/frontend/tensorflow/parse.py deleted file mode 100644 index 40928a082..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/parse.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import copy - -from ...commons import builtins -from ...commons.builtins import get_type_info -from .parsed_tf_node import ParsedTFNode - -# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto -# DT_INT32 = 3; -# DT_UINT8 = 4; -# DT_INT16 = 5; -# DT_INT8 = 6; -# DT_STRING = 7; -# DT_COMPLEX64 = 8; // Single-precision complex -# DT_INT64 = 9; -# DT_BOOL = 10; -# DT_QINT8 = 11; // Quantized int8 -# DT_QUINT8 = 12; // Quantized uint8 -# DT_QINT32 = 13; // Quantized int32 -# DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops. 
-# DT_QINT16 = 15; // Quantized int16 -# DT_QUINT16 = 16; // Quantized uint16 -# DT_UINT16 = 17; -# DT_COMPLEX128 = 18; // Double-precision complex -# DT_HALF = 19; -# DT_RESOURCE = 20; -# DT_VARIANT = 21; // Arbitrary C++ data types -# DT_UINT32 = 22; -# DT_UINT64 = 23; -# - - -def parse_type(t): - mapping = { - 1: builtins.float, - 2: builtins.double, - 3: builtins.int32, - 4: builtins.uint8, - 5: builtins.int16, - 6: builtins.int8, - 7: builtins.str, - 9: builtins.int64, - 10: builtins.bool, - 17: builtins.uint16, - 22: builtins.uint32, - 23: builtins.uint64 - } - t = int(t) - if t in mapping: - return mapping[t] - else: - print("Type %d cannot be mapped" % t) - return None - - -def parse_shape(t): - if t.unknown_rank: - return None - ret = [d.size for d in t.dim] - return ret - - -def parse_tensor(t): - typ = parse_type(t.dtype) - shape = parse_shape(t.tensor_shape) - - if not t.tensor_shape.unknown_rank and len(shape) == 0: - retobj = typ() - else: - rettype = builtins.tensor(typ, tuple(shape)) - retobj = rettype() - retobj.shape = shape - - if len(t.half_val) > 0: - retobj.val = t.half_val - elif len(t.float_val) > 0: - retobj.val = t.float_val - elif len(t.double_val) > 0: - retobj.val = t.double_val - elif len(t.int_val) > 0: - retobj.val = t.int_val - elif len(t.int64_val) > 0: - retobj.val = t.int64_val - elif len(t.bool_val) > 0: - retobj.val = t.bool_val - elif hasattr(t, 'uint32_val') and len(t.uint32_val) > 0: - retobj.val = t.uint32_val - elif hasattr(t, 'uint64_val') and len(t.uint64_val) > 0: - retobj.val = t.uint64_val - return retobj - - -def parse_list(t): - if len(t.s) > 0: - return list(t.s) - elif len(t.i) > 0: - return list(t.i) - elif len(t.f) > 0: - return list(t.f) - elif len(t.b) > 0: - return list(t.b) - elif len(t.type) > 0: - return list(parse_type(z) for z in t.type) - elif len(t.shape) > 0: - return list(parse_shape(z) for z in t.shape) - elif len(t.tensor) > 0: - return list(parse_tensor(z) for z in t.tensor) - else: - return [] - - -def parse_attr(attr): - if attr.HasField('s'): - return attr.s if isinstance(attr.s, str) else attr.s.decode() - elif attr.HasField('i'): - return attr.i - elif attr.HasField('f'): - return attr.f - elif attr.HasField('b'): - return attr.b - elif attr.HasField('type'): - return parse_type(attr.type) - elif attr.HasField('shape'): - return parse_shape(attr.shape) - elif attr.HasField('tensor'): - return parse_tensor(attr.tensor) - elif attr.HasField('list'): - return parse_list(attr.list) - elif attr.HasField('func'): - raise NotImplementedError("func not yet implemented") - elif attr.HasField('placeholder'): - raise NotImplementedError("placeholder not yet implemented") - - -def graphdef_to_dict(gd): - ret = {} - for node in gd.node: - ret[node.name] = ParsedTFNode(node) - return ret diff --git a/coremltools/converters/nnssa/frontend/tensorflow/parsed_tf_node.py b/coremltools/converters/nnssa/frontend/tensorflow/parsed_tf_node.py deleted file mode 100644 index 63d636a2c..000000000 --- a/coremltools/converters/nnssa/frontend/tensorflow/parsed_tf_node.py +++ /dev/null @@ -1,83 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ -from ...commons import builtins -from ...nnssa import ParsedNode - - -class ParsedTFNode(ParsedNode): - """ - A parsed Tensorflow Node. - - name: The name of the node (str) - op: The operation represented by the node (str) - datatype: The type of the node. 
(type) - value: The value of the node if available - inputs: The list of nodes which are inputs to this node (list[str]) - control_inputs: The list of nodes which have to be executed before this node (list[str]) - attr: The attributes of the node - outputs: The list of nodes which consume the result of this node (list[str]) - control_outputs: The list of nodes which have to be executed after this node (list[str]) - """ - - def __init__(self, tfnode=None): - super(ParsedTFNode, self).__init__() - self.original_node = tfnode - - if tfnode is not None: - from .parse import parse_attr - self.name = tfnode.name - if tfnode.op == 'PlaceholderWithDefault': - self.op = 'Placeholder' - else: - self.op = tfnode.op - self.inputs = [x for x in tfnode.input if not x.startswith('^')] - self.control_inputs = [x[1:] for x in tfnode.input if x.startswith('^')] - self.attr = {k: parse_attr(v) for k, v in tfnode.attr.items()} - - def parse_from_attr(self): - if 'value' in self.attr: - self.datatype = self.attr['value'].__class__ - elif '_output_shapes' in self.attr: - output_shapes = self.attr['_output_shapes'] - if output_shapes[0] is not None and len(output_shapes[0]) > 0: - if 'dtype' in self.attr: - rettype = builtins.tensor(self.attr['dtype'], tuple(output_shapes[0])) - elif 'T' in self.attr: - rettype = builtins.tensor(self.attr['T'], tuple(output_shapes[0])) - elif 'Tparams' in self.attr: - rettype = builtins.tensor(self.attr['Tparams'], tuple(output_shapes[0])) - else: - raise NotImplementedError( - "Op-(%s) %s not implemented\nWith attribute: %s" % - (self.op, self.name, str(self.attr))) - self.datatype = rettype - elif 'dtype' in self.attr: - self.datatype = self.attr['dtype'] - elif 'shape' in self.attr: - shape = self.attr['shape'] - assert ('dtype' in self.attr) - if len(shape) == 0: - self.datatype = self.attr['dtype'] - else: - self.datatype = builtins.tensor(self.attr['dtype'], shape) - elif 'dtype' in self.attr: - self.datatype = self.attr['dtype'] - - def __copy__(self): - import copy - ret = ParsedTFNode() - ret.name = self.name - ret.op = self.op - ret.datatype = self.datatype - ret.value = copy.deepcopy(self.value) - ret.inputs = self.inputs[:] - ret.control_inputs = self.control_inputs[:] - ret.attr = {k: copy.deepcopy(v) for k, v in self.attr.items()} - ret.outputs = self.outputs[:] - ret.control_outputs = self.control_outputs[:] - return ret - - def copy(self): - return self.__copy__() diff --git a/coremltools/converters/tensorflow/__init__.py b/coremltools/converters/onnx/__init__.py similarity index 53% rename from coremltools/converters/tensorflow/__init__.py rename to coremltools/converters/onnx/__init__.py index 5b6f86e5b..a1ce17878 100644 --- a/coremltools/converters/tensorflow/__init__.py +++ b/coremltools/converters/onnx/__init__.py @@ -1,9 +1,9 @@ -# Copyright (c) 2017, Apple Inc. All rights reserved. +# Copyright (c) 2018, Apple Inc. All rights reserved.
# # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from ..._deps import HAS_TF as _HAS_TF +from coremltools._deps import _HAS_ONNX -if _HAS_TF: - from ._tf_converter import convert +if _HAS_ONNX: + from ._converter import convert diff --git a/coremltools/converters/onnx/_backend.py b/coremltools/converters/onnx/_backend.py new file mode 100644 index 000000000..de5736468 --- /dev/null +++ b/coremltools/converters/onnx/_backend.py @@ -0,0 +1,185 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +from typing import Any, Text, Dict, Tuple +from onnx import ModelProto +from onnx.backend.base import Backend +from six import string_types as _string_types +from ._backend_rep import CoreMLRep +from ._converter import convert +import onnx +from ._graph import _input_from_onnx_input, EdgeInfo + +DEBUG = False + + +def _get_onnx_outputs_info(model): # type: (...) -> Dict[Text, EdgeInfo] + """ + Takes in an onnx model and returns a dictionary + of onnx output names mapped to a tuple that is (output_name, type, shape) + """ + if isinstance(model, _string_types): + onnx_model = onnx.load(model) + elif isinstance(model, onnx.ModelProto): + onnx_model = model + + graph = onnx_model.graph + onnx_output_dict = {} + for o in graph.output: + out = _input_from_onnx_input(o) + onnx_output_dict[out[0]] = out + return onnx_output_dict + + +class CoreMLBackend(Backend): + @classmethod + def prepare( + cls, + model, # type: ModelProto + device="CPU", # type: Text + minimum_ios_deployment_target="12", # type: str + **kwargs # type: Any + ): + # type: (...) -> CoreMLRep + super(CoreMLBackend, cls).prepare(model, device, **kwargs) + if DEBUG: + with open("/tmp/node_model.onnx", "wb") as f: + s = model.SerializeToString() + f.write(s) + coreml_model = convert( + model, minimum_ios_deployment_target=minimum_ios_deployment_target + ) + if DEBUG: + coreml_model.save("/tmp/node_model.mlmodel") + onnx_outputs_info = _get_onnx_outputs_info(model) + return CoreMLRep( + coreml_model, + onnx_outputs_info, + device == "CPU", + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @classmethod + def is_compatible( + cls, + model, # type: ModelProto + device="CPU", # type: Text + **kwargs # type: Any + ): # type: (...) -> bool + # Return whether the model is compatible with CoreML. + """ + This function will gradually grow to cover more cases. + Need to be careful of false negatives. There are some cases that seemingly + are not supported on CoreML, which the graph transformer optimizes and converts to + a graph that can be converted to CoreML. + + 1. Check whether the layers for which CoreML expects constant weights are in + the list of initializers in the onnx graph + 2. 
unsupported ops like "And", "Or" etc + + """ + + node_set = set() + initializer_set = set() + graph = model.graph + for t in graph.initializer: + initializer_set.add(t.name) + for node in graph.node: + if node.op_type in [ + "ConvTranspose", + "Conv", + "BatchNormalization", + "InstanceNormalization", + "PRelu", + ]: + if len(node.input) > 1 and node.input[1] not in initializer_set: + return False + node_set.add(node.op_type) + + # unsupported ops remove + for node in graph.node: + if node.op_type in [ + "Cast", + "And", + "Or", + "Xor", + "Not", + "Less", + "Greater", + "Equal", + "Ceil", + "Floor", + ]: + return False + + return True + + @classmethod + def supports_device( + cls, device, # type: Text + ): + # type: (...) -> bool + return device == "CPU" + + +class CoreMLBackendND(Backend): + @classmethod + def prepare( + cls, + model, # type: ModelProto + device="CPU", # type: Text + minimum_ios_deployment_target="13", # type: str + **kwargs # type: Any + ): + # type: (...) -> CoreMLRep + super(CoreMLBackendND, cls).prepare(model, device, **kwargs) + if DEBUG: + with open("/tmp/node_model.onnx", "wb") as f: + s = model.SerializeToString() + f.write(s) + coreml_model = convert( + model, minimum_ios_deployment_target=minimum_ios_deployment_target + ) + if DEBUG: + coreml_model.save("/tmp/node_model.mlmodel") + onnx_outputs_info = _get_onnx_outputs_info(model) + return CoreMLRep( + coreml_model, + onnx_outputs_info, + device == "CPU", + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @classmethod + def is_compatible( + cls, + model, # type: ModelProto + device="CPU", # type: Text + **kwargs # type: Any + ): # type: (...) -> bool + # Return whether the model is compatible with CoreML. + """ + This function will gradually grow to cover more cases. + Need to be careful of false negatives. There are some cases that seemingly + are not supported on CoreML, which the graph transformer optimizes and converts to + a graph that can be converted to CoreML. + + 2. Unsupported ops: If graph has one of unsupported op, exit + + """ + ## TODO: Add un-supported ops + unsupported_ops = [] + graph = model.graph + for node in graph.node: + if node.op_type in unsupported_ops: + return False + return True + + @classmethod + def supports_device( + cls, device, # type: Text + ): + # type: (...) -> bool + return device == "CPU" diff --git a/coremltools/converters/onnx/_backend_rep.py b/coremltools/converters/onnx/_backend_rep.py new file mode 100644 index 000000000..09e7da441 --- /dev/null +++ b/coremltools/converters/onnx/_backend_rep.py @@ -0,0 +1,123 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ + +# from __future__ import unicode_literals + +import numpy as np +from typing import Any, Sequence, List +from onnx.backend.base import BackendRep, namedtupledict +from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE +from coremltools.proto import FeatureTypes_pb2 as ft # type: ignore +from coremltools.models import MLModel # type: ignore +from typing import Dict, Any, Text, Tuple +from onnx import TensorProto +from ._graph import EdgeInfo +from ._converter import SupportedVersion + + +def _set_dtypes( + input_dict, # type: Dict[Text, np._ArrayLike[Any]] + model, # type: MLModel +): + # type: (...) 
-> None + spec = model.get_spec() + for input_ in spec.description.input: + if input_.type.HasField("multiArrayType") and input_.name in input_dict: + if input_.type.multiArrayType.dataType == ft.ArrayFeatureType.INT32: + input_dict[input_.name] = input_dict[input_.name].astype(np.int32) + if input_.type.multiArrayType.dataType == ft.ArrayFeatureType.FLOAT32: + input_dict[input_.name] = input_dict[input_.name].astype(np.float32) + if input_.type.multiArrayType.dataType == ft.ArrayFeatureType.DOUBLE: + input_dict[input_.name] = input_dict[input_.name].astype(np.float64) + + +class CoreMLRep(BackendRep): + def __init__( + self, + coreml_model, # type: MLModel + onnx_outputs_info, # type: Dict[Text, EdgeInfo] + useCPUOnly=False, # type: bool + minimum_ios_deployment_target="12", # type: str + ): + # type: (...) -> None + super(CoreMLRep, self).__init__() + self.model = coreml_model + self.useCPUOnly = useCPUOnly + self.minimum_ios_deployment_target = minimum_ios_deployment_target + + spec = coreml_model.get_spec() + self.input_names = [str(i.name) for i in spec.description.input] + self.output_names = [str(o.name) for o in spec.description.output] + self.onnx_outputs_info = onnx_outputs_info # type: Dict[Text, EdgeInfo] + + def run( + self, + inputs, # type: Any + **kwargs # type: Any + ): + # type: (...) -> Tuple[Any, ...] + super(CoreMLRep, self).run(inputs, **kwargs) + inputs_ = inputs + _reshaped = False + if not SupportedVersion.is_nd_array_supported( + self.minimum_ios_deployment_target + ): + for i, input_ in enumerate(inputs_): + shape = input_.shape + if len(shape) == 4 or len(shape) == 2: + inputs_[i] = input_[np.newaxis, :] + _reshaped = True + elif len(shape) == 3: + spec = self.model.get_spec() + spec_shape = [ + int(k) + for k in spec.description.input[i].type.multiArrayType.shape + ] + prod = spec_shape[0] * spec_shape[1] * spec_shape[2] + onnx_shape = list(shape) + if onnx_shape != spec_shape: + if onnx_shape[2] == prod: + inputs_[i] = np.reshape( + inputs_[i], [onnx_shape[0], onnx_shape[1]] + spec_shape + ) + elif onnx_shape[1] * onnx_shape[2] == prod: + inputs_[i] = np.reshape( + inputs_[i], [1, onnx_shape[0]] + spec_shape + ) + input_dict = dict(zip(self.input_names, map(np.array, inputs_))) + _set_dtypes(input_dict, self.model) # type: ignore + + prediction = self.model.predict(input_dict, self.useCPUOnly) + output_values = [prediction[name] for name in self.output_names] + + if not SupportedVersion.is_nd_array_supported( + self.minimum_ios_deployment_target + ): + for i, output_ in enumerate(output_values): + shape = output_.shape + # reshape the CoreML output to match Onnx's output shape + try: + output_values[i] = np.reshape(output_, self.onnx_outputs_info[self.output_names[i]][2]) # type: ignore + except RuntimeError: + print( + "Output '%s' shape incompatible between CoreML (%s) and onnx (%s)" + % ( + self.output_names[i], + output_.shape, + self.onnx_outputs_info[self.output_names[i]], + ) + ) + + ## Type Cast to ONNX expected output types + for i, output_ in enumerate(output_values): + output_type = self.onnx_outputs_info[self.output_names[i]][1] + if TENSOR_TYPE_TO_NP_TYPE[output_type] != output_values[i].dtype: + output_values[i] = output_values[i].astype( + TENSOR_TYPE_TO_NP_TYPE[output_type] + ) + + result = namedtupledict("Outputs", self.output_names)( + *output_values + ) # type: Tuple[Any, ...] 
+ return result diff --git a/coremltools/converters/onnx/_converter.py b/coremltools/converters/onnx/_converter.py new file mode 100644 index 000000000..053e7c93d --- /dev/null +++ b/coremltools/converters/onnx/_converter.py @@ -0,0 +1,915 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ +from typing import Text, Union, Optional, Dict, Any, Iterable, Sequence, Callable, List + +import numpy as np + +from coremltools._deps import _HAS_ONNX + +if _HAS_ONNX: + import onnx + from onnx import shape_inference + from onnx import TensorProto + +from coremltools.models.neural_network import NeuralNetworkBuilder # type: ignore +from coremltools.models import datatypes, MLModel # type: ignore +from coremltools.proto import FeatureTypes_pb2 as ft # type: ignore +from coremltools import ( + _MINIMUM_CUSTOM_LAYER_SPEC_VERSION as IOS_11_2_SPEC_VERSION, +) # iOS 11.2 +from coremltools import ( + _MINIMUM_CUSTOM_MODEL_SPEC_VERSION as IOS_12_SPEC_VERSION, +) # iOS 12.0 +from coremltools import _MINIMUM_NDARRAY_SPEC_VERSION as IOS_13_SPEC_VERSION # iOS 13.0 +from coremltools import __version__ as ct_version +from coremltools.models import _METADATA_VERSION, _METADATA_SOURCE +from typing import Tuple + +from ._operators import ( + _convert_node, + _SEQUENCE_LAYERS_REGISTRY, + _ONNX_NODE_REGISTRY, + _add_const_inputs_if_required, +) +from ._operators_nd import _ONNX_NODE_REGISTRY_ND, _convert_node_nd + +from ._graph import Graph, EdgeInfo, Transformer + +from ._transformers import ( + ConvAddFuser, + DropoutRemover, + ReshapeInitTensorFuser, + BNBroadcastedMulFuser, + BNBroadcastedAddFuser, + PixelShuffleFuser, + OutputRenamer, + AddModelInputsOutputs, + ConstantsToInitializers, + ImageScalerRemover, + ShapeOpRemover, + ConstantRemover, + ConstantFillToInitializers, + ReshapeTransposeReshape_pattern1, + CastOpRemover, + DeadCodeElimination, + PaddingOpRemover, +) + +# ML model passes +from coremltools.converters.mil.backend.nn.passes.mlmodel_passes import ( + remove_disconnected_layers, + transform_conv_crop, +) + +from ._error_utils import ErrorHandling +from ._graph_viz import plot_graph # type: ignore + +USE_SHAPE_MAPPING = True + +DEBUG = False + + +class SupportedVersion: + # Supported iOS Version + # New OS Version must be added at the end to maintain backward version index + supported_ios_version = ["11.2", "12", "13"] + IOS_13_VERSION = supported_ios_version.index("13") + ND_ARRARY_SUPPORT = IOS_13_VERSION + + @staticmethod + def ios_support_check(minimum_ios_deployment_target): + return minimum_ios_deployment_target in SupportedVersion.supported_ios_version + + @staticmethod + def is_nd_array_supported(minimum_ios_deployment_target): + if not SupportedVersion.ios_support_check(minimum_ios_deployment_target): + raise TypeError( + "{} not supported. Please provide one of target iOS: {}", + minimum_ios_deployment_target, + SupportedVersion.supported_ios_version, + ) + + minimum_ios_deployment_target_index = SupportedVersion.supported_ios_version.index( + minimum_ios_deployment_target + ) + return SupportedVersion.ND_ARRARY_SUPPORT <= minimum_ios_deployment_target_index + + @staticmethod + def get_supported_ios(): + return SupportedVersion.supported_ios_version + + @staticmethod + def get_specification_version(minimum_ios_deployment_target): + if not SupportedVersion.ios_support_check(minimum_ios_deployment_target): + raise TypeError( + "{} not supported. 
Please provide one of target iOS: {}", + minimum_ios_deployment_target, + SupportedVersion.supported_ios_version, + ) + + if minimum_ios_deployment_target == "11.2": + return IOS_11_2_SPEC_VERSION + elif minimum_ios_deployment_target == "12": + return IOS_12_SPEC_VERSION + else: + return IOS_13_SPEC_VERSION + + +""" +inputs: list of tuples. + [Tuple]: [(name, type, shape)] +""" + + +def _make_coreml_input_features( + graph, onnx_coreml_input_shape_map, disable_coreml_rank5_mapping=False +): # type: (...) -> Sequence[Tuple[Text, datatypes.Array]] + """ + If "disable_coreml_rank5_mapping" is False, then: + + ONNX shapes to CoreML static shapes mapping + length==1: [C] + length==2: [B,C] + length==3: [C,H,W] or [Seq,B,C] + length==4: [B,C,H,W] + + If "disable_coreml_rank5_mapping" is True, then + onnx shapes are mapped "as is" to CoreML. + """ + inputs = graph.inputs + op_types = graph.blob_to_op_type + features = [] + for input_ in inputs: + shape = input_[2] + if disable_coreml_rank5_mapping: + if len(shape) > 5: + raise ValueError( + "ONNX input %s has a rank greater than 5, which is not supported in CoreML framework" + % str(input_[0]) + ) + else: + features.append((str(input_[0]), datatypes.Array(*shape))) + continue + + if USE_SHAPE_MAPPING and input_[0] in onnx_coreml_input_shape_map: + mapp = onnx_coreml_input_shape_map[input_[0]] + if len(mapp) != len(shape): + raise ValueError( + "Incorrect value in onnx_coreml_input_shape_map argument" + ) + graph.onnx_coreml_shape_mapping[input_[0]] = mapp + coreml_shape = [1, 1, 1] + for i in range(3): + if (i + 2) in mapp: + coreml_shape[i] = shape[mapp.index(i + 2)] + shape = coreml_shape + else: + if len(shape) == 0: + shape = [1, 1, 1] + elif len(shape) == 1: + # assume [C] + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [2] + elif len(shape) == 2: + # assume [Batch,C] + shape = [shape[1]] + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [1, 2] + elif len(shape) == 3: + # assume [C,H,W] unless its connected an op that bestows another mapping + if input_[0] in op_types and len(op_types[input_[0]]) == 1: + if str(op_types[input_[0]][0]) in _SEQUENCE_LAYERS_REGISTRY: + # (Seq,B,C) + shape = [shape[2]] + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [0, 1, 2] + elif str(op_types[input_[0]][0]) in [ + "MaxPool", + "AveragePool", + "BatchNormalization", + "GlobalAveragePool", + "GlobalLpPool", + "GlobalMaxPool", + "InstanceNormalization", + "LRN", + "LpPool", + "Conv", + "ConvTranspose", + ]: + # (B,C,W) + shape = [shape[1], 1, shape[2]] + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [1, 2, 4] + else: + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [2, 3, 4] + else: + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [2, 3, 4] + elif len(shape) == 4: # (B,C,H,W) --> (C,H,W) + shape = shape[1:] + if USE_SHAPE_MAPPING: + graph.onnx_coreml_shape_mapping[input_[0]] = [1, 2, 3, 4] + else: + raise ValueError( + "CoreML input cannot be more than rank 4. Input shape: %s, input: '%s' " + % (str(shape), str(input_[0])) + ) + features.append((str(input_[0]), datatypes.Array(*shape))) + return features + + +""" +outputs: list of tuples. + [Tuple]: [(name, type, shape)] +""" + + +def _make_coreml_output_features( + graph, forceShape=False, disable_coreml_rank5_mapping=False +): # type: (...) 
-> Sequence[Tuple[Text, datatypes.Array]] + features = [] + outputs = graph.outputs + op_types = graph.blob_from_op_type + ops_allowing_zerod_output = {"Size"} + + for output_ in outputs: + if op_types[output_[0]] in ops_allowing_zerod_output and len(output_[2]) == 0: + output_ = list(output_) + output_[2] = (1,) + + if disable_coreml_rank5_mapping: + shape = output_[2] + if len(shape) > 5: + raise ValueError( + "ONNX output %s has a rank greater than 5, which is not supported in CoreML framework" + % str(output_[0]) + ) + else: + features.append((str(output_[0]), datatypes.Array(*shape))) + continue + + if not forceShape: + features.append((str(output_[0]), None)) + else: + shape = output_[2] + if len(shape) == 0: + shape = [1, 1, 1] + elif len(shape) == 1: + pass + elif len(shape) == 3: + if ( + output_[0] in op_types + and str(op_types[output_[0]]) in _SEQUENCE_LAYERS_REGISTRY + ): + # onnx shape: (Seq,B,C) + shape = [shape[2]] + elif len(shape) == 4: # (B,C,H,W) --> (C,H,W) + shape = shape[1:] + else: + shape = None # output shape need not be specified for CoreML. + if shape is None: + features.append((str(output_[0]), shape)) + else: + features.append((str(output_[0]), datatypes.Array(*shape))) + return features + + +def _check_unsupported_ops( + nodes, disable_coreml_rank5_mapping=False +): # type: (...) -> None + unsupported_op_types = [] # type: List[Text] + for node in nodes: + + if disable_coreml_rank5_mapping: + if ( + node.op_type not in _ONNX_NODE_REGISTRY_ND + and node.op_type not in unsupported_op_types + ): + unsupported_op_types.append(node.op_type) + continue + + if ( + node.op_type not in _ONNX_NODE_REGISTRY + and node.op_type not in unsupported_op_types + ): + unsupported_op_types.append(node.op_type) + + coreml_3_rerun_message = "" + if not disable_coreml_rank5_mapping: + coreml_3_rerun_message = ( + "\nPlease try converting again by providing the additonal argument, " + "minimum_ios_deployment_target=13" + " and making sure you have the latest coremltools package" + ) + if len(unsupported_op_types) > 0: + raise NotImplementedError( + "Unsupported ONNX ops of type: %s %s" + % (",".join(unsupported_op_types), coreml_3_rerun_message) + ) + + +def _update_multiarray_to_float32( + feature, # type: Any +): # type : (...) -> None + if feature.type.HasField("multiArrayType"): + feature.type.multiArrayType.dataType = ft.ArrayFeatureType.FLOAT32 + + +def _update_multiarray_to_int32( + feature, # type: Any +): # type : (...) -> None + if feature.type.HasField("multiArrayType"): + feature.type.multiArrayType.dataType = ft.ArrayFeatureType.INT32 + + +def _transform_coreml_dtypes( + builder, # type : NeuralNetworkBuilder + inputs, # type: List[EdgeInfo] + outputs, # type: List[EdgeInfo] +): + # type: (...) 
-> None + + """ Make sure ONNX input/output data types are mapped to the equivalent CoreML types + """ + for i, input_ in enumerate(inputs): + onnx_type = input_[1] + if onnx_type == TensorProto.FLOAT: + _update_multiarray_to_float32(builder.spec.description.input[i]) + elif onnx_type == TensorProto.DOUBLE: + continue + elif onnx_type == TensorProto.INT32 or onnx_type == TensorProto.INT64: + _update_multiarray_to_int32(builder.spec.description.input[i]) + elif onnx_type == TensorProto.BOOL: + _update_multiarray_to_float32(builder.spec.description.input[i]) + else: + raise TypeError("Input must be of of type FLOAT, DOUBLE, INT32 or INT64") + + for i, output_ in enumerate(outputs): + onnx_type = output_[1] + if onnx_type == TensorProto.FLOAT: + _update_multiarray_to_float32(builder.spec.description.output[i]) + elif onnx_type == TensorProto.DOUBLE: + continue + elif onnx_type == TensorProto.INT32 or onnx_type == TensorProto.INT64: + _update_multiarray_to_int32(builder.spec.description.output[i]) + elif onnx_type == TensorProto.BOOL: + _update_multiarray_to_float32(builder.spec.description.output[i]) + else: + raise TypeError("Output must be of of type FLOAT, DOUBLE, INT32 or INT64") + + +def _convert_multiarray_output_to_image( + spec, # type: Any + feature_name, # type: Text + is_bgr=False, # type: bool +): + # type: (...) -> None + for output in spec.description.output: + if output.name != feature_name: + continue + if output.type.WhichOneof("Type") != "multiArrayType": + raise ValueError("{} is not a multiarray type".format(output.name,)) + array_shape = tuple(output.type.multiArrayType.shape) + if len(array_shape) == 2: + height, width = array_shape + output.type.imageType.colorSpace = ft.ImageFeatureType.ColorSpace.Value( + "GRAYSCALE" + ) + else: + if len(array_shape) == 4: + if array_shape[0] != 1: + raise ValueError( + "Shape {} is not supported for image output".format( + array_shape, + ) + ) + array_shape = array_shape[1:] + + channels, height, width = array_shape + + if channels == 1: + output.type.imageType.colorSpace = ft.ImageFeatureType.ColorSpace.Value( + "GRAYSCALE" + ) + elif channels == 3: + if is_bgr: + output.type.imageType.colorSpace = ft.ImageFeatureType.ColorSpace.Value( + "BGR" + ) + else: + output.type.imageType.colorSpace = ft.ImageFeatureType.ColorSpace.Value( + "RGB" + ) + else: + raise ValueError( + "Channel Value {} is not supported for image output".format( + channels, + ) + ) + + output.type.imageType.width = width + output.type.imageType.height = height + + +def _set_deprocessing( + is_grayscale, # type: bool + builder, # type: NeuralNetworkBuilder + deprocessing_args, # type: Dict[Text, Any] + input_name, # type: Text + output_name, # type: Text +): + # type: (...) 
+    is_bgr = deprocessing_args.get("is_bgr", False)
+
+    image_scale = deprocessing_args.get("image_scale", 1.0)
+
+    if is_grayscale:
+        gray_bias = deprocessing_args.get("gray_bias", 0.0)
+        W = np.array([image_scale])
+        b = np.array([gray_bias])
+    else:
+        W = np.array([image_scale, image_scale, image_scale])
+
+        red_bias = deprocessing_args.get("red_bias", 0.0)
+        green_bias = deprocessing_args.get("green_bias", 0.0)
+        blue_bias = deprocessing_args.get("blue_bias", 0.0)
+
+        if not is_bgr:
+            b = np.array([red_bias, green_bias, blue_bias,])
+        else:
+            b = np.array([blue_bias, green_bias, red_bias,])
+    builder.add_scale(
+        name=input_name,
+        W=W,
+        b=b,
+        has_bias=True,
+        shape_scale=W.shape,
+        shape_bias=b.shape,
+        input_name=input_name,
+        output_name=output_name,
+    )
+
+
+def _prepare_onnx_graph(
+    graph, transformers, onnx_ir_version
+):  # type: (GraphProto, Iterable[Transformer], int) -> Graph
+    graph_ = Graph.from_onnx(graph, onnx_ir_version)
+    if DEBUG:
+        plot_graph(graph_, graph_img_path="/tmp/graph_raw.pdf")
+    graph_ = graph_.transformed(transformers)
+    if DEBUG:
+        plot_graph(graph_, graph_img_path="/tmp/graph_opt.pdf")
+    return graph_
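+
+
+# (Illustrative sketch, not part of the converter: any callable mapping
+#  Graph -> Graph can serve as a transformer in the list handed to
+#  _prepare_onnx_graph. A hypothetical debugging pass that changes nothing:
+#
+#      class PrintOps(object):
+#          def __call__(self, graph):  # type: (Graph) -> Graph
+#              for node_ in graph.nodes:
+#                  print(node_.op_type)
+#              return graph
+# )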
+
+
+def convert(
+    model,  # type: Union[onnx.ModelProto, Text]
+    mode=None,  # type: Optional[Text]
+    image_input_names=[],  # type: Sequence[Text]
+    preprocessing_args={},  # type: Dict[Text, Any]
+    image_output_names=[],  # type: Sequence[Text]
+    deprocessing_args={},  # type: Dict[Text, Any]
+    class_labels=None,  # type: Union[Text, Iterable[Text], None]
+    predicted_feature_name="classLabel",  # type: Text
+    add_custom_layers=False,  # type: bool
+    custom_conversion_functions={},  # type: Dict[Text, Any]
+    onnx_coreml_input_shape_map={},  # type: Dict[Text, List[int,...]]
+    minimum_ios_deployment_target="12",
+):
+    # type: (...) -> MLModel
+    """
+    Convert ONNX model to CoreML.
+    Parameters
+    ----------
+    model:
+        An ONNX model with parameters loaded in the onnx package, or a path to
+        a file containing the model.
+    mode: 'classifier', 'regressor' or None
+        Mode of the converted coreml model:
+        'classifier', a NeuralNetworkClassifier spec will be constructed.
+        'regressor', a NeuralNetworkRegressor spec will be constructed.
+    preprocessing_args:
+        'is_bgr', 'red_bias', 'green_bias', 'blue_bias', 'gray_bias',
+        'image_scale' keys with the same meaning as
+        https://apple.github.io/coremltools/generated/coremltools.models.neural_network.html#coremltools.models.neural_network.NeuralNetworkBuilder.set_pre_processing_parameters
+    deprocessing_args:
+        Same as 'preprocessing_args' but for deprocessing.
+    class_labels:
+        As a string it represents the name of the file which contains
+        the classification labels (one per line).
+        As a list of strings it represents a list of categories that map
+        the index of the output of a neural network to labels in a classifier.
+    predicted_feature_name:
+        Name of the output feature for the class labels exposed in the Core ML
+        model (applies to classifiers only). Defaults to 'classLabel'.
+    add_custom_layers: bool
+        Flag to turn on addition of custom CoreML layers for unsupported ONNX ops or attributes within
+        a supported op.
+    custom_conversion_functions: dict()
+        A dictionary with keys corresponding to the names/types of onnx ops and values as functions taking
+        an object of class coremltools's 'NeuralNetworkBuilder', 'Graph' (see onnx-coreml/_graph.Graph),
+        'Node' (see onnx-coreml/_graph.Node), 'ErrorHandling' (see onnx-coreml/_error_utils.ErrorHandling).
+        This custom conversion function gets full control and responsibility for converting a given onnx op.
+        This function returns nothing and is responsible for adding an equivalent CoreML layer via 'NeuralNetworkBuilder'.
+    onnx_coreml_input_shape_map: dict()
+        (Optional) A dictionary with keys corresponding to the model input names. Values are a list of integers that specify
+        how the shape of the input is mapped to CoreML. The convention used for CoreML shapes is
+        0: Sequence, 1: Batch, 2: channel, 3: height, 4: width.
+        For example, an input of rank 2 could be mapped as [3,4] (i.e. H,W) or [1,2] (i.e. B,C) etc.
+        This is ignored if "minimum_ios_deployment_target" is set to 13.
+    minimum_ios_deployment_target: str
+        Target Deployment iOS Version (default: '12')
+        Supported iOS version options: '11.2', '12', '13'
+        The CoreML model produced by the converter will be compatible with the iOS version specified in this argument.
+        e.g. if minimum_ios_deployment_target = '12', the converter would only utilize CoreML features released up to iOS 12 (equivalently macOS 10.14, watchOS 5, etc.).
+
+        iOS 11.2 (CoreML 0.8) does not support resize_bilinear, crop_resize layers
+         - (Supported features: https://github.com/apple/coremltools/releases/tag/v0.8)
+        iOS 12 (CoreML 2.0)
+         - (Supported features: https://github.com/apple/coremltools/releases/tag/v2.0)
+        iOS 13 (CoreML 3.0)
+         - (Supported features: https://github.com/apple/coremltools/releases/tag/3.0-beta6)
+
+    Returns
+    -------
+    model: A CoreML model (MLModel).
+    """
+    if isinstance(model, Text):
+        onnx_model = onnx.load(model)
+    elif isinstance(model, onnx.ModelProto):
+        onnx_model = model
+    else:
+        raise TypeError("Model must be a file path to a .onnx file or a loaded onnx model")
+
+    if not SupportedVersion.ios_support_check(minimum_ios_deployment_target):
+        raise TypeError(
+            "{} not supported. Please provide one of target iOS: {}".format(
+                minimum_ios_deployment_target, SupportedVersion.get_supported_ios()
+            )
+        )
+
+    # Copy the mutable argument defaults that this function mutates below, so
+    # that repeated calls do not leak state through the shared default objects.
+    image_input_names = list(image_input_names)
+    preprocessing_args = dict(preprocessing_args)
+
+    global USE_SHAPE_MAPPING
+    disable_coreml_rank5_mapping = False
+    if SupportedVersion.is_nd_array_supported(minimum_ios_deployment_target):
+        disable_coreml_rank5_mapping = True
+
+    if disable_coreml_rank5_mapping:
+        USE_SHAPE_MAPPING = False
+    else:
+        USE_SHAPE_MAPPING = True
+
+    """
+    First, apply a few optimizations to the ONNX graph,
+    in preparation for conversion to CoreML.
+    """
+
+    # Use a dummy transformation to conditionally disable certain transformations
+    class DummyTransformation(object):
+        def __call__(self, graph):
+            return graph
+
+    transformers = [
+        ConstantsToInitializers(),
+        ShapeOpRemover(),
+        ConstantRemover(),
+        CastOpRemover(),
+        PaddingOpRemover(),
+        ReshapeInitTensorFuser(),
+        DropoutRemover(),
+        DeadCodeElimination(),
+        ConvAddFuser(),
+        BNBroadcastedMulFuser(),
+        BNBroadcastedAddFuser(),
+        ReshapeTransposeReshape_pattern1(),
+        PixelShuffleFuser(),
+        AddModelInputsOutputs()
+        if not disable_coreml_rank5_mapping
+        else DummyTransformation(),
+        ConstantFillToInitializers(),
+    ]  # type: Iterable[Transformer]
+
+    onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
+    graph = _prepare_onnx_graph(onnx_model.graph, transformers, onnx_model.ir_version)
+
+    """
+    Check for ImageScaler nodes in ONNX; their presence indicates whether input image
+    preprocessing needs to be added to the CoreML graph or not.
+    """
+    # are there ImageScaler nodes in the Graph?
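+    # (For reference: an ImageScaler node computes out = image_scale * input + bias
+    #  per channel; e.g. scale=1/255.0 with bias=[0, 0, 0] -- hypothetical values --
+    #  rescales pixel values into [0, 1].)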
+ # If yes then add the info from it to the "preprocessing_args" dictionary, if the dictionary is not + # already provided by the user + if not bool(preprocessing_args): + for node in graph.nodes: + if node.op_type == "ImageScaler": + inp_name = node.inputs[0] + scale = node.attrs.get("scale", 1.0) + bias = node.attrs.get("bias", [0, 0, 0]) + if not (len(bias) == 1 or len(bias) == 3): + continue + if "image_scale" in preprocessing_args: + preprocessing_args["image_scale"][inp_name] = scale + else: + preprocessing_args["image_scale"] = {inp_name: scale} + if len(bias) == 3: + for i, color in enumerate(["red", "green", "blue"]): + if color + "_bias" in preprocessing_args: + preprocessing_args[color + "_bias"][inp_name] = bias[i] + else: + preprocessing_args[color + "_bias"] = {inp_name: bias[i]} + else: + if "gray_bias" in preprocessing_args: + preprocessing_args["gray_bias"][inp_name] = bias[0] + else: + preprocessing_args["gray_bias"] = {inp_name: bias[0]} + if inp_name not in image_input_names: + image_input_names.append(inp_name) # type: ignore + + # remove all ImageScaler ops + graph = graph.transformed([ImageScalerRemover()]) + + """ + Gather information (name, shape) for model inputs and outputs + This information is then used to initialize the neural network builder object of coremltools. + The builder object is later used to add layers to the CoreML model. + """ + + # Make CoreML input and output features by gathering shape info and + # interpreting it for CoreML + input_features = _make_coreml_input_features( + graph, onnx_coreml_input_shape_map, disable_coreml_rank5_mapping + ) + if len(image_output_names) > 0: + output_features = _make_coreml_output_features( + graph, + forceShape=True, + disable_coreml_rank5_mapping=disable_coreml_rank5_mapping, + ) + else: + output_features = _make_coreml_output_features( + graph, disable_coreml_rank5_mapping=disable_coreml_rank5_mapping + ) + + builder = NeuralNetworkBuilder( + input_features, + output_features, + mode=mode, + disable_rank5_shape_mapping=disable_coreml_rank5_mapping, + ) + + # TODO: To be removed once, auto-downgrading of spec version is enabled + builder.spec.specificationVersion = SupportedVersion.get_specification_version( + minimum_ios_deployment_target + ) + + """ + Set CoreML input,output types (float, double, int) same as onnx types, if supported + """ + _transform_coreml_dtypes(builder, graph.inputs, graph.outputs) + + """what follows is some book-keeping to support outputs of type image. 
+    """
+
+    is_deprocess_bgr_only = (len(deprocessing_args) == 1) and (
+        "is_bgr" in deprocessing_args
+    )
+    add_deprocess = (
+        (len(image_output_names) > 0)
+        and (len(deprocessing_args) > 0)
+        and (not is_deprocess_bgr_only)
+    )
+
+    if add_deprocess:
+        mapping = {}
+        for f in output_features:
+            output_name = f[0]
+            mapping[output_name] = graph.get_unique_edge_name(output_name)
+        graph = OutputRenamer(mapping)(graph)
+
+    if len(image_input_names) > 0:
+        builder.set_pre_processing_parameters(
+            image_input_names=image_input_names,
+            is_bgr=preprocessing_args.get("is_bgr", False),
+            red_bias=preprocessing_args.get("red_bias", 0.0),
+            green_bias=preprocessing_args.get("green_bias", 0.0),
+            blue_bias=preprocessing_args.get("blue_bias", 0.0),
+            gray_bias=preprocessing_args.get("gray_bias", 0.0),
+            image_scale=preprocessing_args.get("image_scale", 1.0),
+        )
+
+        preprocessing_args.clear()
+
+    if len(image_output_names) > 0:
+        for f in output_features:
+            f_name = f[0]
+            if f_name in image_output_names:
+                is_bgr = deprocessing_args.get("is_bgr", False)
+                _convert_multiarray_output_to_image(builder.spec, f_name, is_bgr=is_bgr)
+
+    """
+    Iterate through all the ONNX ops and translate them to CoreML layers, one by one.
+    """
+
+    """
+    Before starting the layer translation process, check whether the ONNX graph
+    contains an op whose translation function is not yet implemented in the
+    converter or which is not supported by the CoreML framework. If so, raise an
+    error before starting the process.
+    (If the user wants to add a custom layer, this check is not required.)
+    """
+    if not add_custom_layers:
+        _check_unsupported_ops(graph.nodes, disable_coreml_rank5_mapping)
+
+    """
+    ErrorHandling is a generic class used to store a variety of parameters during
+    the conversion process.
+    """
+    err = ErrorHandling(add_custom_layers, custom_conversion_functions)
+
+    for i, node in enumerate(graph.nodes):
+        print(
+            "%d/%d: Converting Node Type %s" % (i + 1, len(graph.nodes), node.op_type)
+        )
+        if disable_coreml_rank5_mapping:
+            _convert_node_nd(builder, node, graph, err)
+        else:
+            _add_const_inputs_if_required(builder, node, graph, err)
+            _convert_node(builder, node, graph, err)
+
+    if DEBUG:
+        plot_graph(
+            graph,
+            graph_img_path="/tmp/after_conversion.pdf",
+            show_coreml_mapped_shapes=not disable_coreml_rank5_mapping,
+        )
+
+    if add_deprocess:
+        for f in output_features:
+            output_name = f[0]
+            if output_name not in image_output_names:
+                continue
+            output_shape = f[1].dimensions
+            if len(output_shape) == 2 or output_shape[0] == 1:
+                is_grayscale = True
+            elif output_shape[0] == 3:
+                is_grayscale = False
+            else:
+                raise ValueError("Output must be an RGB or grayscale image")
+            _set_deprocessing(
+                is_grayscale,
+                builder,
+                deprocessing_args,
+                mapping[output_name],
+                output_name,
+            )
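+
+    # (For reference, hypothetical values: class_labels may be a file path such
+    #  as "labels.txt" containing one label per line, or a list like
+    #  ["cat", "dog", "fish"] whose indices match the network's output.)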
+    if class_labels is not None:
+        if isinstance(class_labels, Text):
+            with open(class_labels) as f:
+                labels = [l.strip() for l in f.readlines()]  # type: Sequence[Text]
+        elif isinstance(class_labels, list):
+            labels = class_labels
+        else:
+            raise TypeError(
+                "Unknown type for class_labels: {}. "
+                "Expected either a string or a list of strings.".format(
+                    type(class_labels),
+                )
+            )
+
+        builder.set_class_labels(
+            class_labels=labels, predicted_feature_name=predicted_feature_name
+        )
+
+    def _add_informative_description(feature, raise_error=True):
+        if feature.type.WhichOneof("Type") == "multiArrayType":
+            if (
+                feature.name in graph.onnx_coreml_shape_mapping
+                and feature.name in graph.shape_dict
+            ):
+                mapp = graph.onnx_coreml_shape_mapping[feature.name]
+                onnx_shape = graph.shape_dict[feature.name]
+                if raise_error:
+                    assert len(mapp) == len(
+                        onnx_shape
+                    ), "Rank mismatch between the onnx-coreml shape mapping and the ONNX shape"
+                if len(mapp) == len(onnx_shape):
+                    shape = []
+                    for i in range(5):
+                        if i in mapp:
+                            shape += [int(onnx_shape[mapp.index(i)])]
+                        else:
+                            shape += [1]
+                    msg = "MultiArray of shape {}. The first and second dimensions correspond to sequence and batch size, respectively.".format(
+                        str(tuple(shape))
+                    )
+                    feature.shortDescription += msg
+
+    optional_input_names = []
+    for tup in graph.optional_inputs:
+        optional_input_names.append(tup[0])
+    optional_output_names = []
+    for tup in graph.optional_outputs:
+        optional_output_names.append(tup[0])
+
+    # add descriptions for input and output shapes
+    remove_input_id = []
+    for i, input_ in enumerate(builder.spec.description.input):
+        if input_.name not in optional_input_names:
+            if not disable_coreml_rank5_mapping:
+                _add_informative_description(input_)
+        else:
+            remove_input_id.append(i)
+    remove_output_id = []
+    for i, output_ in enumerate(builder.spec.description.output):
+        if output_.name not in optional_output_names:
+            if not disable_coreml_rank5_mapping:
+                _add_informative_description(output_, raise_error=False)
+        else:
+            remove_output_id.append(i)
+
+    for index in sorted(remove_input_id, reverse=True):
+        del builder.spec.description.input[index]
+    for index in sorted(remove_output_id, reverse=True):
+        del builder.spec.description.output[index]
+
+    if len(graph.optional_inputs) > 0 or len(graph.optional_outputs) > 0:
+        builder.add_optionals(graph.optional_inputs, graph.optional_outputs)
+
+    # Check for specification version and target iOS compatibility
+    if (
+        minimum_ios_deployment_target == "11.2"
+        and builder.spec.WhichOneof("Type") == "neuralNetwork"
+    ):
+        nn_spec = builder.spec.neuralNetwork
+        for layer in nn_spec.layers:
+            if (
+                layer.WhichOneof("layer") == "resizeBilinear"
+                or layer.WhichOneof("layer") == "cropResize"
+            ):
+                raise TypeError(
+                    "{} is not supported with target iOS 11.2. Please provide a higher target iOS.".format(
+                        layer.WhichOneof("layer")
+                    )
+                )
+
+    # Optimize the ML Model Spec
+    ml_model_passes = [remove_disconnected_layers, transform_conv_crop]
+    for opt in ml_model_passes:
+        opt(builder.spec)
+
+    print("Translation to CoreML spec completed. 
Now compiling the CoreML model.") + try: + if DEBUG: + import coremltools + + coremltools.models.utils.save_spec( + builder.spec, "/tmp/node_model_raw_spec.mlmodel" + ) + from coremltools.models.neural_network.printer import print_network_spec + + print_network_spec(builder.spec, style="coding") + mlmodel = MLModel(builder.spec) + except RuntimeError as e: + raise ValueError("Compilation failed: {}".format(str(e))) + print("Model Compilation done.") + + # print information about all ops for which custom layers have been added + if len(err.custom_layer_nodes) > 0: + print("\n") + print( + "Custom layers have been added to the CoreML model " + "corresponding to the following ops in the onnx model: " + ) + for i, node in enumerate(err.custom_layer_nodes): + input_info = [] + for input_ in node.inputs: + input_info.append( + ( + str(input_), + graph.shape_dict.get(input_, str("Shape not available")), + ) + ) + output_info = [] + for output_ in node.outputs: + output_info.append( + ( + str(output_), + graph.shape_dict.get(output_, str("Shape not available")), + ) + ) + print( + "{}/{}: op type: {}, op input names and shapes: {}, op output names and shapes: {}".format( + i + 1, + len(err.custom_layer_nodes), + node.op_type, + str(input_info), + str(output_info), + ) + ) + + mlmodel.user_defined_metadata[_METADATA_VERSION] = ct_version + mlmodel.user_defined_metadata[_METADATA_SOURCE] = "onnx=={0}".format( + onnx.__version__ + ) + return mlmodel diff --git a/coremltools/converters/onnx/_error_utils.py b/coremltools/converters/onnx/_error_utils.py new file mode 100644 index 000000000..5305fdec5 --- /dev/null +++ b/coremltools/converters/onnx/_error_utils.py @@ -0,0 +1,105 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ + +from typing import Dict, Text, Any, Callable +from coremltools.models.neural_network import NeuralNetworkBuilder # type: ignore +from ._graph import Node, Graph + + +class ErrorHandling(object): + """ + To handle errors and addition of custom layers + """ + + def __init__( + self, + add_custom_layers=False, # type: bool + custom_conversion_functions=dict(), # type: Dict[Text, Any] + custom_layer_nodes=[], # type : List[Node] + ): + # type: (...) -> None + self.add_custom_layers = add_custom_layers + self.custom_conversion_functions = custom_conversion_functions + self.custom_layer_nodes = custom_layer_nodes + + self.rerun_suggestion = ( + "\n Please try converting with higher minimum_ios_deployment_target.\n" + "You can also provide custom function/layer to convert the model." + ) + + def unsupported_op( + self, node, # type: Node + ): + # type: (...) -> Callable[[Any, Node, Graph, ErrorHandling], None] + """ + Either raise an error for an unsupported op type or return custom layer add function + """ + if self.add_custom_layers: + from ._operators import _convert_custom + + return _convert_custom + else: + raise TypeError( + "ONNX node of type {} is not supported. {}\n".format( + node.op_type, self.rerun_suggestion + ) + ) + + def unsupported_op_configuration( + self, + builder, # type: NeuralNetworkBuilder + node, # type: Node + graph, # type: Graph + err_message, # type: Text + ): + # type: (...) -> None + """ + Either raise an error for an unsupported attribute or add a custom layer. + """ + if self.add_custom_layers: + from ._operators import _convert_custom + + _convert_custom(builder, node, graph, self) + else: + raise TypeError( + "Error while converting op of type: {}. 
Error message: {} {}\n".format(
+                    node.op_type, err_message, self.rerun_suggestion
+                )
+            )
+
+    def missing_initializer(
+        self,
+        node,  # type: Node
+        err_message,  # type: Text
+    ):
+        # type: (...) -> None
+        """
+        Missing initializer error
+        """
+        raise ValueError(
+            "Missing initializer error in op of type {}, with input name = {}, "
+            "output name = {}. Error message: {} {}\n".format(
+                node.op_type,
+                node.inputs[0],
+                node.outputs[0],
+                err_message,
+                self.rerun_suggestion,
+            )
+        )
+
+    def unsupported_feature_warning(
+        self,
+        node,  # type: Node
+        warn_message,  # type: Text
+    ):
+        # type: (...) -> None
+        """
+        Unsupported feature warning
+        """
+        print(
+            "Warning: Unsupported Feature in op of type {}, with input name = {}, "
+            "output name = {}. Warning message: {}\n".format(
+                node.op_type, node.inputs[0], node.outputs[0], warn_message
+            )
+        )
diff --git a/coremltools/converters/onnx/_graph.py b/coremltools/converters/onnx/_graph.py
new file mode 100644
index 000000000..0ccfab5fe
--- /dev/null
+++ b/coremltools/converters/onnx/_graph.py
@@ -0,0 +1,317 @@
+from __future__ import absolute_import as _
+from __future__ import division as _
+from __future__ import print_function as _
+from __future__ import unicode_literals as _
+
+from onnx import (
+    numpy_helper,
+    ValueInfoProto,
+    AttributeProto,
+    GraphProto,
+    NodeProto,
+    TensorProto,
+    TensorShapeProto,
+)
+from typing import Any, Text, Iterable, List, Dict, Sequence, Optional, Tuple, Union
+from typing_extensions import Protocol
+import numpy as np
+
+
+class Transformer(Protocol):
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        pass
+
+
+EdgeInfo = Tuple[Text, Any, TensorShapeProto]
+AttributeValue = Any  # TODO Union[Sequence[float], Sequence[int], Sequence[Text], Sequence[TensorProto], Sequence[GraphProto]]
+
+
+def _input_from_onnx_input(input):  # type: (ValueInfoProto) -> EdgeInfo
+    name = input.name
+    type = input.type.tensor_type.elem_type
+    shape = tuple([d.dim_value for d in input.type.tensor_type.shape.dim])
+    return (name, type, shape)
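+
+
+# (Illustrative sketch: for an ONNX graph input named "data" declared as a
+#  float tensor of shape (1, 3, 224, 224), _input_from_onnx_input returns the
+#  EdgeInfo tuple ("data", TensorProto.FLOAT, (1, 3, 224, 224)); the name and
+#  shape here are hypothetical.)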
+
+
+def _convertAttributeProto(onnx_arg):  # type: (AttributeProto) -> AttributeValue
+    """
+    Convert an ONNX AttributeProto into an appropriate Python object
+    for the type.
+    NB: Tensor attributes are returned as numpy arrays.
+    """
+    if onnx_arg.HasField("f"):
+        return onnx_arg.f
+    elif onnx_arg.HasField("i"):
+        return onnx_arg.i
+    elif onnx_arg.HasField("s"):
+        return onnx_arg.s
+    elif onnx_arg.HasField("t"):
+        return numpy_helper.to_array(onnx_arg.t)
+    elif len(onnx_arg.floats):
+        return list(onnx_arg.floats)
+    elif len(onnx_arg.ints):
+        return list(onnx_arg.ints)
+    elif len(onnx_arg.strings):
+        return list(onnx_arg.strings)
+    else:
+        return None
+
+
+def _extract_node_names(graph):  # type: (Graph) -> List[Text]
+    node_names = []
+    for node in graph.nodes:
+        node_names.append(node.name)
+    return node_names
+
+
+def _apply_graph_transformations(
+    graph, transformers
+):  # type: (Graph, Iterable[Transformer]) -> Graph
+    old_node_names = _extract_node_names(graph)  # type: ignore
+    while True:
+        for transformer in transformers:
+            graph = transformer(graph)
+        new_node_names = _extract_node_names(graph)  # type: ignore
+        if new_node_names == old_node_names:
+            break
+        old_node_names = new_node_names
+    return graph
+
+
+class Attributes(Dict[Text, Any]):
+    @staticmethod
+    def from_onnx(args):  # type: (Iterable[AttributeProto]) -> Attributes
+        d = Attributes()
+        for arg in args:
+            val = _convertAttributeProto(arg)
+            if val is not None:
+                d[arg.name] = val
+        return d
+
+
+class Node(object):
+    def __init__(
+        self,
+        name,  # type: Optional[Text]
+        op_type,  # type: Text
+        attrs,  # type: Dict[Text, AttributeValue]
+        inputs,  # type: List[Text]
+        outputs,  # type: List[Text]
+    ):
+        # type: (...) -> None
+        self.name = name
+        self.op_type = op_type
+        self.attrs = attrs
+        self.inputs = inputs
+        self.outputs = outputs
+        self.input_tensors = {}  # type: Dict[Text, np._ArrayLike[Any]]
+        self.parents = []  # type: List[Node]
+        self.children = []  # type: List[Node]
+        self.metadata = {}  # type: Dict[Any, Any]
+
+    def add_parent(self, parent_node):  # type: (Node) -> None
+        assert parent_node not in self.parents
+        self.parents.append(parent_node)
+        if self not in parent_node.children:
+            parent_node.children.append(self)
+
+    def add_child(self, child_node):  # type: (Node) -> None
+        assert child_node not in self.children
+        self.children.append(child_node)
+        if self not in child_node.parents:
+            child_node.parents.append(self)
+
+    def get_only_parent(self):  # type: () -> Node
+        if len(self.parents) != 1:
+            raise ValueError(
+                "Node ({}) expected to have 1 parent. Found {}.".format(
+                    self, len(self.parents)
+                )
+            )
+        return self.parents[0]
+
+    @staticmethod
+    def from_onnx(node):  # type: (NodeProto) -> Node
+        attrs = Attributes.from_onnx(node.attribute)
+        name = Text(node.name)
+        if len(name) == 0:
+            name = "_".join(node.output)
+        return Node(name, node.op_type, attrs, list(node.input), list(node.output))
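+
+
+# (Illustrative sketch, hypothetical names: two Node objects are linked with
+#  add_parent/add_child, which keep both adjacency lists in sync:
+#
+#      conv = Node("conv_0", "Conv", {}, ["x"], ["y"])
+#      relu = Node("relu_0", "Relu", {}, ["y"], ["z"])
+#      relu.add_parent(conv)  # also appends relu to conv.children
+# )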
+
+
+class Graph(object):
+    def __init__(
+        self,
+        nodes,  # type: List[Node]
+        inputs,  # type: List[EdgeInfo]
+        outputs,  # type: List[EdgeInfo]
+        shape_dict,  # type: Dict[Text,Tuple[int,...]]
+        onnx_ir_version,  # type: int
+    ):
+        # type: (...) -> None
+        self.nodes = nodes
+        self.inputs = inputs
+        self.outputs = outputs
+        self.shape_dict = shape_dict  # data blob name to its shape
+        self.constants_loaded = set()  # set of constants present in graph as nodes
+        self.onnx_ir_version = onnx_ir_version  # ONNX IR version for the current graph
+
+        self.optional_inputs = (
+            []
+        )  # list of tuple(str, tuple(int)), use with recurrent layers
+        self.optional_outputs = (
+            []
+        )  # list of tuple(str, tuple(int)), use with recurrent layers
+
+        """
+        All axes in CoreML tensor shapes are annotated. That is,
+        0: Sequence
+        1: Batch
+        2: Channel
+        3: Height
+        4: Width
+        This dictionary "onnx_coreml_shape_mapping" records the onnx-shape-to-coreml-shape mapping for
+        every tensor (including intermediate tensors) in the onnx graph.
+        Only the "rank" (i.e. number of dimensions) of the onnx tensor needs to be known at
+        conversion time, not its actual shape.
+
+        The dict maps "str" -> List of ints
+
+        e.g. "x" -> [1,3] carries the following information:
+        - "x" is rank 2
+        - "x" in CoreML will have the shape [Seq=1, B=x.shape[0], C=1, H=x.shape[1], W=1]
+
+        e.g. "x" -> [1,3,2] carries the following information:
+        - "x" is rank 3
+        - "x" in CoreML will have the shape [Seq=1, B=x.shape[0], C=x.shape[2], H=x.shape[1], W=1]
+
+        The dictionary "onnx_coreml_shape_mapping" is built progressively as the onnx graph is converted to the CoreML graph.
+        The op-to-layer conversion functions use the information in this dict to correctly set the parameters of the CoreML layer
+        to be added, and at the end they update the dict with that layer's output(s).
+        """
+        self.onnx_coreml_shape_mapping = {}  # type: Dict[Text, List[int,...]]
+
+        # data blob name to the list of op types it feeds into
+        self.blob_to_op_type = {}  # type: Dict[Text, List[Text]]
+        # data blob name to the op_type that generates it
+        self.blob_from_op_type = {}  # type: Dict[Text, Text]
+
+        self.constant_layers_added = {}  # type: Dict[Text, bool]
+
+        for node_ in nodes:
+            for input_ in node_.inputs:
+                if input_ in self.blob_to_op_type:
+                    self.blob_to_op_type[input_].append(node_.op_type)
+                else:
+                    self.blob_to_op_type[input_] = [node_.op_type]
+            for output_ in node_.outputs:
+                if output_ in self.blob_from_op_type:
+                    raise ValueError(
+                        "Data blob: %s, is generated by more than 1 op" % (output_)
+                    )
+                self.blob_from_op_type[output_] = node_.op_type
+
+    def create_graph(
+        self,
+        nodes=None,
+        inputs=None,
+        outputs=None,
+        shape_dict=None,
+        onnx_ir_version=None,
+    ):
+        nodes = self.nodes if nodes is None else nodes
+        inputs = self.inputs if inputs is None else inputs
+        outputs = self.outputs if outputs is None else outputs
+        shape_dict = self.shape_dict if shape_dict is None else shape_dict
+        onnx_ir_version = (
+            self.onnx_ir_version if onnx_ir_version is None else onnx_ir_version
+        )
+        return Graph(nodes, inputs, outputs, shape_dict, onnx_ir_version)
+
+    def transformed(self, transformers):  # type: (Iterable[Transformer]) -> Graph
+        graph = self
+        return _apply_graph_transformations(graph, transformers)  # type: ignore
+
+    def has_edge_name(self, name):  # type: (Text) -> bool
+        """
+        Check if name is already used for graph inputs/outputs or for node
+        inputs/outputs
+        """
+        names = set()
+        for input in self.inputs:
+            names.add(input[0])
+        for output in self.outputs:
+            names.add(output[0])
+        for node in self.nodes:
+            names.update(node.inputs)
+            names.update(node.outputs)
+        return name in names
+
+    def get_unique_edge_name(self, name):  # type: (Text) -> Text
+        n_ = name
+        i = 0
+        while self.has_edge_name(n_):
+            n_ = "{}_{}".format(name, i)
+            i += 1
+        return n_
+
+    @staticmethod
+    def from_onnx(graph, onnx_ir_version):  # type: (GraphProto, int) -> Graph
+        input_tensors = {t.name: numpy_helper.to_array(t) for t in graph.initializer}
+        nodes_ = []
+        nodes_by_input = {}  # type: Dict[Text, List[Node]]
+        nodes_by_output = {}
+        for node in graph.node:
+            node_ = Node.from_onnx(node)
+            for input_ in node_.inputs:
+                if input_ in input_tensors:
+                    node_.input_tensors[input_] = input_tensors[input_]
+                else:
+                    if input_ in nodes_by_input:
+                        input_nodes = 
nodes_by_input[input_] + else: + input_nodes = [] + nodes_by_input[input_] = input_nodes + input_nodes.append(node_) + for output_ in node_.outputs: + nodes_by_output[output_] = node_ + nodes_.append(node_) + + inputs = [] + for i in graph.input: + if i.name not in input_tensors: + inputs.append(_input_from_onnx_input(i)) + + outputs = [] + for o in graph.output: + outputs.append(_input_from_onnx_input(o)) + + for node_ in nodes_: + for input_ in node_.inputs: + if input_ in nodes_by_output: + node_.parents.append(nodes_by_output[input_]) + for output_ in node_.outputs: + if output_ in nodes_by_input: + node_.children.extend(nodes_by_input[output_]) + + # Dictionary to hold the "value_info" field from ONNX graph + shape_dict = {} # type: Dict[Text,Tuple[int,...]] + + def extract_value_info( + shape_dict, # type: Dict[Text,Tuple[int,...]] + value_info, # type: ValueInfoProto[...] + ): + # type: (...) -> None + t = tuple( + [int(dim.dim_value) for dim in value_info.type.tensor_type.shape.dim] + ) + if t: + shape_dict[value_info.name] = t + + for value_info in graph.value_info: + extract_value_info(shape_dict, value_info) + for value_info in graph.input: + extract_value_info(shape_dict, value_info) + for value_info in graph.output: + extract_value_info(shape_dict, value_info) + + return Graph(nodes_, inputs, outputs, shape_dict, onnx_ir_version) diff --git a/coremltools/converters/onnx/_graph_viz.py b/coremltools/converters/onnx/_graph_viz.py new file mode 100644 index 000000000..e6c896471 --- /dev/null +++ b/coremltools/converters/onnx/_graph_viz.py @@ -0,0 +1,126 @@ +import os + + +def _shape_notation(int_shape): + X = ["S", "B", "C", "H", "W"] + return [X[i] for i in int_shape] + + +def plot_graph(graph, graph_img_path="graph.png", show_coreml_mapped_shapes=False): + """ + Plot graph using pydot + + It works in two steps: + 1. Add nodes to pydot + 2. connect nodes added in pydot + + :param graph + :return: writes down a png/pdf file using dot + """ + + try: + # pydot-ng is a fork of pydot that is better maintained. + import pydot_ng as pydot # type: ignore + except: + # pydotplus is an improved version of pydot + try: + import pydotplus as pydot # type: ignore + except: + # Fall back on pydot if necessary. 
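+            # (All three packages expose a compatible pydot API; rendering
+            #  below additionally requires the Graphviz "dot" binary to be
+            #  installed on the system.)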
+ try: + import pydot # type: ignore + except: + return None + + dot = pydot.Dot() + dot.set("rankdir", "TB") + dot.set("concentrate", True) + dot.set_node_defaults(shape="record") + + # Add nodes corresponding to graph inputs + graph_inputs = [] + for input_ in graph.inputs: + if show_coreml_mapped_shapes: + if input_[0] in graph.onnx_coreml_shape_mapping: + shape = tuple( + _shape_notation(graph.onnx_coreml_shape_mapping[input_[0]]) + ) + else: + shape = "NA, " + else: + shape = tuple(input_[2]) + label = "%s\n|{|%s}|{{%s}|{%s}}" % ("Input", input_[0], "", str(shape)) + pydot_node = pydot.Node(input_[0], label=label) + dot.add_node(pydot_node) + graph_inputs.append(input_[0]) + + # Traverse graph and add nodes to pydot + for node in graph.nodes: + inputlabels = "" + for input_ in node.inputs: + if show_coreml_mapped_shapes: + if input_ in graph.onnx_coreml_shape_mapping: + inputlabels += ( + str( + tuple( + _shape_notation(graph.onnx_coreml_shape_mapping[input_]) + ) + ) + + ", " + ) + else: + inputlabels += "NA, " + else: + if input_ in graph.shape_dict: + inputlabels += str(tuple(graph.shape_dict[input_])) + ", " + else: + inputlabels += "NA, " + outputlabels = "" + for output_ in node.outputs: + if show_coreml_mapped_shapes: + if output_ in graph.onnx_coreml_shape_mapping: + outputlabels += ( + str( + tuple( + _shape_notation( + graph.onnx_coreml_shape_mapping[output_] + ) + ) + ) + + ", " + ) + else: + outputlabels += "NA, " + else: + if output_ in graph.shape_dict: + outputlabels += str(tuple(graph.shape_dict[output_])) + ", " + else: + outputlabels += "NA, " + output_names = ", ".join([output_ for output_ in node.outputs]) + input_names = ", ".join([input_ for input_ in node.inputs]) + label = "%s\n|{{%s}|{%s}}|{{%s}|{%s}}" % ( + node.op_type, + input_names, + output_names, + inputlabels, + outputlabels, + ) + pydot_node = pydot.Node(node.name, label=label) + dot.add_node(pydot_node) + + # add edges + for node in graph.nodes: + for child in node.children: + # add edge in pydot + dot.add_edge(pydot.Edge(node.name, child.name)) + for input_ in node.inputs: + if input_ in graph_inputs: + dot.add_edge(pydot.Edge(input_, node.name)) + + # write out the image file + _, extension = os.path.splitext(graph_img_path) + if not extension: + extension = "pdf" + else: + extension = extension[1:] + dot.write(graph_img_path, format=extension) diff --git a/coremltools/converters/onnx/_operators.py b/coremltools/converters/onnx/_operators.py new file mode 100644 index 000000000..5b5377f6e --- /dev/null +++ b/coremltools/converters/onnx/_operators.py @@ -0,0 +1,2668 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +import numpy as np +import copy + +from typing import Sequence, Callable, List, Tuple, Optional, Text, Any +from coremltools.models.neural_network import NeuralNetworkBuilder # type: ignore +from ._graph import Node, Graph +from coremltools.proto import NeuralNetwork_pb2 # type: ignore +from ._error_utils import ErrorHandling + +INT_MAX = 2 ** 30 + +""" +General common functions +""" + + +def _compare(a, b, encoding="utf8"): # type: (Text, Text, Text) -> bool + if isinstance(a, bytes): + a = a.decode(encoding) + if isinstance(b, bytes): + b = b.decode(encoding) + return a == b + + +def _is_input_shape_mapping_defined(node, graph): # type: (Node, Graph) -> Bool + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + return True + else: + return False + + +def 
_update_shape_mapping_unchanged( + node, graph, err +): # type: (Node, Graph, ErrorHandling) -> None + if _is_input_shape_mapping_defined(node, graph): + graph.onnx_coreml_shape_mapping[ + node.outputs[0] + ] = graph.onnx_coreml_shape_mapping[node.inputs[0]] + + +def _convert_broadcast_op( + builder, node, graph, err, mode +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling, Text) -> None + if node.op_type == "Max" or node.op_type == "Min" or node.op_type == "Mean": + if len(node.inputs) == 1: + inputs = [node.inputs[0], node.inputs[0]] + else: + inputs = node.inputs + else: + inputs = node.inputs + + if node.op_type == "Sub": + builder.add_elementwise( + name=node.name + "_neg", + input_names=[inputs[1]], + output_name=inputs[1] + "_neg", + mode="MULTIPLY", + alpha=-1.0, + ) + builder.add_elementwise( + name=node.name, + input_names=[inputs[0], inputs[1] + "_neg"], + output_name=node.outputs[0], + mode=mode, + ) + else: + builder.add_elementwise( + name=node.name, input_names=inputs, output_name=node.outputs[0], mode=mode + ) + + if _is_input_shape_mapping_defined(node, graph): + ranks = [len(graph.onnx_coreml_shape_mapping[input_]) for input_ in node.inputs] + max_id = np.argmax(np.array(ranks)) + graph.onnx_coreml_shape_mapping[ + node.outputs[0] + ] = graph.onnx_coreml_shape_mapping[node.inputs[max_id]] + + +def _get_coreml_target_shape(target_shape, builder, node, graph, err): + # type: (Tuple[int, ...], NeuralNetworkBuilder, node, Graph, ErrorHandling) -> Optional[Tuple[int, ...]] + + if len(target_shape) == 1: # (D,) + coreml_shape = (1, target_shape[0], 1, 1) # type: Optional[Tuple[int, ...]] + if _is_input_shape_mapping_defined(node, graph): + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [2] + elif len(target_shape) == 2: # (S,D) + coreml_shape = target_shape + (1, 1) + if _is_input_shape_mapping_defined(node, graph): + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [0, 2] + elif len(target_shape) == 3: # (C,H,W) + coreml_shape = (1, target_shape[0], target_shape[1], target_shape[2]) + if _is_input_shape_mapping_defined(node, graph): + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [2, 3, 4] + elif len(target_shape) == 4: + coreml_shape = target_shape + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if mapp[0] == 1 and coreml_shape[0] == 1: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [1, 2, 3, 4] + else: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [0, 2, 3, 4] + elif len(target_shape) > 4: + # return err.unsupported_op_configuration(builder, node, graph, "Supports tensors not more than 4d") # type: ignore + diff = len(target_shape) - 4 + if all([d == 1 for d in target_shape[:diff]]): + coreml_shape = target_shape[diff:] + else: + err.unsupported_op_configuration(builder, node, graph, "Tensors more than rank 4 are not supported") # type: ignore + if _is_input_shape_mapping_defined(node, graph): + if target_shape[0] == 1 and len(target_shape) == 5: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [1, 0, 2, 3, 4] + else: + return err.unsupported_op_configuration(builder, node, graph, "Supports tensors not more than 4d") # type: ignore + else: + coreml_shape = None + return coreml_shape + + +def _get_coreml_axis( + axes, builder, node, graph, err +): # type: (List[int], NeuralNetworkBuilder, node, Graph, ErrorHandling) -> Text + coreml_axis = "" + if node.inputs[0] not in graph.shape_dict: + return err.unsupported_op_configuration( + builder, node, graph, "Failed to translate 
axis" + ) + input_shape = graph.shape_dict[node.inputs[0]] + if len(input_shape) == 1: + coreml_axis = "C" + elif len(input_shape) == 2: + if len(axes) == 1 and axes[0] == 1: + coreml_axis = "C" + elif len(input_shape) == 3: + for ind in [["C", "H", "W"][i] for i in axes]: + coreml_axis += ind + elif len(input_shape) == 4: + for ind in [["B", "C", "H", "W"][i] for i in axes]: + coreml_axis += ind + return coreml_axis + + +def _add_transpose_before_after( + layer_func, # function for layer conversion + input_names, # List[str] + output_names, # List[str] + transpose_dims, # List[int] + **kwargs +): # type: ignore + + for i, input_ in enumerate(input_names): + kwargs["builder"].add_permute( + name=kwargs["node"].name + "_input_transpose" + str(i), + dim=transpose_dims, + input_name=input_, + output_name=kwargs["node"].name + "_" + input_ + "_transpose", + ) + + new_input_names = [ + kwargs["node"].name + "_" + input_ + "_transpose" for input_ in input_names + ] + new_output_names = [output_ + "_transpose" for output_ in output_names] + layer_func(new_input_names, new_output_names, **kwargs) + + for i, output_ in enumerate(output_names): + kwargs["builder"].add_permute( + name=kwargs["node"].name + "_output_transpose" + str(i), + dim=transpose_dims, + input_name=output_ + "_transpose", + output_name=output_, + ) + + +def _add_inner_product(input_names, output_names, **kwargs): + node = kwargs["node"] + builder = kwargs["builder"] + builder.add_inner_product( + name=node.name, + W=kwargs["W"], + b=kwargs["b"], + input_channels=kwargs["W"].shape[1], + output_channels=kwargs["W"].shape[0], + has_bias=kwargs["b"] is not None, + input_name=input_names[0], + output_name=output_names[0], + ) + + +def _add_conv_like_op( + add_func, get_params_func, params_dict, builder, node, graph, err +): + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + + r = len(mapp) + if not (r == 3 or r == 4): + return err.unsupported_op_configuration( + builder, node, graph, "more than 4 axes not supported" + ) + if r == 4: + if not (mapp == [1, 2, 3, 4] or mapp == [0, 2, 3, 4]): + return err.unsupported_op_configuration( + builder, + node, + graph, + "error in axes alignment between onnx and coreml", + ) + get_params_func(builder, node, graph, err, params_dict) + add_func( + node.inputs, + node.outputs, + params_dict=params_dict, + node=node, + builder=builder, + graph=graph, + err=err, + ) + if r == 3: + if mapp == [1, 2, 3]: # [B,C,H] + # spatial dimension: height + get_params_func(builder, node, graph, err, params_dict, axis="height") + add_func( + node.inputs, + node.outputs, + params_dict=params_dict, + node=node, + builder=builder, + graph=graph, + err=err, + ) + elif mapp == [1, 2, 4]: # [B,C,W] + # spatial dimension: width + get_params_func(builder, node, graph, err, params_dict, axis="width") + add_func( + node.inputs, + node.outputs, + params_dict=params_dict, + node=node, + builder=builder, + graph=graph, + err=err, + ) + elif mapp == [ + 2, + 3, + 4, + ]: # [C,H,W] in CoreML, but it represents [B,C,D] in ONNX. 
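+                # (Here the CoreML H axis carries the ONNX channel axis, so the
+                #  [0, 2, 1, 3] permute below swaps C and H around the conv and
+                #  the conv's width dimension acts on the ONNX spatial axis D.)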
+ # spatial dimension: sequence + get_params_func(builder, node, graph, err, params_dict, axis="width") + node.inputs = [node.inputs[0]] + _add_transpose_before_after( + add_func, + node.inputs, + node.outputs, + [0, 2, 1, 3], # swap C & H + builder=builder, + node=node, + params_dict=params_dict, + graph=graph, + err=err, + ) + + elif mapp == [1, 2, 0]: # [B,C,S] + # spatial dimension: sequence + get_params_func(builder, node, graph, err, params_dict, axis="width") + node.inputs = [node.inputs[0]] + _add_transpose_before_after( + add_func, + node.inputs, + node.outputs, + [3, 1, 2, 0], + builder=builder, + node=node, + params_dict=params_dict, + graph=graph, + err=err, + ) + else: + return err.unsupported_op_configuration( + builder, + node, + graph, + "error in axes alignment between onnx and coreml", + ) + + else: + get_params_func(builder, node, graph, err, params_dict) + add_func( + node.inputs, + node.outputs, + params_dict=params_dict, + builder=builder, + node=node, + graph=graph, + err=err, + ) + + +def _is_no_op( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> Bool + + if node.inputs[0] in graph.shape_dict and node.outputs[0] in graph.shape_dict: + if graph.shape_dict[node.inputs[0]] == graph.shape_dict[node.outputs[0]]: + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=[1.0, 0.0], + ) + _update_shape_mapping_unchanged(node, graph, err) + return True + + return False + + +""" +Layer conversion functions +""" + + +def _convert_abs( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="abs", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_add( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + # check if its equivalent to a bias layer + if len(node.inputs) > 1: + if node.inputs[1] in node.input_tensors: + second_input = np.squeeze(node.input_tensors[node.inputs[1]]) + if len(second_input.shape) == 1: + builder.add_bias( + name=node.name, + b=second_input, + input_name=node.inputs[0], + output_name=node.outputs[0], + shape_bias=[second_input.shape[0]], + ) + return + """ + Supported shapes by CoreML 2.0 for broadcasting (-1 means it can be 1 or greater than 1): + (i.e. 
all of the outputs must have one of these shapes for broadcasting support) + - (S=-1,B=-1,1,1,1) + - (S=-1,B=-1,C,1,1) + - (S=-1,B=-1,1,H,W) + - (S=-1,B=-1,C,H,W) + Unsupported: + - (S=-1,B=-1,1,1,W) + - (S=-1,B=-1,1,H,1) + - (S=-1,B=-1,C,1,W) + - (S=-1,B=-1,C,H,1) + """ + _convert_broadcast_op(builder, node, graph, err, "ADD") + + +def _convert_sub( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + _convert_broadcast_op(builder, node, graph, err, "ADD") + + +def _get_conv_params(builder, node, graph, err, params_dict, axis=None): + if "dilations" not in node.attrs: + params_dict["dilations"] = [1, 1] + else: + if axis == "height": + params_dict["dilations"] = node.attrs["dilations"] + params_dict["dilations"].append(1) + elif axis == "width": + params_dict["dilations"] = node.attrs["dilations"] + params_dict["dilations"].insert(0, 1) + else: + params_dict["dilations"] = node.attrs["dilations"] + + if "pads" not in node.attrs: + params_dict["pads"] = [0, 0, 0, 0] + else: + pads = node.attrs["pads"] + if axis == "height": + pads = [pads[0], 0, pads[1], 0] + elif axis == "width": + pads = [0, pads[0], 0, pads[1]] + params_dict["pads"] = pads + + if "kernel_shape" in node.attrs: + params_dict["kernel_shape"] = node.attrs["kernel_shape"] + else: + # w_shape is ONNX format shape + w_shape = params_dict["w_shape"] + if len(w_shape) == 4: + params_dict["kernel_shape"] = [w_shape[-2], w_shape[-1]] + else: + params_dict["kernel_shape"] = [w_shape[-1]] + params_dict["strides"] = node.attrs.get("strides", [1, 1] if axis is None else [1]) + + if axis == "height": + if params_dict["W"] is not None: + params_dict["W"] = np.expand_dims(params_dict["W"], axis=-1) + params_dict["kernel_shape"].append(1) + params_dict["strides"].append(1) + elif axis == "width": + if params_dict["W"] is not None: + params_dict["W"] = np.expand_dims(params_dict["W"], axis=-2) + params_dict["strides"].insert(0, 1) + params_dict["kernel_shape"].insert(0, 1) + + params_dict["out_shape"] = None + params_dict["padding_type"] = "valid" + params_dict["same_padding_asymmetry_mode"] = "BOTTOM_RIGHT_HEAVY" + + if params_dict["W"] is not None: + if not params_dict["is_deconv"]: + params_dict["W"] = params_dict["W"].transpose((2, 3, 1, 0)) # type: ignore + else: + params_dict["W"] = params_dict["W"].transpose((2, 3, 0, 1)) # type: ignore + + if "auto_pad" in node.attrs and not _compare(node.attrs["auto_pad"], "VALID"): + params_dict["padding_type"] = "same" + if _compare(node.attrs["auto_pad"], "SAME_LOWER"): + params_dict["same_padding_asymmetry_mode"] = "TOP_LEFT_HEAVY" + + if params_dict["is_deconv"]: + if "output_shape" in node.attrs: + if axis == "height": + params_dict["out_shape"] = ( + node.attrs["output_shape"][-1], + 1, + ) # (Hout, wout) + elif axis == "width": + params_dict["out_shape"] = ( + 1, + node.attrs["output_shape"][-1], + ) # (Hout, wout) + else: + params_dict["out_shape"] = ( + node.attrs["output_shape"][-2], + node.attrs["output_shape"][-1], + ) # (Hout, wout) + elif "output_padding" in node.attrs: + params_dict["crops"] = copy.copy(params_dict["pads"]) + params_dict["pads"] = [0, 0, 0, 0] + post_pads = node.attrs["output_padding"] + if sum(post_pads) != 0: + t = l = b = r = 0 + if len(post_pads) == 1: + if axis == "height": + b = post_pads[0] + elif axis == "width": + r = post_pads[0] + else: + err.unsupported_op_configuration( + builder, + node, + graph, + "length 1 output padding attribute only supported for 1D conv", + ) + elif len(post_pads) == 2: + if axis 
== "height": + b, r = post_pads + elif axis == "width": + r, b = post_pads + else: + b, r = post_pads + elif len(post_pads) == 4: + b, r, t, l = post_pads + else: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Supports only length 1 or 2 or 4 output padding attribute", + ) + + def _update_crop_pad(idx, v): + if params_dict["crops"][idx] >= v: + params_dict["crops"][idx] -= v + else: + params_dict["pads"][idx] = v - params_dict["crops"][idx] + + _update_crop_pad(0, t) + _update_crop_pad(1, l) + _update_crop_pad(2, b) + _update_crop_pad(3, r) + params_dict["is_post_crop"] = ( + True if sum(params_dict["crops"]) > 0 else False + ) + params_dict["is_pre_pad"] = ( + True if sum(params_dict["pads"]) > 0 else False + ) + + +def _add_conv(input_names, output_names, **kwargs): + params_dict = kwargs["params_dict"] + node = kwargs["node"] + builder = kwargs["builder"] + graph = kwargs["graph"] + err = kwargs["err"] + + W_shape = params_dict["w_shape"] + + output_name = output_names[0] + pre_padding_input_name = input_names[0] + + if params_dict.get("is_post_crop", False): + output_name += "_conv_tranpose_post_crop" + if params_dict.get("is_pre_pad", False): + input_names[0] += "_conv_tranpose_pre_pad" + + if params_dict["W"] is None and len(node.inputs) == 1: + return err.unsupported_op_configuration( + builder, node, graph, "Kernel weight missing" + ) + + if params_dict["is_deconv"]: + oc = W_shape[1] * params_dict["groups"] + kc = W_shape[0] + else: + oc = W_shape[0] + kc = W_shape[1] + + if params_dict.get("is_pre_pad", False): + builder.add_padding( + name=node.name + "_pre_pad", # type: ignore + left=params_dict["pads"][1], + right=params_dict["pads"][3], + top=params_dict["pads"][0], + bottom=params_dict["pads"][2], + input_name=pre_padding_input_name, + output_name=input_names[0], + value=0, + ) + builder.add_convolution( + name=node.name, + kernel_channels=kc, + output_channels=oc, + height=params_dict["kernel_shape"][0], + width=params_dict["kernel_shape"][1], + stride_height=params_dict["strides"][0], + stride_width=params_dict["strides"][1], + border_mode=params_dict["padding_type"], + same_padding_asymmetry_mode=params_dict["same_padding_asymmetry_mode"], + groups=params_dict["groups"], + W=params_dict["W"], + b=params_dict["bias"], + has_bias=params_dict["bias"] is not None, + is_deconv=params_dict["is_deconv"], + output_shape=params_dict["out_shape"], + input_name=input_names[0] + if params_dict["W"] is not None + else [input_names[0], input_names[1]], + output_name=output_name, + dilation_factors=params_dict["dilations"], + padding_top=params_dict["pads"][0], + padding_bottom=params_dict["pads"][2], + padding_left=params_dict["pads"][1], + padding_right=params_dict["pads"][3], + ) + if params_dict.get("is_post_crop", False): + builder.add_crop( + name=node.name + "_post_crop", # type: ignore + left=params_dict["crops"][1], + right=params_dict["crops"][3], + top=params_dict["crops"][0], + bottom=params_dict["crops"][2], + input_names=[output_name], + output_name=output_names[0], + offset=[0, 0], + ) + + +def _convert_conv( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + params_dict = dict() + # get weights for convolution + weight_name = node.inputs[1] + W = None + if weight_name in node.input_tensors: + W = node.input_tensors[weight_name] + params_dict["w_shape"] = W.shape + else: + err.missing_initializer( + node, + "Weight tensor: {} not found in the graph initializer".format(weight_name,), + ) + 
params_dict["W"] = W + + params_dict["is_deconv"] = False + if node.op_type.endswith("Transpose"): + params_dict["is_deconv"] = True + bias = None + if len(node.inputs) > 2: + bias = node.input_tensors[node.inputs[2]] + params_dict["bias"] = bias + params_dict["groups"] = node.attrs.get("group", 1) + + _add_conv_like_op( + _add_conv, _get_conv_params, params_dict, builder, node, graph, err + ) + + # update map + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_relu( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + non_linearity="RELU", + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_thresholdedrelu( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + alpha = node.attrs.get("alpha", 1.0) + builder.add_activation( + name=node.name, + non_linearity="THRESHOLDEDRELU", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=alpha, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_reshape( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + shape = tuple(node.attrs.get("shape", ())) # type: (Tuple[int, ...]) + if len(shape) == 0: + shape_name = node.inputs[1] + if shape_name in node.input_tensors: + shape = tuple(node.input_tensors[shape_name].astype(int)) # type: ignore + else: + err.missing_initializer( + node, + "CoreML only supports Reshape layer when the target shape is static and known apriori", + ) + + # check if all entries in shape are 1/-1 + is_flatten = True + for s in shape: + if abs(s) != 1: + is_flatten = False + break + if is_flatten: + builder.add_flatten( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode=0, + ) + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if len(shape) == 4: + mapp_out = [mapp[0], 2, 3, 4] + elif len(shape) == 3: + mapp_out = [2, 3, 4] + elif len(shape) == 2: + mapp_out = [mapp[0], 2] + elif len(shape) == 1: + mapp_out = [2] + else: + return err.unsupported_op_configuration( + builder, node, graph, "Supports only less than equal to 4d tensors" + ) + graph.onnx_coreml_shape_mapping[node.outputs[0]] = mapp_out + return + + new_shape = _get_coreml_target_shape(shape, builder, node, graph, err) + + if new_shape is None: + return err.unsupported_op_configuration( + builder, node, graph, "Unsupported shape for reshape" + ) + + builder.add_reshape( + name=node.name, + target_shape=new_shape, + mode=0, + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + + +def _convert_transpose( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + r = len(mapp) + default_perm = list(range(r)) + default_perm.reverse() + perm = node.attrs.get("perm", default_perm) + coreml_perm = [] + for p in perm: + coreml_perm.append(mapp[p]) + if 1 in mapp: + batch_index = mapp.index(1) + batch_index_new = coreml_perm.index(1) + if batch_index != batch_index_new: + return err.unsupported_op_configuration( + builder, node, graph, "cannot transpose batch dimension" + ) + perm_translated = [] + for c in coreml_perm: + if c == 0: + perm_translated.append(c) + elif c == 1: + continue + else: + 
perm_translated.append(c - 1) + perm_final = [ + -1, + -1, + -1, + -1, + ] # has to be of length 4 corresponding to [S,C,H,W] + for i in range(4): + if i not in perm_translated: + perm_final[i] = i + if perm_final.count(-1) != len(perm_translated): + return err.unsupported_op_configuration( + builder, node, graph, "unable to translate transpose op to CoreML" + ) + ctr = 0 + for i, v in enumerate(perm_final): + if v == -1: + perm_final[i] = perm_translated[ctr] + ctr += 1 + perm = tuple(perm_final) + else: + perm = node.attrs.get("perm", [0, 3, 2, 1]) + if len(perm) > 4: + diff = len(perm) - 4 + if all([perm[i] == i for i in range(diff)]): + perm = [p - diff for p in perm[diff:]] + else: + return err.unsupported_op_configuration( + builder, node, graph, "Supports only 4d tensors" + ) + elif len(perm) < 4: + diff = 4 - len(perm) + perm = [d for d in range(diff)] + [d + diff for d in perm] + perm = tuple(perm) + + builder.add_permute( + name=node.name, dim=perm, input_name=node.inputs[0], output_name=node.outputs[0] + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _get_pool_params(builder, node, graph, err, params_dict, axis=None): + ( + params_dict["pad_b"], + params_dict["pad_l"], + params_dict["pad_r"], + params_dict["pad_t"], + ) = (0, 0, 0, 0) + params_dict["stride_height"], params_dict["stride_width"] = 1, 1 + params_dict["padding_type"] = "VALID" + params_dict["same_padding_asymmetry_mode"] = "BOTTOM_RIGHT_HEAVY" + + if params_dict["is_global"]: + params_dict["height"], params_dict["width"] = 0, 0 + params_dict["stride_height"], params_dict["stride_width"] = 1, 1 + else: + kernel_shape = node.attrs["kernel_shape"] + if axis == "height": + params_dict["height"] = kernel_shape[0] + elif axis == "width": + params_dict["width"] = kernel_shape[0] + else: + params_dict["height"] = kernel_shape[0] + params_dict["width"] = kernel_shape[1] + + pads = node.attrs.get("pads", None) + if pads: + if axis == "height": + params_dict["pad_t"] = pads[0] + params_dict["pad_b"] = pads[1] + elif axis == "width": + params_dict["pad_l"] = pads[0] + params_dict["pad_r"] = pads[1] + else: + params_dict["pad_t"] = pads[0] + params_dict["pad_l"] = pads[1] + params_dict["pad_b"] = pads[2] + params_dict["pad_r"] = pads[3] + + strides = node.attrs.get("strides", [1, 1]) + if axis == "height": + params_dict["stride_height"] = strides[0] + elif axis == "width": + params_dict["stride_width"] = strides[0] + else: + params_dict["stride_height"] = strides[0] + params_dict["stride_width"] = strides[1] + + if "auto_pad" in node.attrs and not _compare(node.attrs["auto_pad"], "VALID"): + params_dict["padding_type"] = "SAME" + if _compare(node.attrs["auto_pad"], "SAME_LOWER"): + params_dict["same_padding_asymmetry_mode"] = "TOP_LEFT_HEAVY" + + params_dict["exclude_pad_area"] = node.attrs.get("count_include_pad", 0) == 0 + + +def _add_pool(input_names, output_names, **kwargs): + params_dict = kwargs["params_dict"] + node = kwargs["node"] + kwargs["builder"].add_pooling( + name=node.name, + height=params_dict.get("height", 1), + width=params_dict.get("width", 1), + stride_height=params_dict.get("stride_height", 1), + stride_width=params_dict.get("stride_width", 1), + layer_type=params_dict["layer_type"], + padding_type=params_dict["padding_type"], + exclude_pad_area=params_dict["exclude_pad_area"], + is_global=params_dict["is_global"], + input_name=input_names[0], + output_name=output_names[0], + padding_top=params_dict.get("pad_t", 0), + padding_bottom=params_dict.get("pad_b", 0), + 
+
+
+def _add_pool(input_names, output_names, **kwargs):
+    params_dict = kwargs["params_dict"]
+    node = kwargs["node"]
+    kwargs["builder"].add_pooling(
+        name=node.name,
+        height=params_dict.get("height", 1),
+        width=params_dict.get("width", 1),
+        stride_height=params_dict.get("stride_height", 1),
+        stride_width=params_dict.get("stride_width", 1),
+        layer_type=params_dict["layer_type"],
+        padding_type=params_dict["padding_type"],
+        exclude_pad_area=params_dict["exclude_pad_area"],
+        is_global=params_dict["is_global"],
+        input_name=input_names[0],
+        output_name=output_names[0],
+        padding_top=params_dict.get("pad_t", 0),
+        padding_bottom=params_dict.get("pad_b", 0),
+        padding_left=params_dict.get("pad_l", 0),
+        padding_right=params_dict.get("pad_r", 0),
+        same_padding_asymmetry_mode=params_dict["same_padding_asymmetry_mode"],
+    )
+
+
+def _convert_pool(
+    builder, node, graph, err
+):  # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None
+    input_name = node.inputs[0]
+    output_name = node.outputs[0]
+    params_dict = dict()
+    params_dict["is_global"] = False
+    if node.op_type.startswith("Global"):
+        params_dict["is_global"] = True
+    if node.op_type.endswith("MaxPool"):
+        params_dict["layer_type"] = "MAX"
+    elif node.op_type.endswith("AveragePool"):
+        params_dict["layer_type"] = "AVERAGE"
+    else:
+        return err.unsupported_op_configuration(
+            builder, node, graph, "Unsupported pool type"
+        )
+
+    if len(node.outputs) == 2:
+        return err.unsupported_op_configuration(
+            builder, node, graph, "argmax with pool unsupported"
+        )
+
+    if "ceil_mode" in node.attrs and node.attrs["ceil_mode"] == 1:
+        return err.unsupported_op_configuration(
+            builder, node, graph, "ceil_mode=1 not supported"
+        )
+
+    if "dilations" in node.attrs:
+        return err.unsupported_op_configuration(
+            builder, node, graph, "dilations not supported"
+        )
+
+    _add_conv_like_op(
+        _add_pool, _get_pool_params, params_dict, builder, node, graph, err
+    )
+
+    # update map
+    _update_shape_mapping_unchanged(node, graph, err)
+
+
+def _convert_bn(
+    builder, node, graph, err
+):  # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None
+    def add_bn(input_names, output_names, **kwargs):
+        kwargs["builder"].add_batchnorm(
+            name=node.name,
+            input_name=input_names[0],
+            output_name=output_names[0],
+            channels=kwargs["channels"][0],
+            gamma=kwargs["scale"],
+            beta=kwargs["bias"],
+            mean=kwargs["mean"],
+            variance=kwargs["var"],
+            epsilon=kwargs["epsilon"],
+        )
+
+    if len(node.outputs) > 1:
+        return err.unsupported_op_configuration(
+            builder,
+            node,
+            graph,
+            "This converter only supports BatchNormalization with one output",
+        )
+
+    epsilon = node.attrs.get("epsilon", 1e-5)
+    # scale, bias, mean and variance must all share a single shape
+    channels = set()
+    for v in node.input_tensors.values():
+        channels.add(v.shape)
+    assert len(channels) == 1
+    channels = channels.pop()
+    scale = (
+        node.input_tensors[node.inputs[1]]
+        if node.inputs[1] in node.input_tensors
+        else np.ones(shape=channels, dtype=np.float32)
+    )
+    bias = (
+        node.input_tensors[node.inputs[2]]
+        if node.inputs[2] in node.input_tensors
+        else np.zeros(shape=channels, dtype=np.float32)
+    )
+    mean = (
+        node.input_tensors[node.inputs[3]]
+        if node.inputs[3] in node.input_tensors
+        else np.zeros(shape=channels, dtype=np.float32)
+    )
+    var = (
+        node.input_tensors[node.inputs[4]]
+        if node.inputs[4] in node.input_tensors
+        else np.ones(shape=channels, dtype=np.float32)
+    )
+
+    mapp = graph.onnx_coreml_shape_mapping.get(node.inputs[0], None)
+    if mapp == [2, 3, 4]:
+        _add_transpose_before_after(
+            add_bn,
+            [node.inputs[0]],
+            node.outputs,
+            [0, 2, 1, 3],
+            builder=builder,
+            node=node,
+            scale=scale,
+            bias=bias,
+            mean=mean,
+            var=var,
+            epsilon=epsilon,
+            channels=channels,
+        )
+    else:
+        builder.add_batchnorm(
+            name=node.name,
+            channels=channels[0],
+            gamma=scale,
+            beta=bias,
+            mean=mean,
+            variance=var,
+            input_name=node.inputs[0],
+            output_name=node.outputs[0],
+            epsilon=epsilon,
+        )
+    _update_shape_mapping_unchanged(node, graph, err)
+
+
+def _convert_instancenorm(
+    builder, node, graph, err
+):  # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None
+    epsilon = node.attrs.get("epsilon", 1e-5)
+    scale = node.input_tensors[node.inputs[1]]
+    bias =
node.input_tensors[node.inputs[2]] + + builder.add_batchnorm( + name=node.name, + channels=scale.shape[0], + gamma=scale, + beta=bias, + compute_mean_var=True, + instance_normalization=True, + input_name=node.inputs[0], + output_name=node.outputs[0], + epsilon=epsilon, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_mul( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + _convert_broadcast_op(builder, node, graph, err, "MULTIPLY") + + +def _convert_mean( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + _convert_broadcast_op(builder, node, graph, err, "AVE") + + +def _convert_div( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_unary( + name=node.name + "_inverse", # type: ignore + input_name=node.inputs[1], + output_name=node.inputs[1] + "_inverse", + mode="inverse", + ) + builder.add_elementwise( + name=node.name, + input_names=[node.inputs[0], node.inputs[1] + "_inverse"], + output_name=node.outputs[0], + mode="MULTIPLY", + ) + if _is_input_shape_mapping_defined(node, graph): + ranks = [len(graph.onnx_coreml_shape_mapping[input_]) for input_ in node.inputs] + max_id = np.argmax(np.array(ranks)) + graph.onnx_coreml_shape_mapping[ + node.outputs[0] + ] = graph.onnx_coreml_shape_mapping[node.inputs[max_id]] + + +def _convert_leaky_relu( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + alpha = node.attrs.get("alpha", 0.01) + builder.add_activation( + name=node.name, + non_linearity="LEAKYRELU", + params=[alpha], + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_concat( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + def _add_concat(input_names, output_names, **kwargs): + kwargs["builder"].add_elementwise( + name=kwargs["node"].name, + input_names=input_names, + output_name=output_names[0], + mode=kwargs["mode"], + ) + + axis = node.attrs.get("axis", 1) + parent_op_type = graph.blob_from_op_type.get(node.inputs[0], None) + + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + caxis = mapp[axis] + if caxis == 0: + _add_concat( + node.inputs, + node.outputs, + node=node, + builder=builder, + mode="SEQUENCE_CONCAT", + ) + elif caxis == 2: + _add_concat( + node.inputs, node.outputs, node=node, builder=builder, mode="CONCAT" + ) + elif caxis == 3: + _add_transpose_before_after( + _add_concat, + node.inputs, + node.outputs, + [0, 2, 1, 3], + mode="CONCAT", + node=node, + builder=builder, + ) + elif caxis == 4: + _add_transpose_before_after( + _add_concat, + node.inputs, + node.outputs, + [0, 3, 2, 1], + mode="CONCAT", + node=node, + builder=builder, + ) + else: + return err.unsupported_op_configuration( + builder, node, graph, "Concat not supported along batch axis" + ) + else: + mode = None + first_input_shape = None + if node.inputs[0] in graph.shape_dict: + first_input_shape = graph.shape_dict[node.inputs[0]] + if ( + parent_op_type in _SEQUENCE_LAYERS_REGISTRY + and len(first_input_shape) == 3 + ): + if axis == 0: + mode = "SEQUENCE_CONCAT" + if axis == 2: + mode = "CONCAT" + elif ( + (len(first_input_shape) == 1 and axis == 0) + or (len(first_input_shape) == 3 and axis == 0) + or (len(first_input_shape) == 4 and axis == 1) + or (len(first_input_shape) == 2 and 
axis == 1)
+            ):
+                mode = "CONCAT"
+        else:  # shape info is not available; fall back to guessing (ideally this should not happen)
+            if axis == 0:
+                mode = "SEQUENCE_CONCAT"
+            elif axis == 1:
+                mode = "CONCAT"
+        if mode is None:
+            return err.unsupported_op_configuration(
+                builder,
+                node,
+                graph,
+                "Unsupported axis {} in input of shape {}".format(
+                    axis, str(first_input_shape)
+                ),
+            )
+        _add_concat(node.inputs, node.outputs, node=node, builder=builder, mode=mode)
+
+    _update_shape_mapping_unchanged(node, graph, err)
+
+
+def _convert_split(
+    builder, node, graph, err
+):  # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None
+    def _add_split(input_names, output_names, **kwargs):
+        kwargs["builder"].add_split(
+            name=kwargs["node"].name,
+            input_name=input_names[0],
+            output_names=output_names,
+        )
+
+    axis = node.attrs.get("axis", 0)
+    splits = node.attrs.get("split", None)
+    # check that all splits are equal in size
+    if splits:
+        if splits.count(splits[0]) != len(splits):
+            return err.unsupported_op_configuration(
+                builder, node, graph, "Only equal splits are supported"
+            )
+
+    if _is_input_shape_mapping_defined(node, graph):
+        mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]]
+        if mapp[axis] == 2:
+            _add_split(node.inputs, node.outputs, node=node, builder=builder)
+        elif mapp[axis] == 0:
+            _add_transpose_before_after(
+                _add_split,
+                node.inputs,
+                node.outputs,
+                [1, 0, 2, 3],
+                builder=builder,
+                node=node,
+            )
+        elif mapp[axis] == 3:
+            _add_transpose_before_after(
+                _add_split,
+                node.inputs,
+                node.outputs,
+                [0, 2, 1, 3],
+                builder=builder,
+                node=node,
+            )
+        elif mapp[axis] == 4:
+            _add_transpose_before_after(
+                _add_split,
+                node.inputs,
+                node.outputs,
+                [0, 3, 2, 1],
+                builder=builder,
+                node=node,
+            )
+        else:
+            err.unsupported_op_configuration(
+                builder, node, graph, "Split along Batch axis not supported"
+            )
+    else:
+        if not (axis == 0 or axis == 1):
+            return err.unsupported_op_configuration(
+                builder, node, graph, "Unsupported axis {}".format(axis)
+            )
+        _add_split(node.inputs, node.outputs, node=node, builder=builder)
+
+    if _is_input_shape_mapping_defined(node, graph):
+        for out_ in node.outputs:
+            graph.onnx_coreml_shape_mapping[out_] = graph.onnx_coreml_shape_mapping[
+                node.inputs[0]
+            ]
+
+
+def _convert_argmax(
+    builder, node, graph, err
+):  # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None
+    def _add_argmax_or_argmin(input_names, output_names, **kwargs):
+        input_name = input_names[0]
+        output_name = output_names[0]
+        if kwargs["node"].op_type == "ArgMin":
+            # argmin(x) == argmax(-x), so negate the input before reducing
+            kwargs["builder"].add_elementwise(
+                name=kwargs["node"].name + "_multiply_minus_1",  # type: ignore
+                input_names=[input_name],
+                output_name=input_name + "_multiply_minus_1",
+                mode="MULTIPLY",
+                alpha=-1,
+            )
+            input_name += "_multiply_minus_1"
+        kwargs["builder"].add_reduce(
+            name=kwargs["node"].name,
+            input_name=input_name,
+            output_name=output_name,
+            axis=kwargs["coreml_axis"],
+            mode="argmax",
+        )
+
+    """
+    Conversion
+    """
+    axis = node.attrs.get("axis", 0)
+    keepdims = node.attrs.get("keepdims", 1)
+
+    input_name = node.inputs[0]
+    output_name = node.outputs[0]
+
+    if _is_input_shape_mapping_defined(node, graph):
+        mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]]
+        coreml_axis = mapp[axis]
+        coreml_axis_string = "C"
+        if coreml_axis == 1:  # coreml_axis corresponds to the batch dimension
+            return err.unsupported_op_configuration(
+                builder, node, graph, "Cannot apply operation along Batch axis"
+            )
+        if coreml_axis != 0:
+            coreml_axis_string = ["C", "H", "W"][coreml_axis
- 2] + _add_argmax_or_argmin( + [input_name], + [output_name], + builder=builder, + node=node, + coreml_axis=coreml_axis_string, + ) + else: # coreml_axis corresponds to the sequence dimension + _add_transpose_before_after( + _add_argmax_or_argmin, + [input_name], + [output_name], + [1, 0, 2, 3], + builder=builder, + node=node, + coreml_axis=coreml_axis_string, + ) + + else: + coreml_axis_string = _get_coreml_axis([axis], builder, node, graph, err) + if coreml_axis_string not in ["C", "H", "W", "HW", "CHW"]: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Unable to translate axes attribute to CoreML axis parameter for %s" + % axis, + ) + _add_argmax_or_argmin( + [input_name], + [output_name], + builder=builder, + node=node, + coreml_axis=coreml_axis_string, + ) + + """ + update output shape map + """ + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if keepdims == 1: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = mapp + else: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = ( + mapp[:axis] + mapp[axis + 1 :] + ) + + +def _convert_reduce( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + # CoreML reduction supported along: C, H, W, CHW, HW + + def _add_reduce(input_names, output_names, **kwargs): + input_name = input_names[0] + output_name = output_names[0] + + if "add_log" in kwargs and kwargs["add_log"]: + if kwargs["node"].op_type == "ReduceLogSum": + output_name = output_names[0] + "_before_log" + + kwargs["builder"].add_reduce( + name=kwargs["node"].name + "_" + output_name, + input_name=input_name, + output_name=output_name, + axis=kwargs["coreml_axis"], + mode=kwargs["mode"], + ) + + if "add_log" in kwargs and kwargs["add_log"]: + if node.op_type == "ReduceLogSum": + kwargs["builder"].add_unary( + name=kwargs["node"].name + "_log", + input_name=output_name, + output_name=output_names[0], + mode="log", + ) + + """ + Conversion + """ + input_name = node.inputs[0] + output_name = node.outputs[0] + + axes = node.attrs.get("axes", None) + keepdims = node.attrs.get("keepdims", 1) + + if axes is None: + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + axes = range(0, len(graph.onnx_coreml_shape_mapping[node.inputs[0]])) + elif node.inputs[0] in graph.shape_dict: + axes = range(0, len(graph.shape_dict[node.inputs[0]])) + else: + return err.unsupported_op_configuration( + builder, node, graph, "Shape inference failed for reduce op" + ) + + if node.op_type == "ReduceMean": + mode = "avg" + elif node.op_type == "ReduceL1": + mode = "L1" + elif node.op_type == "ReduceL2": + mode = "L2" + elif node.op_type == "ReduceLogSum": + mode = "sum" + elif node.op_type == "ReduceMax": + mode = "max" + elif node.op_type == "ReduceMin": + mode = "min" + elif node.op_type == "ReduceProd": + mode = "prod" + elif node.op_type == "ReduceSum": + mode = "sum" + elif node.op_type == "ReduceSumSquare": + mode = "sumsquare" + else: + return err.unsupported_op_configuration(builder, node, graph, "Unsupported op") + + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + coreml_axis = "" + for ind in [["S", "B", "C", "H", "W"][mapp[i]] for i in axes]: + coreml_axis += ind + coreml_axis = "".join(sorted(coreml_axis)) + else: + coreml_axis = _get_coreml_axis(axes, builder, node, graph, err) + + if coreml_axis in ["C", "H", "W", "HW", "CHW"]: + _add_reduce( + [input_name], + [output_name], + builder=builder, + 
node=node, + coreml_axis=coreml_axis, + mode=mode, + add_log=True, + ) + else: + if node.op_type in ["ReduceMean"]: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Unable to translate axes attribute to CoreML axis parameter for %s" + % axes, + ) + n = len(coreml_axis) + for i, ax in enumerate(coreml_axis): + if ax not in ["C", "H", "W"]: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Unable to translate axes attribute to CoreML axis parameter for %s" + % axes, + ) + else: + if i == 0: + iname = input_name + else: + iname = input_name + str(i) + if i == n - 1: + oname = output_name + else: + oname = input_name + str(i + 1) + if i < n - 1: + _add_reduce( + [iname], + [oname], + builder=builder, + node=node, + coreml_axis=ax, + mode=mode, + add_log=False, + ) + else: + _add_reduce( + [iname], + [oname], + builder=builder, + node=node, + coreml_axis=ax, + mode=mode, + add_log=True, + ) + + """ + update output shape map + """ + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if keepdims == 1: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = mapp + else: + out_mapp = [] + for i, m in enumerate(mapp): + if i not in axes: + out_mapp.append(m) + if len(out_mapp) == 0: + out_mapp = [2] + graph.onnx_coreml_shape_mapping[node.outputs[0]] = out_mapp + + +def _convert_softmax( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + def _add_softmax(input_names, output_names, **kwargs): + node = kwargs["node"] + builder = kwargs["builder"] + + if node.op_type == "LogSoftmax": + builder.add_softmax( + name=node.name + "_softmax", # type: ignore + input_name=node.inputs[0], + output_name=node.outputs[0] + "_softmax", + ) + builder.add_unary( + name=node.name, + input_name=node.outputs[0] + "_softmax", + output_name=node.outputs[0], + mode="log", + ) + else: + builder.add_softmax( + name=node.name, input_name=input_names[0], output_name=output_names[0] + ) + + axis = node.attrs.get("axis", 1) + if axis != 1: + return err.unsupported_op_configuration( + builder, node, graph, "Unsupported axis {} for softmax".format(axis,) + ) + + _add_softmax(node.inputs, node.outputs, node=node, builder=builder) + + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + graph.onnx_coreml_shape_mapping[node.outputs[0]] = mapp + + +def _convert_gemm( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + """ + operation: alpha * (A * B) + beta * C + so far the case only handled is : + - B is a constant matrix + - C is a constant vector + - alpha == beta == 1.0 + - transA is off + """ + + if node.attrs.get("transA", 0) != 0: + return err.unsupported_op_configuration( + builder, + node, + graph, + "This Gemm layer cannot be converted to CoreML inner_product layer", + ) + + if ( + abs(node.attrs.get("alpha", 1.0) - 1.0) > 1e-3 + or abs(node.attrs.get("beta", 1.0) - 1.0) > 1e-3 + ): + return err.unsupported_op_configuration( + builder, + node, + graph, + "This Gemm layer cannot be converted to CoreML inner_product layer", + ) + + weight_name = node.inputs[1] + if weight_name in node.input_tensors: + W = node.input_tensors[weight_name] + if not node.attrs.get("transB", 0): + W = np.transpose(W) + else: + err.missing_initializer(node, "Second input to Gemm layer must be a constant") + + b = None + if len(node.inputs) > 2: + b = 
(node.input_tensors[node.inputs[2]]).flatten() + if len(W.shape) != 2 or (b is not None and len(b.shape) != 1): + return err.unsupported_op_configuration( + builder, + node, + graph, + "This Gemm layer cannot be converted to CoreML inner_product layer", + ) + + if b is not None: + if W.shape[0] != b.shape[0]: + return err.unsupported_op_configuration( + builder, + node, + graph, + "This Gemm layer cannot be converted to CoreML inner_product layer", + ) + + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if mapp == [1, 2] or mapp == [0, 2]: # [B,C] or [S,C] + _add_inner_product( + [node.inputs[0]], node.outputs, W=W, b=b, node=node, builder=builder + ) + elif mapp == [3, 4]: # [H,W] + _add_transpose_before_after( + _add_inner_product, + [node.inputs[0]], + node.outputs, + [2, 3, 0, 1], + W=W, + b=b, + node=node, + builder=builder, + ) + elif mapp == [2, 3]: # (C,H) + _add_transpose_before_after( + _add_inner_product, + [node.inputs[0]], + node.outputs, + [1, 2, 0, 3], + W=W, + b=b, + node=node, + builder=builder, + ) + elif mapp == [2, 4]: # (C,W) + _add_transpose_before_after( + _add_inner_product, + [node.inputs[0]], + node.outputs, + [1, 3, 2, 0], + W=W, + b=b, + node=node, + builder=builder, + ) + else: + return err.unsupported_op_configuration( + builder, node, graph, "CoreML incompatible axis placement" + ) + else: + _add_inner_product( + [node.inputs[0]], node.outputs, W=W, b=b, node=node, builder=builder + ) + + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + graph.onnx_coreml_shape_mapping[ + node.outputs[0] + ] = graph.onnx_coreml_shape_mapping[node.inputs[0]] + + +def _convert_matmul( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + weight_name = node.inputs[1] + if weight_name in node.input_tensors: + W = node.input_tensors[weight_name] + else: + err.missing_initializer(node, "Second input to Matmul layer must be a constant") + + if len(W.shape) != 2: + return err.unsupported_op_configuration( + builder, + node, + graph, + "This Matmul layer cannot be converted to CoreML inner_product layer", + ) + + W = np.transpose(W) + + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if mapp == [1, 2] or mapp == [0, 2]: # [B,C] or [S,C] + _add_inner_product( + [node.inputs[0]], node.outputs, W=W, b=None, node=node, builder=builder + ) + elif mapp == [3, 4]: # [H,W] + _add_transpose_before_after( + _add_inner_product, + [node.inputs[0]], + node.outputs, + [2, 3, 0, 1], + W=W, + b=None, + node=node, + builder=builder, + ) + elif mapp == [2, 3]: # (C,H) + _add_transpose_before_after( + _add_inner_product, + [node.inputs[0]], + node.outputs, + [1, 2, 0, 3], + W=W, + b=None, + node=node, + builder=builder, + ) + elif mapp == [2, 4]: # (C,W) + _add_transpose_before_after( + _add_inner_product, + [node.inputs[0]], + node.outputs, + [1, 3, 2, 0], + W=W, + b=None, + node=node, + builder=builder, + ) + else: + return err.unsupported_op_configuration( + builder, node, graph, "CoreML incompatible axis placement" + ) + else: + _add_inner_product( + [node.inputs[0]], node.outputs, W=W, b=None, node=node, builder=builder + ) + + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + graph.onnx_coreml_shape_mapping[ + node.outputs[0] + ] = graph.onnx_coreml_shape_mapping[node.inputs[0]] + + +def _convert_lrn( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + alpha = 
node.attrs.get("alpha", 1.0e-4) + beta = node.attrs.get("beta", 0.75) + bias = node.attrs.get("bias", 1.0) + size = node.attrs["size"] + builder.add_lrn( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + alpha=alpha, + beta=beta, + k=bias, + local_size=size, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_sigmoid( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + non_linearity="SIGMOID", + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_sign( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + non_linearity="SIGMOID_HARD", + input_name=node.inputs[0], + output_name=node.outputs[0] + "_step", + params=[10000, 0], + ) + builder.add_elementwise( + name=node.name + "_subtract_half", + input_names=node.outputs[0] + "_step", + output_name=node.outputs[0] + "_step_half", + mode="ADD", + alpha=-0.5, + ) + builder.add_elementwise( + name=node.name + "_multiply_2", + input_names=node.outputs[0] + "_step_half", + output_name=node.outputs[0], + mode="MULTIPLY", + alpha=2, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_elu( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + alpha = node.attrs.get("alpha", 1.0) + builder.add_activation( + name=node.name, + non_linearity="ELU", + params=alpha, + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_selu( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + alpha = node.attrs.get("alpha", 1.6732) + gamma = node.attrs.get("gamma", 1.0507) + builder.add_activation( + name=node.name + "_elu", # type: ignore + non_linearity="ELU", + params=alpha, + input_name=node.inputs[0], + output_name=node.inputs[0] + "_elu", + ) + builder.add_elementwise( + name=node.name, + input_names=node.inputs[0] + "_elu", + output_name=node.outputs[0], + mode="MULTIPLY", + alpha=gamma, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_prelu( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + if node.inputs[1] not in node.input_tensors: + return err.unsupported_op_configuration( + builder, node, graph, "Slope must be known!" + ) + + slope = node.input_tensors[node.inputs[1]] + builder.add_activation( + name=node.name, + non_linearity="PRELU", + params=slope, + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_tanh( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + non_linearity="TANH", + input_name=node.inputs[0], + output_name=node.outputs[0], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_pad( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + def _get_pad_params(builder, node, graph, err, params_dict, axis=None): + + pads = node.attrs["pads"] + if not (len(pads) % 2 == 0 and len(pads) >= 2): + return err.unsupported_op_configuration( + builder, + node, + graph, + "pads attribute: {}." 
+ "Length of pads must be a multiple of 2".format(str(pads)), + ) + if len(pads) == 8: + az = pads[:2] + pads[4:6] + if az.count(0) != len(az): + return err.unsupported_op_configuration( + builder, + node, + graph, + "Paddings value {} not supported".format(pads,), + ) + pads = pads[2:4] + pads[6:8] + + if len(pads) == 6: + az = pads[:2] + pads[3:5] + if az.count(0) != len(az): + return err.unsupported_op_configuration( + builder, + node, + graph, + "Paddings value {} not supported".format(pads,), + ) + pads = [pads[2], pads[5]] + + pad_t, pad_b, pad_l, pad_r = 0, 0, 0, 0 + if axis == "height": + pad_t, pad_b = pads + elif axis == "width": + pad_l, pad_r = pads + else: + pad_t, pad_l, pad_b, pad_r = pads + params_dict["pad_t"] = pad_t + params_dict["pad_b"] = pad_b + params_dict["pad_l"] = pad_l + params_dict["pad_r"] = pad_r + + def _add_pad(input_names, output_names, **kwargs): + params_dict = kwargs["params_dict"] + node = kwargs["node"] + builder = kwargs["builder"] + builder.add_padding( + name=node.name, + left=params_dict["pad_l"], + right=params_dict["pad_r"], + top=params_dict["pad_t"], + bottom=params_dict["pad_b"], + value=params_dict["value"], + input_name=input_names[0], + output_name=output_names[0], + padding_type=params_dict["mode"], + ) + + params_dict = dict() + mode = node.attrs["mode"] + if mode == "reflect" or mode == b"reflect": + mode = "reflection" + elif mode == "edge" or mode == b"edge": + mode = "replication" + else: + mode = "constant" + params_dict["mode"] = mode + params_dict["value"] = node.attrs.get("value", 0.0) + + _add_conv_like_op(_add_pad, _get_pad_params, params_dict, builder, node, graph, err) + + # update map + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_slice( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + if _is_no_op(builder, node, graph, err): + return + + def _add_slice(input_names, output_names, **kwargs): + node = kwargs["node"] + builder = kwargs["builder"] + params_dict = kwargs["params_dict"] + builder.add_slice( + name=node.name + "_" + output_names[0], + input_name=input_names[0], + output_name=output_names[0], + axis=params_dict["axis"], + start_index=params_dict["start_index"], + end_index=params_dict["end_index"], + stride=1, + ) + + params_dict = dict() + starts = node.attrs["starts"] + ends = node.attrs["ends"] + axes = node.attrs.get("axes", range(len(starts))) + + if node.inputs[0] in graph.shape_dict: + for ii, _ in enumerate(axes): + if ends[ii] > INT_MAX: + ends[ii] = graph.shape_dict[node.inputs[0]][ii] + + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + r = len(starts) + for i, ax in enumerate(axes): + params_dict["start_index"] = starts[i] + params_dict["end_index"] = ends[i] + if i == 0: + iname = node.inputs[0] + else: + iname = node.inputs[0] + str(i) + oname = node.inputs[0] + str(i + 1) + if i == r - 1: + oname = node.outputs[0] + + if mapp[ax] == 2: + params_dict["axis"] = "channel" + _add_slice( + [iname], + [oname], + node=node, + builder=builder, + params_dict=params_dict, + ) + elif mapp[ax] == 3: + params_dict["axis"] = "height" + _add_slice( + [iname], + [oname], + node=node, + builder=builder, + params_dict=params_dict, + ) + elif mapp[ax] == 4: + params_dict["axis"] = "width" + _add_slice( + [iname], + [oname], + node=node, + builder=builder, + params_dict=params_dict, + ) + elif mapp[ax] == 0: + params_dict["axis"] = "channel" + _add_transpose_before_after( + _add_slice, + 
[iname], + [oname], + [1, 0, 2, 3], + node=node, + builder=builder, + params_dict=params_dict, + ) + else: + err.unsupported_op_configuration( + builder, node, graph, "cannot slice along batch axis" + ) + else: + params_dict["start_index"] = starts[0] + params_dict["end_index"] = ends[0] + input_shape = graph.shape_dict.get(node.inputs[0], None) + if len(axes) != 1: + return err.unsupported_op_configuration( + builder, node, graph, "Only single axis Slice is supported now" + ) + if input_shape and len(input_shape) == 4 and len(axes) == 1: + axis = ["B", "channel", "height", "width"][axes[0]] + elif len(axes) == 1: + if axes[0] == 0: + axis = "channel" + elif axes[0] == 1: + axis = "height" + elif axes[0] == 2: + axis = "width" + else: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Slice is supported only along H, W or C dimensions", + ) + else: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Slice is supported only along one axis for 3D or 4D Tensors", + ) + params_dict["axis"] = axis + _add_slice( + node.inputs, + node.outputs, + builder=builder, + node=node, + params_dict=params_dict, + ) + + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_exp( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="exp", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_pow( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + input2 = node.inputs[1] + is_supported = False + if input2 in node.input_tensors: + alpha = node.input_tensors[input2] + if len(alpha.shape) == 0: + is_supported = True + + if not is_supported: + err.missing_initializer( + node, "Only mode supported is when the second input is a scalar constant" + ) + + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="power", + alpha=float(alpha), + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_flatten( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + def _add_flatten(input_names, output_names, **kwargs): + kwargs["builder"].add_flatten( + name=kwargs["node"].name, + input_name=input_names[0], + output_name=output_names[0], + mode=0, + ) + + axis = node.attrs.get("axis", 1) + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if (mapp[0] == 0 or mapp[0] == 1) and (axis == 0 or axis == 1): + _add_flatten(node.inputs, node.outputs, builder=builder, node=node) + elif mapp[0:2] == [0, 1] and axis == 2: + _add_flatten(node.inputs, node.outputs, builder=builder, node=node) + elif len(mapp) == 1 and axis == 1 and mapp[0] < 4: + _add_flatten(node.inputs, node.outputs, builder=builder, node=node) + else: + return err.unsupported_op_configuration( + builder, node, graph, "Flatten axis mode not supported" + ) + else: + _add_flatten(node.inputs, node.outputs, builder=builder, node=node) + + if node.inputs[0] in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + if len(mapp) == 1 and axis == 1 and mapp[0] < 4: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [mapp[0], mapp[0] + 1] + else: + graph.onnx_coreml_shape_mapping[node.outputs[0]] = [mapp[0], 2] + + +def _convert_max( + builder, node, graph, err +): # type: 
(NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + _convert_broadcast_op(builder, node, graph, err, "MAX") + + +def _convert_min( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + _convert_broadcast_op(builder, node, graph, err, "MIN") + + +def _convert_softsign( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + non_linearity="SOFTSIGN", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_softplus( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + non_linearity="SOFTPLUS", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_hardsigmoid( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + alpha = node.attrs.get("alpha", 0.2) + beta = node.attrs.get("beta", 0.5) + builder.add_activation( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + non_linearity="SIGMOID_HARD", + params=[alpha, beta], + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_neg( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_elementwise( + name=node.name, + input_names=node.inputs, + output_name=node.outputs[0], + mode="MULTIPLY", + alpha=-1.0, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_log( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="log", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_sqrt( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="sqrt", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_reciprocal( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="inverse", + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_reorganize_data( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + mode = "SPACE_TO_DEPTH" + if node.op_type == "DepthToSpace": + mode = "DEPTH_TO_SPACE" + block_size = node.attrs.get("blocksize", 2) + builder.add_reorganize_data( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode=mode, + block_size=block_size, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_upsample( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + if "scales" in node.attrs: + scales = node.attrs["scales"] + if len(scales) != 4 or scales[0] != 1.0 or scales[1] != 1.0: + err.unsupported_op_configuration( + builder, + node, + graph, + "Unsupported scales {} for upsample".format(scales), + ) + height_scale = int(scales[2]) + width_scale = int(scales[3]) + elif len(node.input_tensors): + key = next(iter(node.input_tensors.keys())) + scales = 
node.input_tensors[key] + height_scale = int(scales[2]) + width_scale = int(scales[3]) + else: + if len(node.inputs) > 1: + return err.unsupported_op_configuration( + builder, + node, + graph, + "This ONNX upsample layer has 'scales' provided as an input. CoreML upsample requires 'scales' as an attribute of the layer.", + ) + height_scale = int(node.attrs.get("height_scale", 1)) + width_scale = int(node.attrs.get("width_scale", 1)) + mode_convert = { + "nearest": "NN", + "linear": "BILINEAR", + } + mode = mode_convert[node.attrs["mode"].decode("UTF-8")] + builder.add_upsample( + name=node.name, + scaling_factor_h=height_scale, + scaling_factor_w=width_scale, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode=mode, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_clip( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + # clip(x, a, b) = max(min(x, a), b) = -min(-min(x, a), -b) + + if node.attrs.get("max") is None: + min_limit = node.attrs.get("min", float(-(2 ** 16) - 1)) + builder.add_unary( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + mode="threshold", + alpha=min_limit, + shift=0, + scale=1.0, + ) + elif node.attrs.get("min") is None: + max_limit = node.attrs.get("max", float(2 ** 16 - 1)) + builder.add_unary( + name=node.name + "_min_minus_x_minus_b", + input_name=node.inputs[0], + output_name=node.inputs[0] + "_min_minus_x_minus_b", + mode="threshold", + alpha=-max_limit, + shift=0, + scale=-1.0, + ) + + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0] + "_min_minus_x_minus_b", + output_name=node.outputs[0], + params=[-1.0, 0], + ) + + else: + min_limit = node.attrs.get("min") + max_limit = node.attrs.get("max") + builder.add_unary( + name=node.name + "_min_x_a", + input_name=node.inputs[0], + output_name=node.inputs[0] + "_min_x_a", + mode="threshold", + alpha=min_limit, + shift=0, + scale=1.0, + ) + + builder.add_unary( + name=node.name + "_min_minus_x_minus_b", + input_name=node.inputs[0] + "_min_x_a", + output_name=node.inputs[0] + "_min_minus_x_minus_b", + mode="threshold", + alpha=-max_limit, + shift=0, + scale=-1.0, + ) + + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0] + "_min_minus_x_minus_b", + output_name=node.outputs[0], + params=[-1.0, 0], + ) + + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_mvn( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_mvn( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + across_channels=node.attrs.get("across_channels", 0), + normalize_variance=node.attrs.get("normalize_variance", 1), + epsilon=1e-5, + ) + _update_shape_mapping_unchanged(node, graph, err) + + +def _convert_lstm( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + W_name = node.inputs[1] + R_name = node.inputs[2] + B = None + if len(node.inputs) > 3: + B_name = node.inputs[3] + B = node.input_tensors.get(B_name, None) + W = node.input_tensors.get(W_name, None) + R = node.input_tensors.get(R_name, None) + if W is None: + err.missing_initializer( + node, "Weight tensor: {} not found in the graph initializer".format(W_name,) + ) + if R is None: + err.missing_initializer( + node, "Weight tensor: {} not found in the graph initializer".format(R_name,) + ) + + h = node.attrs["hidden_size"] + W_i, W_o, W_f, 
W_c = np.split(np.squeeze(W), 4) # type: ignore + R_i, R_o, R_f, R_c = np.split(np.squeeze(R), 4) # type: ignore + x = W_i.shape[1] + h = W_i.shape[0] + W_x = [W_i, W_f, W_o, W_c] + W_h = [R_i, R_f, R_o, R_c] + b = None + if B is not None: + b_Wi, b_Wo, b_Wf, b_Wc, b_Ri, b_Ro, b_Rf, b_Rc = np.split(np.squeeze(B), 8) # type: ignore + b = [b_Wi + b_Ri, b_Wf + b_Rf, b_Wo + b_Ro, b_Wc + b_Rc] + + input_h = node.inputs[5] if len(node.inputs) > 5 else node.inputs[0] + "_h_input" + input_c = node.inputs[6] if len(node.inputs) > 6 else node.inputs[0] + "_c_input" + output_h = ( + node.outputs[1] if len(node.outputs) > 1 else node.outputs[0] + "_h_output" + ) + output_c = ( + node.outputs[2] if len(node.outputs) > 2 else node.outputs[0] + "_c_output" + ) + + graph.optional_inputs.append((input_h, (h))) + graph.optional_inputs.append((input_c, (h))) + graph.optional_outputs.append((output_h, (h))) + graph.optional_outputs.append((output_c, (h))) + + builder.add_unilstm( + name=node.name, + W_h=W_h, + W_x=W_x, + b=b, + hidden_size=h, + input_size=x, + input_names=[node.inputs[0], input_h, input_c], + output_names=[node.outputs[0], output_h, output_c], + inner_activation="SIGMOID", + cell_state_update_activation="TANH", + output_activation="TANH", + peep=None, + output_all=True, + forget_bias=False, + coupled_input_forget_gate=False, + cell_clip_threshold=50000.0, + reverse_input=False, + ) + + if _is_input_shape_mapping_defined(node, graph): + graph.onnx_coreml_shape_mapping[ + node.outputs[0] + ] = graph.onnx_coreml_shape_mapping[node.inputs[0]] + graph.onnx_coreml_shape_mapping[ + node.outputs[1] + ] = graph.onnx_coreml_shape_mapping[node.inputs[0]] + graph.onnx_coreml_shape_mapping[ + node.outputs[2] + ] = graph.onnx_coreml_shape_mapping[node.inputs[0]] + + +def _convert_custom( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + params = NeuralNetwork_pb2.CustomLayerParams() + params.className = node.op_type + params.description = "Custom layer that corresponds to the ONNX op {}".format( + node.op_type, + ) + + inputs_ = [] + # skip the inputs that are initializers + for inp in node.inputs: + if inp not in node.input_tensors: + inputs_.append(inp) + + builder.add_custom( + name=node.name, + input_names=inputs_, + output_names=node.outputs, + custom_proto_spec=params, + ) + err.custom_layer_nodes.append(node) + + +def _convert_identity( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=[1.0, 0.0], + ) + if _is_input_shape_mapping_defined(node, graph): + mapp = graph.onnx_coreml_shape_mapping[node.inputs[0]] + mapp_out = [] + if node.op_type == "Squeeze": + axes = node.attrs.get("axes", None) + if axes is None: + if node.inputs[0] not in graph.shape_dict: + return err.unsupported_op_configuration( + builder, node, graph, "shape not known" + ) + else: + ishape = graph.shape_dict[node.inputs[0]] + if ishape.count(1) == len(ishape): + mapp_out = [2] + else: + for i, d in enumerate(ishape): + if d != 1: + mapp_out.append(mapp[i]) + else: + for i, a in enumerate(mapp): + if i in axes: + continue + else: + mapp_out.append(a) + if len(mapp_out) == 0: + mapp_out = [2] + elif node.op_type == "Unsqueeze": + axes = node.attrs["axes"] + available_set = [0, 1, 2, 3, 4] + for d in mapp: + if d in available_set: + available_set.remove(d) + if len(axes) > len(available_set): + 
return err.unsupported_op_configuration( + builder, + node, + graph, + "cannot unsqueeze to a dimension greater than 5", + ) + mapp_out = [1] * (len(axes) + len(mapp)) + mapp_ptr = 0 + available_set_ptr = 0 + for i in range(len(mapp_out)): + if i in axes: + mapp_out[i] = available_set[available_set_ptr] + available_set_ptr += 1 + else: + mapp_out[i] = mapp[mapp_ptr] + mapp_ptr += 1 + else: + raise ValueError("convert_identity incorrectly called") + graph.onnx_coreml_shape_mapping[node.outputs[0]] = mapp_out + + +def _convert_const( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + + mapp = None + for input_ in node.inputs: + if input_ in graph.onnx_coreml_shape_mapping: + mapp = graph.onnx_coreml_shape_mapping[input_] + + for name, value in node.input_tensors.items(): + output_name = name + if name not in graph.constant_layers_added: + add_transpose_later = False + shape = value.shape + coreml_shape = [1, 1, 1] + if len(shape) == 0: + graph.onnx_coreml_shape_mapping[name] = [2] # [C] + elif len(shape) == 3: + coreml_shape = list(shape) + graph.onnx_coreml_shape_mapping[name] = [2, 3, 4] # [C,H,W] + elif len(shape) == 1: + coreml_shape = [shape[0], 1, 1] + graph.onnx_coreml_shape_mapping[name] = [2] # [C] + elif len(shape) == 2: + coreml_shape = [1, shape[0], shape[1]] + if mapp is not None and (mapp == [1, 2] or mapp == [0, 2]): + add_transpose_later = True + transpose_dims = [2, 3, 0, 1] + graph.onnx_coreml_shape_mapping[name] = [0, 2] # [S,C] + else: + graph.onnx_coreml_shape_mapping[name] = [3, 4] # [H,W] + else: + return err.unsupported_op_configuration( + builder, + node, + graph, + "unable to translate constant array shape to CoreML shape", + ) + + if add_transpose_later: + output_name += "_pre_transpose" + builder.add_load_constant( + name=output_name, + output_name=output_name, + constant_value=value.flatten(), + shape=coreml_shape, + ) + if add_transpose_later: + builder.add_permute( + name=name, + dim=transpose_dims, + input_name=output_name, + output_name=name, + ) + + graph.constant_layers_added[output_name] = True + + +_ONNX_NODE_REGISTRY = { + "Abs": _convert_abs, + "Add": _convert_add, + "ArgMax": _convert_argmax, + "ArgMin": _convert_argmax, + "AveragePool": _convert_pool, + "BatchNormalization": _convert_bn, + "Clip": _convert_clip, + "Concat": _convert_concat, + "Conv": _convert_conv, + "ConvTranspose": _convert_conv, + "DepthToSpace": _convert_reorganize_data, + "Div": _convert_div, + "Elu": _convert_elu, + "Exp": _convert_exp, + "Flatten": _convert_flatten, # Todo: handle more cases + "Gemm": _convert_gemm, + "GlobalAveragePool": _convert_pool, + "GlobalMaxPool": _convert_pool, + "HardSigmoid": _convert_hardsigmoid, + "InstanceNormalization": _convert_instancenorm, + "LeakyRelu": _convert_leaky_relu, + "Log": _convert_log, + "LogSoftmax": _convert_softmax, + "LRN": _convert_lrn, + "LSTM": _convert_lstm, + "MatMul": _convert_matmul, + "Max": _convert_max, + "MaxPool": _convert_pool, + "Mean": _convert_mean, + "MeanVarianceNormalization": _convert_mvn, + "Min": _convert_min, + "Mul": _convert_mul, + "Neg": _convert_neg, + "Pad": _convert_pad, + "Pow": _convert_pow, + "PRelu": _convert_prelu, + "Reciprocal": _convert_reciprocal, + "ReduceL1": _convert_reduce, + "ReduceL2": _convert_reduce, + "ReduceLogSum": _convert_reduce, + "ReduceMax": _convert_reduce, + "ReduceMean": _convert_reduce, + "ReduceMin": _convert_reduce, + "ReduceProd": _convert_reduce, + "ReduceSum": _convert_reduce, + "ReduceSumSquare": _convert_reduce, + 
"Relu": _convert_relu, + "Reshape": _convert_reshape, + "Selu": _convert_selu, + "Sigmoid": _convert_sigmoid, + "Sign": _convert_sign, + "Slice": _convert_slice, + "Softmax": _convert_softmax, # Todo: handle more cases + "Softplus": _convert_softplus, + "Softsign": _convert_softsign, + "SpaceToDepth": _convert_reorganize_data, + "SpatialBN": _convert_bn, + "Split": _convert_split, + "Sqrt": _convert_sqrt, + "Squeeze": _convert_identity, + "Sub": _convert_sub, + "Sum": _convert_add, + "Tanh": _convert_tanh, + "ThresholdedRelu": _convert_thresholdedrelu, + "Transpose": _convert_transpose, + "Unsqueeze": _convert_identity, + "Upsample": _convert_upsample, +} + +_SEQUENCE_LAYERS_REGISTRY = set(["LSTM"]) + +_CONST_INPUT_ALLOWED_LAYERS = set( + ["Add", "Sub", "Sum", "Mul", "Concat", "Max", "Min", "Div", "Reciprocal"] +) + + +def _get_node_converter_fn( + builder, node, err +): # type: (NeuralNetworkBuilder, Node, ErrorHandling) -> Callable[[NeuralNetworkBuilder, Node, Graph, ErrorHandling], None] + """ + Get the right converter function for ONNX node op_type + """ + op_type = node.op_type + # Return custom conversion function if provided + # If both node type and node name custom function + # is provided, then use node name specific custom function, as + # type specific custom function is more generic than name specific + if node.name in err.custom_conversion_functions: + return err.custom_conversion_functions[node.name] + elif op_type in err.custom_conversion_functions: + return err.custom_conversion_functions[op_type] + elif op_type in _ONNX_NODE_REGISTRY: + return _ONNX_NODE_REGISTRY[op_type] + else: + return err.unsupported_op(node) + + +def _add_const_inputs_if_required( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + if node.op_type in _CONST_INPUT_ALLOWED_LAYERS: + if len(node.input_tensors) > 0: + _convert_const(builder, node, graph, err) + + +def _convert_node( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + converter_fn = _get_node_converter_fn(builder, node, err) + return converter_fn(builder, node, graph, err) diff --git a/coremltools/converters/onnx/_operators_nd.py b/coremltools/converters/onnx/_operators_nd.py new file mode 100644 index 000000000..f85a81b46 --- /dev/null +++ b/coremltools/converters/onnx/_operators_nd.py @@ -0,0 +1,2764 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +import numpy as np +import copy + +from typing import Sequence, Callable, List, Tuple, Optional, Text, Any +from coremltools.models.neural_network import NeuralNetworkBuilder # type: ignore +from onnx import TensorProto +from ._graph import Node, Graph +from coremltools.proto import NeuralNetwork_pb2 # type: ignore +from ._error_utils import ErrorHandling + +from ._operators import ( + _convert_abs, + _convert_relu, + _convert_sqrt, + _convert_exp, + _convert_elu, + _convert_selu, + _convert_sigmoid, + _convert_sign, + _convert_prelu, + _convert_upsample, + _convert_softsign, + _convert_softplus, + _convert_log, + _convert_neg, + _convert_reciprocal, + _convert_hardsigmoid, + _convert_reorganize_data, + _add_pool, + _get_pool_params, + _add_conv, + _get_conv_params, + _convert_thresholdedrelu, + _convert_leaky_relu, + _convert_lrn, +) + +from ._operators import _convert_pad as _convert_pad_5d + +INT_MAX = 2 ** 63 - 1 + + +## Helper functions +def 
load_input_constants(builder, node, graph, err): + for i in range(len(node.inputs)): + if ( + node.inputs[i] in node.input_tensors + and node.inputs[i] not in graph.constants_loaded + ): + value = node.input_tensors[node.inputs[i]] + builder.add_load_constant_nd( + name=node.name + "_load_constant_" + str(i), + output_name=node.inputs[i], + constant_value=value, + shape=[1] if value.shape == () else value.shape, + ) + graph.constants_loaded.add(node.inputs[i]) + + +def _add_conv_like_op( + add_func, get_params_func, params_dict, builder, node, graph, err +): + rank = builder._get_rank(node.inputs[0]) + if rank == 4: + get_params_func(builder, node, graph, err, params_dict) + add_func( + node.inputs, + node.outputs, + params_dict=params_dict, + builder=builder, + node=node, + graph=graph, + err=err, + ) + elif rank == 3: + axes = [0, 3] + # Make 5d tensor + expanded_node_output = node.name + "_" + node.inputs[0] + "_expanded" + builder.add_expand_dims( + name=node.name + "_ip_expand", + input_name=node.inputs[0], + output_name=expanded_node_output, + axes=axes, + ) + node.inputs[0] = expanded_node_output + output_name = node.outputs[0] + node.outputs[0] = node.name + "_" + output_name + "_expanded" + # Add conversion op + get_params_func(builder, node, graph, err, params_dict, axis="width") + add_func( + node.inputs, + node.outputs, + params_dict=params_dict, + builder=builder, + node=node, + graph=graph, + err=err, + ) + # Make 3d tensor back + builder.add_squeeze( + name=node.name + "_ip_squeeze_out", + input_name=node.outputs[0], + output_name=output_name, + axes=axes, + ) + else: + return err.unsupported_op_configuration( + builder, node, graph, "provided number axes {} not supported".format(rank) + ) + + +def add_broadcastable_op_chain(builder, node, err, add_op_function): + """ + Splits list of input into chain of operator with two inputs + where output of first node is fed into next one until the final input + is processed + Pass node: Node to be converted + add_op_function: Conversion function to be used + """ + total_nodes = len(node.inputs) + + if total_nodes < 2: + # TODO: Skip or CopyProp + DeadCode elimination + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=[1.0, 0.0], + ) + elif total_nodes == 2: + add_op_function( + name=node.name, input_names=node.inputs, output_name=node.outputs[0] + ) + else: + decorator = 0 + out_name = node.outputs[0] + # Add broadcastable layer for first two inputs + add_op_function( + name=node.name, + input_names=[node.inputs[0], node.inputs[1]], + output_name=out_name + "_" + str(decorator), + ) + # Continue chain of broadcastable layers + for i in range(2, total_nodes - 1): + add_op_function( + name=node.name, + input_names=[out_name + "_" + str(decorator), node.inputs[i]], + output_name=out_name + "_" + str(decorator + 1), + ) + decorator += 1 + # End chain of broadcastable layers with final output + add_op_function( + name=node.name + "_" + str(decorator), + input_names=[out_name + "_" + str(decorator), node.inputs[total_nodes - 1]], + output_name=out_name, + ) + + +def add_bn_with_expansion( + builder, + node, + err, + node_name, + input_name, + output_name, + channels, + scale, + bias, + mean=None, + var=None, + epsilon=None, + compute_mean_var=False, + instance_normalization=False, + axes_for_expansion=[], +): + real_input_name = input_name + real_output_name = output_name + + # Expand input if needed + if len(axes_for_expansion) != 0: + input_name = 
node_name + "_" + input_name + "_expanded" + output_name = output_name + "_expanded" + builder.add_expand_dims( + name=node_name + "_expand", + input_name=real_input_name, + output_name=input_name, + axes=axes_for_expansion, + ) + + builder.add_batchnorm( + name=node.name, + channels=channels, + gamma=scale, + beta=bias, + mean=mean, + variance=var, + input_name=input_name, + output_name=output_name, + compute_mean_var=compute_mean_var, + instance_normalization=instance_normalization, + epsilon=epsilon, + ) + + # Squeeze output if needed + if len(axes_for_expansion) != 0: + builder.add_squeeze( + name=node_name + "_squeeze", + input_name=output_name, + output_name=real_output_name, + axes=axes_for_expansion, + ) + + +# Helper function to convert RandomNormal, RandomUniform and it's variants +def add_random(builder, node, graph, err, add_op_function): + # Ignoring attribute `dtype` as CoreML internally represents tensors into 'Float' + mean = node.attrs.get("mean", 0.0) + scale = node.attrs.get("scale", 1.0) + seed = node.attrs.get("seed", -1) + shape = node.attrs.get("shape", None) + if shape is None: + return err.unsupported_op_configuration( + builder, node, graph, "Shape not provided" + ) + add_op_function( + name=node.name, + output_name=node.outputs[0], + output_shape=shape, + mean=mean, + stddev=scale, + seed=seed, + ) + + +## Converter functions + + +def _convert_acos(builder, node, graph, err): + """ + convert to CoreML Acos Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3793 + """ + load_input_constants(builder, node, graph, err) + builder.add_acos( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_acosh(builder, node, graph, err): + """ + convert to CoreML Acosh Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3925 + """ + load_input_constants(builder, node, graph, err) + builder.add_acosh( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_add(builder, node, graph, err): + """ + convert to CoreML Add Broadcastable Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4117 + """ + load_input_constants(builder, node, graph, err) + add_broadcastable_op_chain(builder, node, err, builder.add_add_broadcastable) + + +def _convert_argmax(builder, node, graph, err): + """ + convert to CoreML ArgMax Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4961 + """ + axis = node.attrs.get("axis", 0) + keepdims = node.attrs.get("keepdims", True) + builder.add_argmax( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axis=axis, + keepdims=keepdims, + ) + + +def _convert_argmin(builder, node, graph, err): + """ + convert to CoreML ArgMin Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4988 + """ + axis = node.attrs.get("axis", 0) + keepdims = node.attrs.get("keepdims", True) + builder.add_argmin( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axis=axis, + keepdims=keepdims, + ) + + +def _convert_asin(builder, node, graph, err): + """ + convert to CoreML Asin Layer: + 
https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3771 + """ + load_input_constants(builder, node, graph, err) + builder.add_asin( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_asinh(builder, node, graph, err): + """ + convert to CoreML Asinh Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3903 + """ + load_input_constants(builder, node, graph, err) + builder.add_asinh( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_atan(builder, node, graph, err): + """ + convert to CoreML Atan Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3815 + """ + load_input_constants(builder, node, graph, err) + builder.add_atan( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_atanh(builder, node, graph, err): + """ + convert to CoreML Atanh Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3947 + """ + load_input_constants(builder, node, graph, err) + builder.add_atanh( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_bn(builder, node, graph, err): + """ + convert to CoreML BatchNorm Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1633 + """ + if len(node.outputs) > 1: + return err.unsupported_op_configuration( + builder, + node, + graph, + "This converter only supports BatchNormalization with one output", + ) + + epsilon = node.attrs.get("epsilon", 1e-5) + scale_name = node.inputs[1] + + if scale_name in node.input_tensors: + channels = node.input_tensors[scale_name].shape + elif scale_name in graph.shape_dict: + channels = graph.shape_dict[scale_name] + else: + err.unsupported_op_configuration( + builder, node, graph, "Input shape not available" + ) + + # TODO: Move error check under VERBOSE / DEBUG Mode + for i in range(2, len(node.inputs)): + ip_name = node.inputs[i] + if ip_name in node.input_tensors: + tensor_shape = node.input_tensors[ip_name].shape + else: + if ip_name not in graph.shape_dict: + return err.unsupported_op_configuration( + builder, node, graph, "Input shape not available" + ) + tensor_shape = graph.shape_dict[ip_name] + if tensor_shape != channels: + err.unsupported_op_configuration( + builder, + node, + graph, + "Shape mismatch between Scale, Bias, Mean and Variance", + ) + + scale = ( + node.input_tensors[node.inputs[1]] + if node.inputs[1] in node.input_tensors + else np.ones(shape=channels, dtype=np.float32) + ) + bias = ( + node.input_tensors[node.inputs[2]] + if node.inputs[2] in node.input_tensors + else np.zeros(shape=channels, dtype=np.float32) + ) + mean = ( + node.input_tensors[node.inputs[3]] + if node.inputs[3] in node.input_tensors + else np.zeros(shape=channels, dtype=np.float32) + ) + var = ( + node.input_tensors[node.inputs[4]] + if node.inputs[4] in node.input_tensors + else np.ones(shape=channels, dtype=np.float32) + ) + + rank = builder._get_rank(node.inputs[0]) + # ONNX converts B x C tensor into B x C x 1 hence + # Rank 2 BN is mapped to Rank 3 BN + if rank == 3: + # 1D Batch Norm + add_bn_with_expansion( + builder, + node, + err, + node.name, + node.inputs[0], + node.outputs[0], + channels[0], + scale, 
+ bias, + mean, + var, + epsilon, + axes_for_expansion=[0, 3], + ) + elif rank == 4: + # 2D Batch Norm + add_bn_with_expansion( + builder, + node, + err, + node.name, + node.inputs[0], + node.outputs[0], + channels[0], + scale, + bias, + mean, + var, + epsilon, + axes_for_expansion=[], + ) + else: + # Unsupported 1D, 3D and above + err.unsupported_op_configuration( + builder, node, graph, "provided number axes {} not supported".format(rank) + ) + + +def _convert_cast(builder, node, graph, err): + """ + Perform cast operation in CoreML + e.g. Casting from Float (assumed) to Int maps to Floor Layer + For Others, add copy layer + """ + convert_to = node.attrs.get("to") + convert_to_int = set( + { + TensorProto.UINT8, + TensorProto.INT8, + TensorProto.UINT16, + TensorProto.INT32, + TensorProto.INT64, + TensorProto.UINT32, + TensorProto.UINT64, + } + ) + + ## TODO: Add support for conversion from STRING TO FLOAT + ## Currently, such input will error out in parsing + if convert_to in convert_to_int: + builder.add_floor( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + else: + load_input_constants(builder, node, graph, err) + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=[1.0, 0.0], + ) + + +def _convert_ceil(builder, node, graph, err): + """ + convert to CoreML Ceil Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5018 + """ + builder.add_ceil( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0], + ) + + +def _convert_clip(builder, node, graph, err): + """ + convert to CoreML Clip Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5066 + """ + max_value = node.attrs.get("max", 3.4028234663852886e38) + min_value = node.attrs.get("min", -3.4028234663852886e38) + builder.add_clip( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + min_value=min_value, + max_value=max_value, + ) + + +def _convert_concat(builder, node, graph, err): + """ + convert to CoreML ConcatND Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3521 + """ + axis = node.attrs.get("axis") + load_input_constants(builder, node, graph, err) + + # TODO: Adding Linear layer will change to + # either: Skip the op right away + # or: Insert Linear and perform copy-propogation followed by dead code elimination + if len(node.inputs) == 1: + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=[1.0, 0.0], + ) + else: + builder.add_concat_nd( + name=node.name, + input_names=node.inputs, + output_name=node.outputs[0], + axis=axis, + ) + + +def _convert_constant(builder, node, graph, err): + """ + convert to CoreML Load Constant ND Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3596 + """ + value = node.attrs["value"] + # HACK: If Value is 0-Rank then make it 1-Rank + builder.add_load_constant_nd( + name=node.name, + output_name=node.outputs[0], + constant_value=value, + shape=[1] if value.shape == () else value.shape, + ) + graph.constants_loaded(node.outputs[0]) + + +def _convert_constant_of_shape(builder, node, graph, err): + """ + convert to CoreML Fill Static Layer: + 
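+
+    For example (illustrative): a shape input known to be [2, 3] with
+    value=[5.0] lowers to add_fill_static producing a 2x3 tensor of 5.0;
+    an unknown shape falls back to add_fill_dynamic at runtime.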
https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3641 + """ + value = node.attrs.get("value", [0.0]) + # if shape is known, create tensor of given shape + # otherwise create tensor at runtime + if node.inputs[0] in node.input_tensors: + output_shape = node.input_tensors[node.inputs[0]] + # add_fill_static requires shape to be more than rank-1 + if len(output_shape.shape) == 1: + output_shape = output_shape.reshape(output_shape.shape[0], 1) + builder.add_fill_static( + name=node.name, + output_name=node.outputs[0], + output_shape=output_shape, + value=value[0], + ) + else: + builder.add_fill_dynamic( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + value=value[0], + ) + + +def _convert_conv(builder, node, graph, err): + """ + convert to CoreML Convolution Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1418 + """ + params_dict = dict() + params_dict["is_deconv"] = False + if node.op_type.endswith("Transpose"): + params_dict["is_deconv"] = True + # get weights for convolution + weight_name = node.inputs[1] + W = None + if weight_name in node.input_tensors: + W = node.input_tensors[weight_name] + params_dict["w_shape"] = W.shape + else: + # W is provided as a input + # Make W compatible for CoreML Conv Layer + # W ONNX format: OC x KC x H x W + # Expected CoreML Format: H x W x KC x OC + W_name = node.inputs[1] + W_shape = graph.shape_dict[W_name] + W_rank = len(W_shape) + + params_dict["w_shape"] = W_shape + if W_rank == 3: + expanded_node_name = node.name + "_" + W_name + "_expanded" + builder.add_expand_dims( + name=node.name + "_w_expand", + input_name=W_name, + output_name=expanded_node_name, + axes=[-2], + ) + W_name = expanded_node_name + + # Now Permute the W tensor + W_transpose_axes = [2, 3, 1, 0] + # If ConvTranpose then, Kernel and Output channels are shuffled + if params_dict["is_deconv"]: + W_transpose_axes = [2, 3, 0, 1] + + builder.add_transpose( + name=node.name + "_w_transpose", + axes=W_transpose_axes, + input_name=W_name, + output_name=W_name + "_transposed", + ) + W_name = W_name + "_transposed" + node.inputs[1] = W_name + + params_dict["W"] = W + bias = None + if len(node.inputs) > 2: + bias = node.input_tensors[node.inputs[2]] + params_dict["bias"] = bias + params_dict["groups"] = node.attrs.get("group", 1) + + _add_conv_like_op( + _add_conv, _get_conv_params, params_dict, builder, node, graph, err + ) + + +def _convert_cos(builder, node, graph, err): + """ + convert to CoreML Cos Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3727 + """ + load_input_constants(builder, node, graph, err) + builder.add_cos( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_cosh(builder, node, graph, err): + """ + convert to CoreML Cosh Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3859 + """ + load_input_constants(builder, node, graph, err) + builder.add_cosh( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_div(builder, node, graph, err): + """ + convert to CoreML Divide Broadcastable Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4180 + """ + load_input_constants(builder, node, 
graph, err) + add_broadcastable_op_chain(builder, node, err, builder.add_divide_broadcastable) + + +def _convert_equal(builder, node, graph, err): + """ + convert to CoreML Equal Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L961 + """ + load_input_constants(builder, node, graph, err) + builder.add_equal( + name=node.name, input_names=node.inputs, output_name=node.outputs[0] + ) + + +def _convert_erf(builder, node, graph, err): + """ + convert to CoreML Erf Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5140 + """ + load_input_constants(builder, node, graph, err) + builder.add_erf( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_expand(builder, node, graph, err): + """ + convert to CoreML Broadcast To Static/Dynamic Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4086 + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4108 + """ + load_input_constants(builder, node, graph, err) + if node.inputs[1] in node.input_tensors: + output_shape = node.input_tensors[node.inputs[1]].astype(np.int64) + builder.add_broadcast_to_static( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + output_shape=output_shape, + ) + else: + builder.add_broadcast_to_dynamic( + name=node.name, input_names=node.inputs, output_name=node.outputs[0], + ) + + +def _convert_flatten(builder, node, graph, err): + """ + convert to CoreML Flatten Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4826 + """ + axis = node.attrs.get("axis", 1) + builder.add_flatten_to_2d( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axis=axis, + ) + + +def _convert_floor(builder, node, graph, err): + """ + convert to CoreML Floor Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5040 + """ + builder.add_floor( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_gather(builder, node, graph, err): + """ + convert to CoreML Gather Along Axis Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4296 + """ + axis = node.attrs.get("axis", 0) + + if len(node.inputs) != 2: + err.unsupported_op_configuration( + builder, node, graph, "Error in ONNX model: Gather expects two inputs" + ) + + if ( + node.inputs[0] in node.input_tensors + and node.inputs[0] not in graph.constants_loaded + ): + value = node.input_tensors[node.inputs[0]] + builder.add_load_constant_nd( + name=node.name + "_load_data", + output_name=node.inputs[0], + constant_value=value, + shape=[1] if value.shape == () else value.shape, + ) + graph.constants_loaded.add(node.inputs[0]) + + if ( + node.inputs[1] in node.input_tensors + and node.inputs[1] not in graph.constants_loaded + ): + value = node.input_tensors[node.inputs[1]] + builder.add_load_constant_nd( + name=node.name + "_load_indices", + output_name=node.inputs[1], + constant_value=value, + shape=[1] if value.shape == () else value.shape, + ) + graph.constants_loaded.add(node.inputs[1]) + + builder.add_gather( + name=node.name, + 
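+        # Illustrative example (shapes assumed, not from the source): data of
+        # shape (5, 4) gathered with indices [0, 2] along axis=0 yields shape
+        # (2, 4), matching ONNX Gather; the constant loads above ensure both
+        # inputs exist as blobs exactly once.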
input_names=[node.inputs[0], node.inputs[1]], + output_name=node.outputs[0], + axis=axis, + ) + + +def _convert_gemm(builder, node, graph, err): + """ + convert to CoreML Tranpose (Optional) and Inner Product Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4180 + """ + # Read attributes + alpha = node.attrs.get("alpha", 1.0) + beta = node.attrs.get("beta", 1.0) + transA = node.attrs.get("transA", False) + transB = node.attrs.get("transB", False) + + A = node.inputs[0] + if A in node.input_tensors: + A_tensor = node.input_tensors[A] + builder.add_load_constant_nd( + name=node.name + A + "_const", + output_name="const_" + A, + constant_value=A_tensor, + shape=A_tensor.shape, + ) + A = "const_" + A + + if alpha != 1.0: + builder.add_load_constant_nd( + name=node.name + "_load_alpha", + output_name="alpha_for_" + A, + constant_value=np.array([alpha]), + shape=[1], + ) + builder.add_multiply_broadcastable( + name=node.name + "_alphaA", + input_names=[A, "alpha_for_" + A], + output_name=A + "_alphaA", + ) + A = A + "_alphaA" + + B = node.inputs[1] + C = node.inputs[2] + if B in node.input_tensors and C in node.input_tensors: + B = node.input_tensors[B] + C = node.input_tensors[C] + + if transB: + B = B.transpose() + + C = C.flatten() + builder.add_batched_mat_mul( + name=node.name, + input_names=[A], + output_name=node.outputs[0], + transpose_a=transA, + weight_matrix_rows=B.shape[0], + weight_matrix_columns=B.shape[1], + W=B, + bias=C, + ) + else: + ## TODO: Test coverage when B and C are non-constant + ## Should C be of Rank-1? or it's okay to keep it that way? + if beta != 1.0: + builder.add_load_constant_nd( + name=node.name + "_load_beta", + output_name="beta_for_" + B, + constant_value=np.array([beta]), + shape=[1], + ) + builder.add_multiply_broadcastable( + name=node.name + "_betaC", + input_names=[C, "beta_for_" + B], + output_name=C + "_betaC", + ) + C = C + "_betaC" + + builder.add_batched_mat_mul( + name=node.name, + input_names=[A, B], + output_name=node.outputs[0] + "_b_mat_mul", + transpose_a=transA, + transpose_b=transB, + ) + + builder.add_add_broadcastable( + name=node.name + "_add_bias", + input_names=[node.outputs[0] + "_b_mat_mul", C], + output_name=node.outputs[0], + ) + + +def _convert_greater(builder, node, graph, err): + """ + convert to CoreML Greater than Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L853 + """ + load_input_constants(builder, node, graph, err) + builder.add_greater_than( + name=node.name, input_names=node.inputs, output_name=node.outputs[0], + ) + + +def _convert_gru( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + """ + convert to CoreML GRU Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3104 + """ + + def get_weights(W, W_name, R, R_name, B): + """ + Helper routine to return weights in CoreML LSTM required format + """ + W = np.expand_dims(np.expand_dims(W, 3), 3) + R = np.expand_dims(np.expand_dims(R, 3), 3) + + if W is None: + err.missing_initializer( + node, + "Weight tensor: {} not found in the graph initializer".format(W_name), + ) + if R is None: + err.missing_initializer( + node, + "Weight tensor: {} not found in the graph initializer".format(R_name), + ) + + W_z, W_r, W_h = np.split(np.squeeze(W), 3) # type: ignore + R_z, R_r, R_h = 
np.split(np.squeeze(R), 3)  # type: ignore
+
+        W_x = [W_z, W_r, W_h]
+        W_h = [R_z, R_r, R_h]
+        b = None
+        if B is not None:
+            b_Wz, b_Wr, b_Wh, b_Rz, b_Rr, b_Rh = np.split(np.squeeze(B), 6)  # type: ignore
+            b = [b_Wz + b_Rz, b_Wr + b_Rr, b_Wh + b_Rh]
+
+        return W_x, W_h, b
+
+    def expand_dim(node_name, input_name, output_name, axes):
+        builder.add_expand_dims(
+            name=node_name, input_name=input_name, output_name=output_name, axes=axes
+        )
+
+    # Read attributes
+    # activation alpha and beta
+    if "activation_alpha" in node.attrs or "activation_beta" in node.attrs:
+        err.unsupported_feature_warning(
+            node, "Activation parameter alpha and beta are currently not used"
+        )
+
+    inner_activation = "SIGMOID"
+    output_activation = "TANH"
+
+    if "activations" in node.attrs:
+        activations_list = node.attrs["activations"]
+
+        if len(activations_list) < 2:
+            err.unsupported_op_configuration(
+                builder,
+                node,
+                graph,
+                "Error in ONNX model: fewer activations provided than expected",
+            )
+
+        inner_activation = activations_list[0].upper()
+        output_activation = activations_list[1].upper()
+
+    # Extract direction from ONNX attribute
+    direction = node.attrs.get("direction", "forward")
+    if direction == "bidirectional":
+        return err.unsupported_op_configuration(
+            builder,
+            node,
+            graph,
+            "Bidirectional GRU is not supported. Please consider adding a custom conversion function/layer",
+        )
+
+    hidden_size = node.attrs.get("hidden_size")
+
+    # Read inputs
+    W_name = node.inputs[1]
+    R_name = node.inputs[2]
+    B = None
+    if len(node.inputs) > 3:
+        B_name = node.inputs[3]
+        B = node.input_tensors.get(B_name, None)
+
+    if W_name not in node.input_tensors or R_name not in node.input_tensors:
+        return err.unsupported_op_configuration(
+            builder,
+            node,
+            graph,
+            "Input and recurrence weights must be known. 
Please consider adding custom conversion function/layer", + ) + + W = node.input_tensors.get(W_name, None) + R = node.input_tensors.get(R_name, None) + + # Get weights for forward direction + W_x, W_h, b = get_weights(W, W_name, R, R_name, B) + + # shape of input + input_size = W_x[0].shape[1] + + # Get input and output for hidden and cell + input_h = node.inputs[5] if len(node.inputs) > 5 else node.inputs[0] + "_h_input" + output_h = ( + node.outputs[1] if len(node.outputs) > 1 else node.outputs[0] + "_h_output" + ) + output_h_5d = output_h + "_5d" + + if len(node.inputs) < 6: + # if input is not present in the network, load they as constant + if node.inputs[0] not in graph.shape_dict: + err.unsupported_op_configuration( + builder, node, graph, "Input shape not represented within Graph" + ) + + # Input is represented as [Seq Len, Batch Size, Input Size] + batch_size = graph.shape_dict[node.inputs[0]][1] + builder.add_load_constant_nd( + name=node.name + "_load_initial_h", + output_name=input_h, + constant_value=0.0, + shape=[1, batch_size, hidden_size], + ) + + # CoreML GRU expects 5-d tensor + # Expand dimensions of input to 5-d for compatibility + input_rank = builder._get_rank(node.inputs[0]) + if input_rank == -1: + return err.unsupported_op_configuration( + builder, node, graph, "Rank unknown for input" + ) + + if input_rank < 5: + add_nodes = 5 - input_rank + + # TODO: Add one expand instead of adding one after another for input, h + expand_dim( + node.name + "_expand_in_0", + node.inputs[0], + node.inputs[0] + "_expand_out_0", + [input_rank], + ) + expand_dim( + node.name + "_expand_in_h_0", + input_h, + input_h + "_expand_out_h_0", + [input_rank], + ) + + for i in range(1, add_nodes): + i_str = str(i) + i_p_str = str(i - 1) + expand_dim( + node.name + "_expand_in_" + i_str, + node.inputs[0] + "_expand_out_" + i_p_str, + node.inputs[0] + "_expand_out_" + i_str, + [input_rank + i], + ) + expand_dim( + node.name + "_expand_in_h_" + i_str, + input_h + "_expand_out_h_" + i_p_str, + input_h + "_expand_out_h_" + i_str, + [input_rank + i], + ) + + builder.add_gru( + name=node.name, + W_h=W_h, + W_x=W_x, + b=b, + hidden_size=hidden_size, + input_size=input_size, + input_names=[ + node.inputs[0] + "_expand_out_" + str(add_nodes - 1), + input_h + "_expand_out_h_" + str(add_nodes - 1), + ], + output_names=[node.outputs[0] + "_5d_out", output_h_5d], + inner_activation=inner_activation, + activation=output_activation, + output_all=True, + reverse_input=(direction == "reverse"), + ) + + # CoreML output is [Seq Len, Batch Size, Num Dir * Hidden Size, 1, 1] + # Return output as [Seq Len, Num Dir, Batch Size, Hidden Size] + # Following steps: + # a. Reshape and split hidden size for direction [Seq Len, Batch Size, Num Dir, Hidden Size, 1] + # b. Squeeze last dimension [Seq Len, Batch Size, Num Dir, Hidden Size] + # c. 
Permute to fix the order [Seq Len, Num Dir, Batch Size, Hidden Size, 1] + builder.add_rank_preserving_reshape( + name=node.name + "_reshape_", + input_name=node.outputs[0] + "_5d_out", + output_name=node.outputs[0] + "_5d_reshaped", + output_shape=[0, 0, 1, -1, 0], + ) + + builder.add_squeeze( + name=node.name + "_squeeze_out", + input_name=node.outputs[0] + "_5d_reshaped", + output_name=node.outputs[0] + "_4d", + axes=[-1], + ) + + builder.add_transpose( + name=node.name + "_transpose", + axes=[0, 2, 1, 3], + input_name=node.outputs[0] + "_4d", + output_name=node.outputs[0], + ) + + # Squeeze dimensions of output_h + builder.add_squeeze( + name=node.name + "_squeeze_out_h", + input_name=output_h_5d, + output_name=output_h, + axes=[-1, -2], + ) + + +def _convert_identity(builder, node, graph, err): + """ + convert to CoreML Linear Activation Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L417 + """ + # TODO: Skip or CopyProp + DeadCode elimination + builder.add_activation( + name=node.name, + non_linearity="LINEAR", + input_name=node.inputs[0], + output_name=node.outputs[0], + params=[1.0, 0.0], + ) + + +def _convert_instancenorm( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + """ + convert to CoreML BatchNorm Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1633 + """ + epsilon = node.attrs.get("epsilon", 1e-5) + if ( + node.inputs[1] not in node.input_tensors + or node.inputs[2] not in node.input_tensors + ): + return err.unsupported_op_configuration( + builder, + node, + graph, + "CoreML InstanceNorm requires Scale and Bias to be known", + ) + + scale = node.input_tensors[node.inputs[1]] + bias = node.input_tensors[node.inputs[2]] + + rank = builder._get_rank(node.inputs[0]) + # ONNX converts B x C tensor into B x C x 1 hence + # Rank 2 BN is mapped to Rank 3 BN + if rank == 3: + # 1D Batch Norm + add_bn_with_expansion( + builder, + node, + err, + node.name, + node.inputs[0], + node.outputs[0], + scale.shape[0], + scale, + bias, + epsilon=epsilon, + compute_mean_var=True, + instance_normalization=True, + axes_for_expansion=[0, 3], + ) + elif rank == 4: + # 2D Batch Norm + add_bn_with_expansion( + builder, + node, + err, + node.name, + node.inputs[0], + node.outputs[0], + scale.shape[0], + scale, + bias, + epsilon=epsilon, + compute_mean_var=True, + instance_normalization=True, + axes_for_expansion=[], + ) + else: + # Unsupported 1D, 3D and above + err.unsupported_op_configuration( + builder, node, graph, "provided number axes {} not supported".format(rank) + ) + + +def _convert_less(builder, node, graph, err): + """ + convert to CoreML Less Than Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L907 + """ + load_input_constants(builder, node, graph, err) + builder.add_less_than( + name=node.name, input_names=node.inputs, output_name=node.outputs[0], + ) + + +def _convert_lstm( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + """ + convert to CoreML Uni/Bi-Directional LSTM Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3282 + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3348 + """ + + def 
get_weights(W, W_name, R, R_name, B): + """ + Helper routine to return weights in CoreML LSTM required format + """ + W = np.expand_dims(np.expand_dims(W, 3), 3) + R = np.expand_dims(np.expand_dims(R, 3), 3) + + if W is None: + err.missing_initializer( + node, + "Weight tensor: {} not found in the graph initializer".format(W_name), + ) + if R is None: + err.missing_initializer( + node, + "Weight tensor: {} not found in the graph initializer".format(R_name), + ) + + W_i, W_o, W_f, W_c = np.split(np.squeeze(W), 4) # type: ignore + R_i, R_o, R_f, R_c = np.split(np.squeeze(R), 4) # type: ignore + + W_x = [W_i, W_f, W_o, W_c] + W_h = [R_i, R_f, R_o, R_c] + b = None + if B is not None: + b_Wi, b_Wo, b_Wf, b_Wc, b_Ri, b_Ro, b_Rf, b_Rc = np.split(np.squeeze(B), 8) # type: ignore + b = [b_Wi + b_Ri, b_Wf + b_Rf, b_Wo + b_Ro, b_Wc + b_Rc] + + return W_x, W_h, b + + def expand_dim(node_name, input_name, output_name, axes): + builder.add_expand_dims( + name=node_name, input_name=input_name, output_name=output_name, axes=axes + ) + + # Read attributes + # activation alpha and beta + if "activation_alpha" in node.attrs or "activation_beta" in node.attrs: + err.unsupported_feature_warning( + node, "Activation parameter alpha and beta are currently not used" + ) + + inner_activation = "SIGMOID" + cell_state_update_activation = "TANH" + output_activation = "TANH" + + if "activations" in node.attrs: + activations_list = node.attrs["activations"] + + if len(activations_list) < 3: + err.unsupported_op_configuration( + builder, + node, + graph, + "Error in ONNX model: Less number of activations provided", + ) + + if len(activations_list) == 6: + err.unsupported_feature_warning( + node, "Forward and backward pass will use same activations." + ) + + inner_activation = activations_list[0].upper() + cell_state_update_activation = activations_list[1].upper() + output_activation = activations_list[2].upper() + + # Provide max Clip Value if not provided + clip_threshold = node.attrs.get("clip", 500000.0) + + # Extract direction from ONNX attribute + direction = 1 + if ( + "direction" in node.attrs + and node.attrs["direction"].decode("utf-8") == "bidirectional" + ): + direction = 2 + + hidden_size = node.attrs.get("hidden_size") + + input_forget = node.attrs.get("input_forget", 0) == 1 + + # Read inputs + W_name = node.inputs[1] + R_name = node.inputs[2] + B = None + if len(node.inputs) > 3: + B_name = node.inputs[3] + B = node.input_tensors.get(B_name, None) + + W = node.input_tensors.get(W_name, None) + R = node.input_tensors.get(R_name, None) + + W = np.split(W, direction) + R = np.split(R, direction) + if B is not None: + B = np.split(B, direction) + else: + B = [None, None] + + # Get weights for forward direction + W_x, W_h, b = get_weights(W[0], W_name, R[0], R_name, B[0]) + + # shape of input + input_size = W_x[0].shape[1] + + # Get input and output for hidden and cell + input_h = node.inputs[5] if len(node.inputs) > 5 else node.inputs[0] + "_h_input" + input_c = node.inputs[6] if len(node.inputs) > 6 else node.inputs[0] + "_c_input" + output_h = ( + node.outputs[1] if len(node.outputs) > 1 else node.outputs[0] + "_h_output" + ) + output_c = ( + node.outputs[2] if len(node.outputs) > 2 else node.outputs[0] + "_c_output" + ) + output_h_5d = output_h + "_5d" + output_c_5d = output_c + "_5d" + + # if input is not present in the network, load they as constant + load_input_constants(builder, node, graph, err) + + # Input is represented as [Seq Len, Batch Size, Input Size] + if len(node.inputs) < 6: + batch_size = 
graph.shape_dict[node.inputs[0]][1]
+        builder.add_load_constant_nd(
+            name=node.name + "_load_initial_h_and_c",
+            output_name=input_h,
+            constant_value=0.0,
+            shape=[direction, batch_size, hidden_size],
+        )
+        # OPTIMIZATION: reuse the zero initial-state constant for both h and c
+        input_c = input_h
+
+    # Get tensors for peepholes
+    peepholes = node.inputs[7] if len(node.inputs) > 7 else None
+
+    # CoreML LSTM expects 5-d tensor
+    # Expand dimensions of input to 5-d for compatibility
+    rank = builder._get_rank(node.inputs[0])
+    if rank == -1:
+        return err.unsupported_op_configuration(
+            builder, node, graph, "Rank unknown for input"
+        )
+    if rank < 5:
+        add_nodes = 5 - rank
+        # TODO: Add one expand instead of adding one after another for input, h and c
+        expand_dim(
+            node.name + "_expand_in_0",
+            node.inputs[0],
+            node.inputs[0] + "_expand_out_0",
+            [rank],
+        )
+        expand_dim(
+            node.name + "_expand_in_h_0", input_h, input_h + "_expand_out_h_0", [rank]
+        )
+        expand_dim(
+            node.name + "_expand_in_c_0", input_c, input_c + "_expand_out_c_0", [rank]
+        )
+
+        for i in range(1, add_nodes):
+            i_str = str(i)
+            i_p_str = str(i - 1)
+            expand_dim(
+                node.name + "_expand_in_" + i_str,
+                node.inputs[0] + "_expand_out_" + i_p_str,
+                node.inputs[0] + "_expand_out_" + i_str,
+                [rank + i],
+            )
+            expand_dim(
+                node.name + "_expand_in_h_" + i_str,
+                input_h + "_expand_out_h_" + i_p_str,
+                input_h + "_expand_out_h_" + i_str,
+                [rank + i],
+            )
+            expand_dim(
+                node.name + "_expand_in_c_" + i_str,
+                input_c + "_expand_out_c_" + i_p_str,
+                input_c + "_expand_out_c_" + i_str,
+                [rank + i],
+            )
+
+    if direction == 1:
+        # Peepholes from ONNX are of shape [Num Dir, 3 * hidden_size]
+        # Reshape into CoreML format of [input hs, forget hs, cell hs]
+        if peepholes is not None:
+            builder.add_reshape_static(
+                name=node.name + "_peephole_reshape",
+                input_name=peepholes,
+                output_name=peepholes + "_reshaped",
+                output_shape=[hidden_size, hidden_size, hidden_size],
+            )
+            peepholes = peepholes + "_reshaped"
+
+        builder.add_unilstm(
+            name=node.name,
+            W_h=W_h,
+            W_x=W_x,
+            b=b,
+            hidden_size=hidden_size,
+            input_size=input_size,
+            input_names=[
+                node.inputs[0] + "_expand_out_" + str(add_nodes - 1),
+                input_h + "_expand_out_h_" + str(add_nodes - 1),
+                input_c + "_expand_out_c_" + str(add_nodes - 1),
+            ],
+            output_names=[node.outputs[0] + "_5d_out", output_h_5d, output_c_5d],
+            inner_activation=inner_activation,
+            cell_state_update_activation=cell_state_update_activation,
+            output_activation=output_activation,
+            peep=peepholes,
+            output_all=True,
+            forget_bias=True,
+            coupled_input_forget_gate=input_forget,
+            cell_clip_threshold=clip_threshold,
+            reverse_input=False,
+        )
+    elif direction == 2:
+        if len(W) != 2 or len(R) != 2 or len(B) != 2:
+            err.unsupported_op_configuration(
+                builder,
+                node,
+                graph,
+                "Bi-Directional LSTM does not have weights for both the directions",
+            )
+
+        W_x_back, W_h_back, b_back = get_weights(W[1], W_name, R[1], R_name, B[1])
+
+        peephole_f = None
+        peephole_b = None
+        if peepholes is not None:
+            builder.add_reshape_static(
+                name=node.name + "_peephole_reshape",
+                input_name=peepholes,
+                output_name=peepholes + "_reshaped",
+                output_shape=[direction, hidden_size, hidden_size, hidden_size],
+            )
+
+            peephole_f = peepholes + "_f"
+            peephole_b = peepholes + "_b"
+
+            builder.add_split_nd(
+                name=node.name + "_peephole_split",
+                input_name=peepholes + "_reshaped",
+                output_names=[peephole_f, peephole_b],
+                axis=0,
+            )
+
+        # split input_h and input_c into two parts
+        builder.add_split_nd(
+            name=node.name + "_split_h",
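+            # Illustrative note: ONNX packs initial states as
+            # [num_directions, batch, hidden], so an axis-0 split yields the
+            # forward ("_f") and backward ("_b") tensors fed to add_bidirlstm.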
input_name=input_h + "_expand_out_h_" + str(add_nodes - 1), + output_names=[input_h + "_f", input_h + "_b"], + axis=0, + ) + + # OPTIMIZATION: If input_h and input_c are same + # Avoid creating new split and instead reuse + if input_h != input_c: + builder.add_split_nd( + name=node.name + "_split_c", + input_name=input_c + "_expand_out_c_" + str(add_nodes - 1), + output_names=[input_c + "_f", input_c + "_b"], + axis=0, + ) + + builder.add_bidirlstm( + name=node.name, + W_h=W_h, + W_x=W_x, + b=b, + W_h_back=W_h_back, + W_x_back=W_x_back, + b_back=b_back, + hidden_size=hidden_size, + input_size=input_size, + input_names=[ + node.inputs[0] + "_expand_out_" + str(add_nodes - 1), + input_h + "_f", + input_c + "_f", + input_h + "_b", + input_c + "_b", + ], + output_names=[ + node.outputs[0] + "_5d_out", + output_h + "_f", + output_c + "_f", + output_h + "_b", + output_c + "_b", + ], + inner_activation=inner_activation, + cell_state_update_activation=cell_state_update_activation, + output_activation=output_activation, + output_all=True, + peep=peephole_f, + peep_back=peephole_b, + forget_bias=True, + coupled_input_forget_gate=input_forget, + cell_clip_threshold=clip_threshold, + ) + + # Combine output_h and output_c + builder.add_concat_nd( + name=node.name + "concat_output_h", + input_names=[output_h + "_f", output_h + "_b"], + output_name=output_h_5d, + axis=0, + ) + + builder.add_concat_nd( + name=node.name + "concat_output_c", + input_names=[output_c + "_f", output_c + "_b"], + output_name=output_c_5d, + axis=0, + ) + else: + err.unsupported_op_configuration( + builder, node, graph, "Unsupported direction {} for LSTM".format(direction) + ) + + # CoreML output is [Seq Len, Batch Size, Num Dir * Hidden Size, 1, 1] + # Return output as [Seq Len, Num Dir, Batch Size, Hidden Size] + # Following steps: + # a. Reshape and split hidden size for direction [Seq Len, Batch Size, Num Dir, Hidden Size, 1] + # b. Squeeze last dimension [Seq Len, Batch Size, Num Dir, Hidden Size] + # c. 
Permute to fix the order [Seq Len, Num Dir, Batch Size, Hidden Size, 1] + builder.add_rank_preserving_reshape( + name=node.name + "_reshape_", + input_name=node.outputs[0] + "_5d_out", + output_name=node.outputs[0] + "_5d_reshaped", + output_shape=[0, 0, direction, -1, 0], + ) + + builder.add_squeeze( + name=node.name + "_squeeze_out", + input_name=node.outputs[0] + "_5d_reshaped", + output_name=node.outputs[0] + "_4d", + axes=[-1], + ) + + builder.add_transpose( + name=node.name + "_transpose", + axes=[0, 2, 1, 3], + input_name=node.outputs[0] + "_4d", + output_name=node.outputs[0], + ) + + # Squeeze dimensions of output_h and output_c + builder.add_squeeze( + name=node.name + "_squeeze_out_h", + input_name=output_h_5d, + output_name=output_h, + axes=[-1, -2], + ) + builder.add_squeeze( + name=node.name + "_squeeze_out_c", + input_name=output_c_5d, + output_name=output_c, + axes=[-1, -2], + ) + + +def _convert_logical(builder, node, graph, err): + """ + convert to CoreML Logical And/Or/Xor/Not Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1013 + """ + mode = node.op_type.upper() + builder.add_logical( + name=node.name, input_names=node.inputs, output_name=node.outputs[0], mode=mode + ) + + +def _convert_pad(builder, node, graph, err): + """ + convert to CoreML Padding / ConstantPadding Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4397 + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1822 + """ + mode = node.attrs.get("mode", "constant") + + try: + mode = mode.decode() + except (UnicodeDecodeError, AttributeError): + pass + + if mode == "constant": + pads = node.attrs.get("pads", []) + value = node.attrs.get("value", 0.0) + + builder.add_constant_pad( + name=node.name, + input_names=node.inputs, + output_name=node.outputs[0], + value=value, + pad_to_given_output_size_mode=False, + pad_amounts=pads, + ) + else: + _convert_pad_5d(builder, node, graph, err) + + +def _convert_matmul(builder, node, graph, err): + """ + convert to CoreML BatchedMatMul Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3473 + """ + weight_name = node.inputs[1] + W = None + weight_as_layer_parameter = False + if weight_name in node.input_tensors: + W = node.input_tensors[weight_name] + + if W is not None: + if len(W.shape) != 2: + # since weight as parameter in batchedMatMul layer must be rank 2 + builder.add_load_constant_nd( + node.name + "_const_weight_input", + weight_name, + constant_value=W, + shape=W.shape, + ) + else: + weight_as_layer_parameter = True + + if weight_as_layer_parameter: + builder.add_batched_mat_mul( + name=node.name, + input_names=[node.inputs[0]], + output_name=node.outputs[0], + weight_matrix_rows=W.shape[0], + weight_matrix_columns=W.shape[1], + W=W, + ) + else: + builder.add_batched_mat_mul( + name=node.name, + input_names=[node.inputs[0], weight_name], + output_name=node.outputs[0], + ) + + +def _convert_max(builder, node, graph, err): + """ + convert to CoreML Max Broadcastable Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4126 + """ + load_input_constants(builder, node, graph, err) + add_broadcastable_op_chain(builder, node, err, builder.add_max_broadcastable) + + +def _convert_mean(builder, 
node, graph, err):
+    """
+    convert to CoreML Add Broadcastable Layer and Divide Broadcastable Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4117
+    """
+    number_of_inputs = len(node.inputs)
+    output_name = node.outputs[0]
+    node.outputs[0] = node.outputs[0] + "_sum"
+
+    builder.add_load_constant_nd(
+        name=node.name + "_divider",
+        output_name=output_name + "_divider",
+        constant_value=np.array(number_of_inputs),
+        shape=[1],
+    )
+    add_broadcastable_op_chain(builder, node, err, builder.add_add_broadcastable)
+    builder.add_divide_broadcastable(
+        name=node.name + "_mean",
+        input_names=[node.outputs[0], output_name + "_divider"],
+        output_name=output_name,
+    )
+
+
+def _convert_pow(builder, node, graph, err):
+    """
+    convert to CoreML Pow Broadcastable Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3969
+    """
+    load_input_constants(builder, node, graph, err)
+    add_broadcastable_op_chain(builder, node, err, builder.add_pow_broadcastable)
+
+
+def _convert_randomnormal(builder, node, graph, err):
+    """
+    convert to CoreML Random Normal Static Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4457
+    """
+    add_random(builder, node, graph, err, builder.add_random_normal_static)
+
+
+def _convert_randomnormallike(builder, node, graph, err):
+    """
+    convert to CoreML Random Normal Like Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4434
+    """
+    # Ignoring attribute `dtype`, as CoreML represents all tensors as Float
+    mean = node.attrs.get("mean", 0.0)
+    scale = node.attrs.get("scale", 1.0)
+    seed = node.attrs.get("seed", -1)
+
+    builder.add_random_normal_like(
+        name=node.name,
+        input_name=node.inputs[0],
+        output_name=node.outputs[0],
+        mean=mean,
+        stddev=scale,
+        seed=seed,
+    )
+
+
+def _convert_randomuniform(builder, node, graph, err):
+    """
+    convert to CoreML Random Uniform Static Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4526
+    """
+    add_random(builder, node, graph, err, builder.add_random_uniform_static)
+
+
+def _convert_randomuniformlike(builder, node, graph, err):
+    """
+    convert to CoreML Random Uniform Like Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4503
+    """
+    # Ignoring attribute `dtype`, as CoreML represents all tensors as Float
+    low = node.attrs.get("low", 0.0)
+    high = node.attrs.get("high", 1.0)
+    seed = node.attrs.get("seed", -1)
+
+    builder.add_random_uniform_like(
+        name=node.name,
+        input_name=node.inputs[0],
+        output_name=node.outputs[0],
+        minval=low,
+        maxval=high,
+        seed=seed,
+    )
+
+
+def _convert_min(builder, node, graph, err):
+    """
+    convert to CoreML Min Broadcastable Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4135
+    """
+    load_input_constants(builder, node, graph, err)
+    add_broadcastable_op_chain(builder, node, err, builder.add_min_broadcastable)
+
+
+def _convert_mod(builder, node, graph, err):
+    """
+    convert to CoreML Mod Broadcastable Layer:
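+
+    As with the other binary ops above (illustrative note): constant inputs
+    are first materialized by load_input_constants, then two-plus inputs are
+    folded pairwise by add_broadcastable_op_chain.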
https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4144 + """ + load_input_constants(builder, node, graph, err) + add_broadcastable_op_chain(builder, node, err, builder.add_mod_broadcastable) + + +def _convert_mul(builder, node, graph, err): + """ + convert to CoreML Multiply Broadcastable Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4171 + """ + load_input_constants(builder, node, graph, err) + add_broadcastable_op_chain(builder, node, err, builder.add_multiply_broadcastable) + + +def _convert_nonzero(builder, node, graph, err): + """ + convert to CoreML Where Non Zero Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4002 + """ + load_input_constants(builder, node, graph, err) + builder.add_where_nonzero( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_pool(builder, node, graph, err): + """ + convert to CoreML Pooling Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L477 + """ + params_dict = dict() + params_dict["is_global"] = False + if node.op_type.startswith("Global"): + params_dict["is_global"] = True + if node.op_type.endswith("MaxPool"): + params_dict["layer_type"] = "MAX" + elif node.op_type.endswith("AveragePool"): + params_dict["layer_type"] = "AVERAGE" + else: + return err.unsupported_op_configuration( + builder, node, graph, "Unsupported pool type" + ) + + if len(node.outputs) == 2: + return err.unsupported_op_configuration( + builder, node, graph, "argmax with pool unsupported" + ) + + if "ceil_mode" in node.attrs and node.attrs["ceil_mode"] == 1: + return err.unsupported_op_configuration( + builder, node, graph, "ceil_mode=1 not supported" + ) + + if "dilations" in node.attrs: + return err.unsupported_op_configuration( + builder, node, graph, "dilations not supported" + ) + + _add_conv_like_op( + _add_pool, _get_pool_params, params_dict, builder, node, graph, err + ) + + +def _convert_reduce(builder, node, graph, err): + """ + convert to CoreML ReduceSum Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4707 + """ + load_input_constants(builder, node, graph, err) + + # read attributes + axes = node.attrs.get("axes", None) + reduce_all = False + if axes is None: + reduce_all = True + keepdims = node.attrs.get("keepdims", True) + + # add respective operator + op_type = node.op_type + if op_type == "ReduceSum": + builder.add_reduce_sum( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceProd": + builder.add_reduce_prod( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceMean": + builder.add_reduce_mean( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceMax": + builder.add_reduce_max( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceMin": + builder.add_reduce_min( + name=node.name, + 
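+            # Illustrative example (not from the source): ReduceMin with
+            # axes=[1], keepdims=1 maps a (2, 3) input to (2, 1); when axes is
+            # absent, reduce_all collapses every axis, mirroring ONNX defaults.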
input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceL2": + builder.add_reduce_l2( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceL1": + builder.add_reduce_l1( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceSumSquare": + builder.add_reduce_sumsquare( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceLogSum": + builder.add_reduce_logsum( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + elif op_type == "ReduceLogSumExp": + builder.add_reduce_logsumexp( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + keepdims=keepdims, + reduce_all=reduce_all, + ) + else: + err.unsupported_op_configuration( + builder, node, graph, "Unsupported reduce operation: {}".format(op_type) + ) + + +def _convert_reshape(builder, node, graph, err): + """ + convert to CoreML Reshape Static Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4844 + """ + shape_node = node.inputs[1] + if shape_node in node.input_tensors: + output_shape = node.input_tensors[shape_node].astype(np.int64) + + # if rank is same, then call rank preserving reshape + if node.inputs[0] not in graph.shape_dict: + # If Input shape is not present and output shape is known + # add reshape static as + # TODO: ONNX should be able to infer the shape + builder.add_reshape_static( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + output_shape=output_shape, + ) + return + + len_of_input_shape = builder._get_rank(node.inputs[0]) + if len(output_shape) == len_of_input_shape: + builder.add_rank_preserving_reshape( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + output_shape=output_shape, + ) + else: + add_static_reshape = True + if len_of_input_shape > len(output_shape): + # Output rank is less than input rank + # Case when most of the dims size is unchanged + num_zeros = 0 + num_neg_ones = 0 + for i in output_shape: + if i == 0: + num_zeros += 1 + elif i == -1: + num_neg_ones += 1 + + if num_neg_ones > 1: + err.unsupported_op_configuration( + builder, + node, + graph, + "Error in ONNX model: At most one dimension of new shape can be -1, found {}".format( + num_neg_ones + ), + ) + + if num_neg_ones + num_zeros == len(output_shape): + # Rank of output is less than input + # Make Rank equivalent for reshape and then squeeze + add_static_reshape = False + new_shape = [] + i = 0 + for i in range(len(output_shape)): + new_shape.append(output_shape[i]) + if output_shape[i] == -1: + break + while i < len_of_input_shape - 1: + new_shape.append(1) + i += 1 + + builder.add_rank_preserving_reshape( + name=node.name + "_reshape_preserving", + input_name=node.inputs[0], + output_name=node.outputs[0] + "_reshape_dim_preserved", + output_shape=new_shape, + ) + + squeeze_axes = list( + range(len(output_shape) - len_of_input_shape, 0) + ) + squeeze_axes.reverse() + + builder.add_squeeze( + name=node.name, + input_name=node.outputs[0] + "_reshape_dim_preserved", + 
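+                    # Worked example (illustrative): for an input of rank 3 and
+                    # target shape [0, -1], new_shape becomes [0, -1, 1] and
+                    # this squeeze over axes=[-1] restores the rank-2 result.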
output_name=node.outputs[0], + axes=squeeze_axes, + ) + + if add_static_reshape: + builder.add_reshape_static( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + output_shape=output_shape, + ) + else: + builder.add_reshape_dynamic( + name=node.name, input_names=node.inputs, output_name=node.outputs[0], + ) + + +def _convert_resize(builder, node, graph, err): + """ + convert to CoreML Upsample or Resize Bilinear Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L2139 + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L2178 + """ + mode = node.attrs.get("mode", "nearest") + if node.inputs[1] not in node.input_tensors: + return err.unsupported_op_configuration( + builder, + node, + graph, + "Scaling factor unknown!! CoreML does not support dynamic scaling for Resize", + ) + + mode = "NN" if mode == "nearest" else "BILINEAR" + scale = node.input_tensors[node.inputs[1]] + + builder.add_upsample( + name=node.name, + scaling_factor_h=scale[-2], + scaling_factor_w=scale[-1], + input_name=node.inputs[0], + output_name=node.outputs[0], + mode=mode, + ) + + +def _convert_reverse_sequence(builder, node, graph, err): + """ + convert to CoreML Reverse Sequence Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3577 + """ + batch_axis = node.attrs.get("batch_axis", 1) + time_axis = node.attrs.get("time_axis", 0) + + output_name = node.outputs[0] + add_transpose = False + if batch_axis > time_axis: + output_name += "_before_reverse" + batch_axis, time_axis = time_axis, batch_axis + add_transpose = True + + builder.add_reverse_sequence( + name=node.name, + input_names=node.inputs, + output_name=output_name, + batch_axis=batch_axis, + seq_axis=time_axis, + ) + + if add_transpose: + output_name_post = "_before_reverse" + rank = builder._get_rank(node.inputs[0]) + if rank == -1: + return err.unsupported_op_configuration( + builder, node, graph, "Rank unknown for input" + ) + axes = list(range(rank)) + axes[batch_axis], axes[time_axis] = axes[time_axis], axes[batch_axis] + builder.add_transpose( + name=node.name + "_transpose", + axes=axes, + input_name=output_name, + output_name=node.outputs[0], + ) + + +def _convert_roialign(builder, node, graph, err): + """ + convert to CoreML CropResize and Pooling Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L2239 + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1702 + """ + + target_height = node.attrs.get("output_height", 1) + target_width = node.attrs.get("output_width", 1) + mode = node.attrs.get("mode", "AVERAGE").upper() + sampling_ratio = node.attrs.get("sampling_ratio", 0) + spatial_scale = node.attrs.get("sampling_scale", 1.0) + + if node.inputs[2] in graph.inputs: + graph.inputs.remove(node.inputs[2]) + + builder.add_expand_dims( + name=node.name + "_expand_0", + input_name=node.inputs[0], + output_name=node.inputs[0] + "_expanded", + axes=[0], + ) + node.inputs[0] += "_expanded" + + builder.add_expand_dims( + name=node.name + "_expand_2", + input_name=node.inputs[2], + output_name=node.inputs[2] + "_expanded", + axes=[1], + ) + node.inputs[2] += "_expanded" + + builder.add_concat_nd( + name=node.name + "_concat_indices", + input_names=[node.inputs[2], 
node.inputs[1]], + output_name=node.inputs[1] + "_rois", + axis=1, + ) + node.inputs[1] += "_rois" + + builder.add_expand_dims( + name=node.name + "_expand_1", + input_name=node.inputs[1], + output_name=node.inputs[1] + "_expanded", + axes=[1, 3, 4], + ) + node.inputs[1] += "_expanded" + + builder.add_crop_resize( + name=node.name + "_crop_resize", + input_names=[node.inputs[0], node.inputs[1]], + output_name=node.outputs[0] + "_crop_resized", + target_height=target_height * sampling_ratio, + target_width=target_width * sampling_ratio, + mode="ROI_ALIGN_MODE", + box_indices_mode="CORNERS_WIDTH_FIRST", + spatial_scale=spatial_scale, + ) + + builder.add_squeeze( + name=node.name + "_squeeze", + input_name=node.outputs[0] + "_crop_resized", + output_name=node.outputs[0] + "_crop_resized_squeezed", + axes=[1], + ) + + builder.add_pooling( + name=node.name + "_pool", + height=sampling_ratio, + width=sampling_ratio, + layer_type=mode, + input_name=node.outputs[0] + "_crop_resized_squeezed", + output_name=node.outputs[0], + stride_height=sampling_ratio, + stride_width=sampling_ratio, + padding_type="VALID", + ) + + +def _convert_round(builder, node, graph, err): + """ + convert to CoreML Round Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5029 + """ + builder.add_round( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_scatter(builder, node, graph, err): + """ + convert to CoreML Scatter Along Axis Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4308 + """ + axis = node.attrs.get("axis", 0) + builder.add_scatter_along_axis( + name=node.name, input_names=node.inputs, output_name=node.outputs[0], axis=axis + ) + + +def _convert_size(builder, node, graph, err): + """ + convert to CoreML GetShape and ReduceProd Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5131 + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4722 + """ + builder.add_get_shape( + name=node.name, + input_name=node.inputs[0], + output_name=node.inputs[0] + "_getshape", + ) + builder.add_reduce_prod( + name=node.name + "_reduce_prod", + input_name=node.inputs[0] + "_getshape", + output_name=node.outputs[0], + ) + + +def _convert_slice_ir4v9(builder, node, graph, err): + """ + convert to CoreML Slice Static Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5082 + """ + if node.inputs[0] in graph.shape_dict: + data_shape = graph.shape_dict[node.inputs[0]] + else: + rank = builder._get_rank(node.inputs[0]) + if rank == -1: + return err.unsupported_op_configuration( + builder, node, graph, "Input shape not available" + ) + data_shape = [INT_MAX] * rank + + len_of_data = len(data_shape) + begin_masks = [True] * len_of_data + end_masks = [True] * len_of_data + + default_axes = list(range(len_of_data)) + default_steps = [1] * len_of_data + + ip_starts = node.attrs.get("starts") + ip_ends = node.attrs.get("ends") + axes = node.attrs.get("axes", default_axes) + steps = node.attrs.get("steps", default_steps) + + starts = [0] * len_of_data + ends = [0] * len_of_data + + for i in range(len(axes)): + current_axes = axes[i] + starts[current_axes] = ip_starts[i] + ends[current_axes] = ip_ends[i] + # n <= end <= INT_MAX 
implies end is -1, hence end_mask should be True + # otherwise end_mask should be False + if ends[current_axes] < data_shape[current_axes]: + # this means end is not -1 + end_masks[current_axes] = False + + if starts[current_axes] != 0: + begin_masks[current_axes] = False + + builder.add_slice_static( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + begin_ids=starts, + end_ids=ends, + strides=steps, + begin_masks=begin_masks, + end_masks=end_masks, + ) + + +def _convert_slice(builder, node, graph, err): + """ + convert to CoreML Slice Static Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5082 + """ + if len(node.inputs) == 1: + return _convert_slice_ir4v9(builder, node, graph, err) + + if node.inputs[0] not in graph.shape_dict: + err.unsupported_op_configuration( + builder, node, graph, "Input shape not available" + ) + + data_shape = graph.shape_dict[node.inputs[0]] + len_of_data = len(data_shape) + begin_masks = [True] * len_of_data + end_masks = [True] * len_of_data + + default_axes = list(range(len_of_data)) + + add_static_slice_layer = False + if node.inputs[1] in node.input_tensors and node.inputs[2] in node.input_tensors: + if len(node.inputs) > 3: + if node.inputs[3] in node.input_tensors: + if len(node.inputs) > 4: + if node.inputs[4] in node.input_tensors: + add_static_slice_layer = True + else: + add_static_slice_layer = True + else: + add_static_slice_layer = True + + if add_static_slice_layer: + ip_starts = node.input_tensors[node.inputs[1]] + ip_ends = node.input_tensors[node.inputs[2]] + axes = ( + node.input_tensors[node.inputs[3]] if len(node.inputs) > 3 else default_axes + ) + ip_steps = node.input_tensors[node.inputs[4]] if len(node.inputs) > 4 else None + + starts = [0] * len_of_data + ends = [0] * len_of_data + steps = [1] * len_of_data + + for i in range(len(axes)): + current_axes = axes[i] + starts[current_axes] = ip_starts[i] + ends[current_axes] = ip_ends[i] + # n <= end <= INT_MAX implies end is -1, hence end_mask should be True + # otherwise end_mask should be False + if ends[current_axes] < data_shape[current_axes]: + # this means end is not -1 + end_masks[current_axes] = False + + if starts[current_axes] != 0: + begin_masks[current_axes] = False + + if isinstance(ip_steps, list): + steps[current_axes] = ip_steps[i] + + builder.add_slice_static( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + begin_ids=starts, + end_ids=ends, + strides=steps, + begin_masks=begin_masks, + end_masks=end_masks, + ) + else: + err.unsupported_op_configuration( + builder, + node, + graph, + "CoreML does not support Dynamic Slice with unknown axes. 
Please provide Custom Function/Layer", + ) + + +def _convert_softmax_nd(builder, node, graph, err): + """ + convert to CoreML SoftMax ND Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#3547 + """ + axis = node.attrs.get("axis", 1) + builder.add_softmax_nd( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0] + + ("_softmax" if node.op_type == "LogSoftmax" else ""), + axis=axis, + ) + if node.op_type == "LogSoftmax": + builder.add_unary( + name=node.name + "_log", + input_name=node.outputs[0] + "_softmax", + output_name=node.outputs[0], + mode="log", + ) + + +def _convert_softmax(builder, node, graph, err): + """ + convert to CoreML SoftMax ND Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#3547 + """ + + def add_softmax(output_name, rank=-1, axis=-3): + softmax_axis = 3 + axes = list(range(5 - rank)) + if axis < 0: + axis = rank + axis + axis += len(axes) + softmax_output_name = output_name + "_expanded" + + expanded_node = node.name + "_" + node.inputs[0] + "_expanded" + builder.add_expand_dims( + name=node.name + "_expand_dims", + input_name=node.inputs[0], + output_name=expanded_node, + axes=axes, + ) + input_name = expanded_node + rank = 5 + + if axis != -3 and axis != rank - softmax_axis: + transpose_axes = list(range(rank)) + transpose_axes[-3], transpose_axes[axis] = ( + transpose_axes[axis], + transpose_axes[-3], + ) + + builder.add_transpose( + name=node.name + "_transpose", + axes=transpose_axes, + input_name=input_name, + output_name=input_name + "_transposed", + ) + input_name += "_transposed" + softmax_output_name += "_transposed" + + builder.add_softmax( + name=node.name, input_name=input_name, output_name=softmax_output_name + ) + + if axis != -3 and axis != rank - softmax_axis: + transpose_axes = list(range(rank)) + transpose_axes[-3], transpose_axes[axis] = ( + transpose_axes[axis], + transpose_axes[-3], + ) + + builder.add_transpose( + name=node.name + "_transpose_back", + axes=transpose_axes, + input_name=softmax_output_name, + output_name=softmax_output_name + "_transposed_back", + ) + softmax_output_name += "_transposed_back" + + builder.add_squeeze( + name=node.name + "_squeeze_dims", + input_name=softmax_output_name, + output_name=output_name, + axes=axes, + ) + + axis = node.attrs.get("axis", 1) + rank = builder._get_rank(node.inputs[0]) + if rank == -1: + return _convert_softmax_nd(builder, node, graph, err) + + if node.op_type == "LogSoftmax": + add_softmax(node.outputs[0] + "_softmax", rank=rank, axis=axis) + builder.add_unary( + name=node.name + "_log", + input_name=node.outputs[0] + "_softmax", + output_name=node.outputs[0], + mode="log", + ) + else: + add_softmax(node.outputs[0], rank=rank, axis=axis) + + +def _convert_split(builder, node, graph, err): + """ + convert to CoreML Squeeze Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#5003 + """ + axis = node.attrs.get("axis", 0) + split = node.attrs.get("split", None) + num_splits = len(node.outputs) if split is None else 2 + + builder.add_split_nd( + name=node.name, + input_name=node.inputs[0], + output_names=node.outputs, + axis=axis, + num_splits=num_splits, + split_sizes=split, + ) + + +def _convert_shape(builder, node, graph, err): + """ + convert to CoreML GetShape Layer: + 
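+
+    For example (illustrative): an input of shape (1, 3, 224, 224) yields the
+    rank-1 tensor [1, 3, 224, 224].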
https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5131 + """ + builder.add_get_shape( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_squeeze(builder, node, graph, err): + """ + convert to CoreML Squeeze Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4903 + """ + axes = node.attrs.get("axes", None) + builder.add_squeeze( + name=node.name, + input_name=node.inputs[0], + output_name=node.outputs[0], + axes=axes, + ) + + +def _convert_sub(builder, node, graph, err): + """ + convert to CoreML Subtract Broadcastable Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4117 + """ + load_input_constants(builder, node, graph, err) + add_broadcastable_op_chain(builder, node, err, builder.add_subtract_broadcastable) + + +def _convert_tanh(builder, node, graph, err): + """ + convert to CoreML Tanh Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3881 + """ + load_input_constants(builder, node, graph, err) + builder.add_tanh( + name=node.name, input_name=node.inputs[0], output_name=node.outputs[0] + ) + + +def _convert_tile(builder, node, graph, err): + """ + convert to CoreML Tile Layer: + https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5117 + """ + load_input_constants(builder, node, graph, err) + if node.inputs[1] not in node.input_tensors: + err.unsupported_op_configuration( + builder, + node, + graph, + "CoreML Tile layer does not support dynamic 'reps'. 
'reps' should be known statically",
+        )
+    builder.add_tile(
+        name=node.name,
+        input_name=node.inputs[0],
+        output_name=node.outputs[0],
+        reps=node.input_tensors[node.inputs[1]].astype(np.int32).tolist(),
+    )
+
+
+def _convert_topk(builder, node, graph, err):
+    """
+    convert to CoreML TopK Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5190
+    """
+    load_input_constants(builder, node, graph, err)
+    axis = node.attrs.get("axis", -1)
+    bottom_k = not node.attrs.get("largest", True)
+    # NOTE: Sorted order attribute is currently ignored in CoreML
+    sorted_order = node.attrs.get("sorted", True)
+    if "sorted" in node.attrs:
+        err.unsupported_feature_warning(
+            node, "Sorted Order attribute ('sorted') is currently ignored in CoreML 3.0"
+        )
+
+    builder.add_topk(
+        name=node.name,
+        input_names=node.inputs,
+        output_names=node.outputs,
+        axis=axis,
+        use_bottom_k=bottom_k,
+    )
+
+
+def _convert_transpose(builder, node, graph, err):
+    """
+    convert to CoreML Transpose Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3426
+    """
+
+    axes = node.attrs.get("perm", [])
+    # If 'perm' is not provided, reverse the dimensions
+    if axes == []:
+        rank = builder._get_rank(node.inputs[0])
+        if rank == -1:
+            return err.unsupported_op_configuration(
+                builder, node, graph, "Rank unknown for input"
+            )
+        axes = list(range(-1, -(rank + 1), -1))
+
+    builder.add_transpose(
+        name=node.name,
+        axes=axes,
+        input_name=node.inputs[0],
+        output_name=node.outputs[0],
+    )
+
+
+def _convert_unsqueeze(builder, node, graph, err):
+    """
+    convert to CoreML ExpandDim Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L4810
+    """
+    axes = node.attrs.get("axes")
+    builder.add_expand_dims(
+        name=node.name,
+        input_name=node.inputs[0],
+        output_name=node.outputs[0],
+        axes=axes,
+    )
+
+
+def _convert_where(builder, node, graph, err):
+    """
+    convert to CoreML WhereBroadcastable Layer:
+    https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3742
+    """
+    load_input_constants(builder, node, graph, err)
+    builder.add_where_broadcastable(
+        name=node.name, input_names=node.inputs, output_name=node.outputs[0],
+    )
+
+
+_ONNX_NODE_REGISTRY_ND = {
+    "Abs": _convert_abs,
+    "Acos": _convert_acos,
+    "Acosh": _convert_acosh,
+    "Add": _convert_add,
+    "And": _convert_logical,
+    "ArgMax": _convert_argmax,
+    "ArgMin": _convert_argmin,
+    "Asin": _convert_asin,
+    "Asinh": _convert_asinh,
+    "Atan": _convert_atan,
+    "Atanh": _convert_atanh,
+    "AveragePool": _convert_pool,
+    "BatchNormalization": _convert_bn,
+    "Cast": _convert_cast,
+    "Ceil": _convert_ceil,
+    "Clip": _convert_clip,
+    "Concat": _convert_concat,
+    "Constant": _convert_constant,
+    "ConstantOfShape": _convert_constant_of_shape,
+    "Conv": _convert_conv,
+    "ConvTranspose": _convert_conv,
+    "Cos": _convert_cos,
+    "Cosh": _convert_cosh,
+    "DepthToSpace": _convert_reorganize_data,
+    "Div": _convert_div,
+    "Elu": _convert_elu,
+    "Equal": _convert_equal,
+    "Erf": _convert_erf,
+    "Exp": _convert_exp,
+    "Expand": _convert_expand,
+    "Flatten": _convert_flatten,
+    "Floor": _convert_floor,
+    "Gather": _convert_gather,
+    "Gemm": _convert_gemm,
+    "Greater": _convert_greater,
+    "GRU": _convert_gru,
+    "GlobalAveragePool": _convert_pool,
+    "GlobalMaxPool": _convert_pool,
+    "HardSigmoid": 
_convert_hardsigmoid, + "Identity": _convert_identity, + "InstanceNormalization": _convert_instancenorm, + "LeakyRelu": _convert_leaky_relu, + "Log": _convert_log, + "LogSoftmax": _convert_softmax, + "LRN": _convert_lrn, + "Less": _convert_less, + "LSTM": _convert_lstm, + "MatMul": _convert_matmul, + "Max": _convert_max, + "MaxPool": _convert_pool, + "Mean": _convert_mean, + "Min": _convert_min, + "Mod": _convert_mod, + "Mul": _convert_mul, + "Neg": _convert_neg, + "NonZero": _convert_nonzero, + "Not": _convert_logical, + "Or": _convert_logical, + "Pad": _convert_pad, + "Pow": _convert_pow, + "PRelu": _convert_prelu, + "RandomNormal": _convert_randomnormal, + "RandomNormalLike": _convert_randomnormallike, + "RandomUniform": _convert_randomuniform, + "RandomUniformLike": _convert_randomuniformlike, + "Reciprocal": _convert_reciprocal, + "ReduceL1": _convert_reduce, + "ReduceL2": _convert_reduce, + "ReduceLogSum": _convert_reduce, + "ReduceLogSumExp": _convert_reduce, + "ReduceMax": _convert_reduce, + "ReduceMean": _convert_reduce, + "ReduceMin": _convert_reduce, + "ReduceProd": _convert_reduce, + "ReduceSum": _convert_reduce, + "ReduceSumSquare": _convert_reduce, + "Relu": _convert_relu, + "Reshape": _convert_reshape, + "Resize": _convert_resize, + "ReverseSequence": _convert_reverse_sequence, + "RoiAlign": _convert_roialign, + "Round": _convert_round, + "Scatter": _convert_scatter, + "Selu": _convert_selu, + "Sigmoid": _convert_sigmoid, + "Sign": _convert_sign, + "Size": _convert_size, + "Slice": _convert_slice, + "Softmax": _convert_softmax, + "Softplus": _convert_softplus, + "Softsign": _convert_softsign, + "SpaceToDepth": _convert_reorganize_data, + "Split": _convert_split, + "Shape": _convert_shape, + "Sqrt": _convert_sqrt, + "Squeeze": _convert_squeeze, + "Sub": _convert_sub, + "Sum": _convert_add, + "Tanh": _convert_tanh, + "ThresholdedRelu": _convert_thresholdedrelu, + "Tile": _convert_tile, + "TopK": _convert_topk, + "Transpose": _convert_transpose, + "Unsqueeze": _convert_unsqueeze, + "Upsample": _convert_upsample, + "Xor": _convert_logical, + "Where": _convert_where, +} + + +def _get_node_converter_fn( + builder, node, err +): # type: (NeuralNetworkBuilder, Node, ErrorHandling) -> Callable[[NeuralNetworkBuilder, Node, Graph, ErrorHandling], None] + """ + Get the right converter function for ONNX node op_type + """ + op_type = node.op_type + # Return custom conversion function if provided + # If both node type and node name custom function + # is provided, then use node name specific custom function, as + # type specific custom function is more generic than name specific + if node.name in err.custom_conversion_functions: + return err.custom_conversion_functions[node.name] + elif op_type in err.custom_conversion_functions: + return err.custom_conversion_functions[op_type] + elif op_type in _ONNX_NODE_REGISTRY_ND: + return _ONNX_NODE_REGISTRY_ND[op_type] + else: + return err.unsupported_op(node) + + +def _convert_node_nd( + builder, node, graph, err +): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None + converter_fn = _get_node_converter_fn(builder, node, err) + return converter_fn(builder, node, graph, err) diff --git a/coremltools/converters/nnssa/coreml/graph_pass/__init__.py b/coremltools/converters/onnx/_tests/__init__.py similarity index 53% rename from coremltools/converters/nnssa/coreml/graph_pass/__init__.py rename to coremltools/converters/onnx/_tests/__init__.py index 132958219..596f50304 100644 --- 
a/coremltools/converters/nnssa/coreml/graph_pass/__init__.py
+++ b/coremltools/converters/onnx/_tests/__init__.py
@@ -1,8 +1,4 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function as _
-from __future__ import division as _
 from __future__ import absolute_import as _
-
-from .op_removals import *
-from .op_fusions import *
-from .mlmodel_passes import *
+from __future__ import division as _
+from __future__ import print_function as _
+from __future__ import unicode_literals as _
diff --git a/coremltools/converters/onnx/_tests/_test_utils.py b/coremltools/converters/onnx/_tests/_test_utils.py
new file mode 100644
index 000000000..71448b7a2
--- /dev/null
+++ b/coremltools/converters/onnx/_tests/_test_utils.py
@@ -0,0 +1,267 @@
+from __future__ import absolute_import as _
+from __future__ import division as _
+from __future__ import print_function as _
+from __future__ import unicode_literals as _
+
+import numpy as np
+import numpy.testing as npt  # type: ignore
+import numpy.random as npr
+from onnx import helper, ModelProto, ValueInfoProto, TensorProto, NodeProto
+from typing import Any, Sequence, Text, Tuple, Optional, Dict, List, TypeVar
+from coremltools.converters.onnx import convert
+from coremltools.converters.onnx._converter import SupportedVersion
+from coremltools._deps import _IS_MACOS
+import sys
+
+"""
+Test utilities: dynamically generate random inputs and run ONNX models
+through the caffe2 backend as a reference for the CoreML outputs.
+"""
+
+
+def _forward_onnx_model(
+    model,  # type: ModelProto
+    input_dict,  # type: Dict[Text, np._ArrayLike[Any]]
+    test_name="",  # type: Text
+):
+    # type: (...) -> np.ndarray[Any]
+
+    import caffe2.python.onnx.backend  # type: ignore
+
+    prepared_backend = caffe2.python.onnx.backend.prepare(model)
+    out = prepared_backend.run(input_dict)
+    result = [out[v.name] for v in model.graph.output]
+    output_shapes = [_shape_from_onnx_value_info(o) for o in model.graph.output]
+    for i, output in enumerate(result):
+        result[i] = output.reshape(output_shapes[i])
+    return np.array(result)
+
+
+def _onnx_create_model(
+    nodes,  # type: Sequence[NodeProto]
+    inputs,  # type: Sequence[Tuple[Text,Tuple[int, ...]]]
+    outputs,  # type: Sequence[Tuple[Text,Tuple[int, ...], int]]
+    initializer=[],  # type: Sequence[TensorProto]
+):
+    # type: (...) -> ModelProto
+    initializer_inputs = [
+        helper.make_tensor_value_info(t.name, TensorProto.FLOAT, t.dims)
+        for t in initializer
+    ]
+
+    graph = helper.make_graph(
+        nodes=nodes,
+        name="test",
+        inputs=initializer_inputs
+        + [
+            helper.make_tensor_value_info(input_[0], TensorProto.FLOAT, input_[1])
+            for input_ in inputs
+        ],
+        outputs=[
+            helper.make_tensor_value_info(output_[0], output_[2], output_[1])
+            for output_ in outputs
+        ],
+        initializer=initializer,
+    )
+    onnx_model = helper.make_model(graph)
+    return onnx_model
+
+
+def _onnx_create_single_node_model(
+    op_type,  # type: Text
+    input_shapes,  # type: Sequence[Tuple[int, ...]]
+    output_shapes,  # type: Sequence[Tuple[int, ...]]
+    initializer=[],  # type: Sequence[TensorProto]
+    **kwargs  # type: Any
+):
+    # type: (...) 
-> ModelProto + inputs = [("input{}".format(i,), input_shapes[i]) for i in range(len(input_shapes))] + outputs = [ + ("output{}".format(i,), output_shapes[i], TensorProto.FLOAT) + for i in range(len(output_shapes)) + ] + + node = helper.make_node( + op_type, + inputs=[i[0] for i in inputs] + [t.name for t in initializer], + outputs=[o[0] for o in outputs], + **kwargs + ) + return _onnx_create_model([node], inputs, outputs, initializer) + + +def _shape_from_onnx_value_info( + v, +): # type: (ValueInfoProto) -> Sequence[Tuple[int, ...]] + return tuple([d.dim_value for d in v.type.tensor_type.shape.dim]) + + +def _coreml_forward_model( + model, # type: ModelProto + input_dict, # type: Dict[Text, np._ArrayLike[Any]] + output_names, # type: Sequence[Text] + minimum_ios_deployment_target="12", +): + # type: (...) -> np.ndarray[Any] + if not SupportedVersion.is_nd_array_supported(minimum_ios_deployment_target): + for k, arr in input_dict.items(): + if len(arr.shape) == 4: + input_dict[k] = arr[0] + for k, v in input_dict.items(): + if len(v.shape) == 2 and v.shape[0] == 1: + input_dict[k] = v.flatten() + coreml_out = model.predict(input_dict, useCPUOnly=True) + return np.array([coreml_out[name] for name in output_names]) + + +def _coreml_forward_onnx_model( + model, # type: ModelProto + input_dict, # type: Dict[Text, np._ArrayLike[Any]] + onnx_coreml_input_shape_map={}, # type: Dict[Text, List[int,...]] + minimum_ios_deployment_target="12", +): + # type: (...) -> np.ndarray[Any] + coreml_model = convert( + model, + onnx_coreml_input_shape_map=onnx_coreml_input_shape_map, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + output_names = [o.name for o in model.graph.output] + return _coreml_forward_model( + coreml_model, + input_dict, + output_names, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + +def _random_array( + shape, random_seed=10 +): # type: (Tuple[int, ...], Any) -> np._ArrayLike[float] + if random_seed: + npr.seed(random_seed) # type: ignore + return npr.ranf(shape).astype("float32") + + +def _conv_pool_output_size( + input_shape, # type: Sequence[int] + dilations, # type: Sequence[int] + kernel_shape, # type: Tuple[int, int] + pads, # type: Sequence[int] + strides, # type: Tuple[int, int] +): + # type: (...) -> Tuple[int, int] + output_height = ( + input_shape[2] + pads[0] + pads[2] - (dilations[0] * (kernel_shape[0] - 1) + 1) + ) / strides[0] + 1 + output_width = ( + input_shape[3] + pads[1] + pads[3] - (dilations[1] * (kernel_shape[1] - 1) + 1) + ) / strides[1] + 1 + + return (int(output_height), int(output_width)) + + +_T = TypeVar("_T") + + +def _assert_outputs( + output1, # type: np.ndarray[_T] + output2, # type: np.ndarray[_T] + decimal=7, # type: int +): + # type: (...) -> None + npt.assert_equal(len(output1), len(output2)) + for o1, o2 in zip(output1, output2): + npt.assert_almost_equal(o2.flatten(), o1.flatten(), decimal=decimal) + + +def _prepare_inputs_for_onnx( + model, # type: ModelProto + test_name="", # type: Text + values=None, # type: Optional[List[np._ArrayLike[Any]]] +): + # type: (...) 
-> Dict[Text, np._ArrayLike[Any]] + graph = model.graph + initializer_names = {t.name for t in graph.initializer} + input_names = [i.name for i in graph.input if i.name not in initializer_names] + input_shapes = [ + tuple([d.dim_value for d in i.type.tensor_type.shape.dim]) + for i in graph.input + if i.name not in initializer_names + ] + + if values is None: + inputs = [_random_array(shape) for shape in input_shapes] + else: + inputs = values + input_dict = dict(zip(input_names, inputs)) + return input_dict + + +def _test_onnx_model( + model, # type: ModelProto + test_name="", # type: Text + decimal=5, # type: int + onnx_coreml_input_shape_map={}, # type: Dict[Text, List[int,...]] + coreml_input_shape={}, # type: Dict[Text, List[int,...]] + minimum_ios_deployment_target="12", +): + # type: (...) -> None + if not test_name: + test_name = sys._getframe(1).f_code.co_name + W = _prepare_inputs_for_onnx(model, test_name=test_name) + c2_outputs = _forward_onnx_model(model, W, test_name=test_name) + coreml_input_dict = dict() + # Supported iOS Version + # New OS Version must be added at the end to maintain backward version index + supported_ios_version = ["11.2", "12", "13"] + IOS_13_VERSION = supported_ios_version.index("13") + for key, value in W.items(): + if ( + supported_ios_version.index(minimum_ios_deployment_target) < IOS_13_VERSION + and key in coreml_input_shape + ): + coreml_input_dict[key] = np.reshape(value, coreml_input_shape[key]) + else: + coreml_input_dict[key] = value + if _IS_MACOS: + coreml_outputs = _coreml_forward_onnx_model( + model, + coreml_input_dict, + onnx_coreml_input_shape_map=onnx_coreml_input_shape_map, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + _assert_outputs(c2_outputs, coreml_outputs, decimal=decimal) + + +def _test_single_node( + op_type, # type: Text + input_shapes, # type: Sequence[Tuple[int, ...]] + output_shapes, # type: Sequence[Tuple[int, ...]] + initializer=[], # type: Sequence[TensorProto] + decimal=5, # type: int + test_name="", # type: Text + onnx_coreml_input_shape_map={}, # type: Dict[Text, List[int,...]] + coreml_input_shape={}, # type: Dict[Text, List[int,...]] + minimum_ios_deployment_target="12", + **kwargs # type: Any +): + # type: (...) 
-> None + model = _onnx_create_single_node_model( + op_type, input_shapes, output_shapes, initializer, **kwargs + ) + if not test_name: + test_name = sys._getframe(1).f_code.co_name + _test_onnx_model( + model, + test_name=test_name, + decimal=decimal, + onnx_coreml_input_shape_map=onnx_coreml_input_shape_map, + coreml_input_shape=coreml_input_shape, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) diff --git a/coremltools/converters/onnx/_tests/test_convert.py b/coremltools/converters/onnx/_tests/test_convert.py new file mode 100644 index 000000000..5517262d3 --- /dev/null +++ b/coremltools/converters/onnx/_tests/test_convert.py @@ -0,0 +1,82 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +from coremltools._deps import _HAS_ONNX, MSG_ONNX_NOT_FOUND, _IS_MACOS +import unittest +import numpy as np +import numpy.testing as npt # type: ignore +import numpy.random as npr + +from PIL import Image # type: ignore + +if _HAS_ONNX: + import onnx + from coremltools.converters.onnx import convert + from ._test_utils import _onnx_create_single_node_model + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class ConvertTest(unittest.TestCase): + def setUp(self): # type: () -> None + self.img_arr = np.uint8(npr.rand(224, 224, 3) * 255) # type: ignore + self.img = Image.fromarray(np.uint8(self.img_arr)) # type: ignore + self.img_arr = np.float32(self.img_arr) # type: ignore + self.onnx_model = _onnx_create_single_node_model( + "Relu", [(3, 224, 224)], [(3, 224, 224)] + ) + self.input_names = [i.name for i in self.onnx_model.graph.input] + self.output_names = [o.name for o in self.onnx_model.graph.output] + + def test_convert_image_input(self): # type: () -> None + coreml_model = convert(self.onnx_model, image_input_names=self.input_names) + spec = coreml_model.get_spec() + for input_ in spec.description.input: + self.assertEqual(input_.type.WhichOneof("Type"), "imageType") + + def test_convert_image_output(self): # type: () -> None + coreml_model = convert(self.onnx_model, image_output_names=self.output_names) + spec = coreml_model.get_spec() + for output in spec.description.output: + self.assertEqual(output.type.WhichOneof("Type"), "imageType") + + def test_convert_image_input_preprocess(self): # type: () -> None + bias = np.array([100, 90, 80]) + coreml_model = convert( + self.onnx_model, + image_input_names=self.input_names, + preprocessing_args={ + "is_bgr": True, + "blue_bias": bias[0], + "green_bias": bias[1], + "red_bias": bias[2], + }, + ) + + if _IS_MACOS: + output = coreml_model.predict({self.input_names[0]: self.img})[ + self.output_names[0] + ] + + expected_output = self.img_arr[:, :, ::-1].transpose((2, 0, 1)) + expected_output[0] = expected_output[0] + bias[0] + expected_output[1] = expected_output[1] + bias[1] + expected_output[2] = expected_output[2] + bias[2] + npt.assert_equal(output.flatten(), expected_output.flatten()) + + def test_convert_image_output_bgr(self): # type: () -> None + coreml_model = convert( + self.onnx_model, + image_input_names=self.input_names, + image_output_names=self.output_names, + deprocessing_args={"is_bgr": True}, + ) + + if _IS_MACOS: + output = coreml_model.predict({self.input_names[0]: self.img})[ + self.output_names[0] + ] + output = np.array(output)[:, :, :3].transpose((2, 0, 1)) + expected_output = self.img_arr[:, :, ::-1].transpose((2, 0, 1)) + npt.assert_equal(output, expected_output) diff --git 
a/coremltools/converters/onnx/_tests/test_custom_layers.py b/coremltools/converters/onnx/_tests/test_custom_layers.py
new file mode 100644
index 000000000..f3a5e9bb3
--- /dev/null
+++ b/coremltools/converters/onnx/_tests/test_custom_layers.py
@@ -0,0 +1,224 @@
+from __future__ import absolute_import as _
+from __future__ import division as _
+from __future__ import print_function as _
+
+import unittest
+
+from coremltools._deps import _HAS_ONNX, MSG_ONNX_NOT_FOUND
+
+if _HAS_ONNX:
+    import onnx
+    from ._test_utils import _onnx_create_model
+    from onnx import helper, ModelProto, TensorProto
+    from coremltools.converters.onnx import convert
+from coremltools.proto import NeuralNetwork_pb2  # type: ignore
+
+
+def _make_model_acos_exp_topk():  # type: (...) -> ModelProto
+    """
+    make a very simple model for testing: input->acos->exp->topk->2 outputs
+    """
+    inputs = [("input0", (10,), TensorProto.FLOAT), ("K", (1,), TensorProto.INT64)]
+    outputs = [
+        ("output_values", (3,), TensorProto.FLOAT),
+        ("output_indices", (3,), TensorProto.INT64),
+    ]
+    acos = helper.make_node("Acos", inputs=[inputs[0][0]], outputs=["acos_out"])
+    exp = helper.make_node("Exp", inputs=[acos.output[0]], outputs=["exp_out"])
+    topk = helper.make_node(
+        "TopK",
+        inputs=[exp.output[0], inputs[1][0]],
+        outputs=[outputs[0][0], outputs[1][0]],
+        axis=0,
+    )
+    return _onnx_create_model([acos, exp, topk], inputs, outputs)
+
+
+def _make_model_flatten_axis3():  # type: (...) -> ModelProto
+    """
+    make a simple model: 4-D input -> flatten (axis=3)-> output
+    """
+    inputs = [("input", (1, 3, 10, 20), TensorProto.FLOAT)]
+    outputs = [("output", (30, 20), TensorProto.FLOAT)]
+    flatten = helper.make_node(
+        "Flatten", inputs=[inputs[0][0]], outputs=[outputs[0][0]], axis=3
+    )
+    return _onnx_create_model([flatten], inputs, outputs)
+
+
+@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND)
+class CustomLayerTest(unittest.TestCase):
+    def test_unsupported_ops(self):  # type: () -> None
+
+        onnx_model = _make_model_acos_exp_topk()
+        coreml_model = convert(onnx_model, add_custom_layers=True)
+
+        spec = coreml_model.get_spec()
+        layers = spec.neuralNetwork.layers
+        self.assertIsNotNone(layers[0].custom)
+        self.assertIsNotNone(layers[2].custom)
+        self.assertEqual("Acos", layers[0].custom.className)
+        self.assertEqual("TopK", layers[2].custom.className)
+
+    def test_unsupported_ops_provide_functions(self):  # type: () -> None
+        def convert_acos(builder, node, graph, err):
+            params = NeuralNetwork_pb2.CustomLayerParams()
+            params.className = node.op_type
+            params.description = "Custom layer that corresponds to the ONNX op {}".format(
+                node.op_type,
+            )
+
+            builder.add_custom(
+                name=node.name,
+                input_names=node.inputs,
+                output_names=node.outputs,
+                custom_proto_spec=params,
+            )
+
+        def convert_topk(builder, node, graph, err):
+            params = NeuralNetwork_pb2.CustomLayerParams()
+            params.className = node.op_type
+            params.description = "Custom layer that corresponds to the ONNX op {}".format(
+                node.op_type,
+            )
+            params.parameters["axis"].intValue = node.attrs.get("axis", -1)
+
+            builder.add_custom(
+                name=node.name,
+                input_names=node.inputs,
+                output_names=node.outputs,
+                custom_proto_spec=params,
+            )
+
+        onnx_model = _make_model_acos_exp_topk()
+        coreml_model = convert(
+            model=onnx_model,
+            add_custom_layers=True,
+            custom_conversion_functions={"Acos": convert_acos, "TopK": convert_topk},
+        )
+
+        spec = coreml_model.get_spec()
+        layers = spec.neuralNetwork.layers
+        self.assertIsNotNone(layers[0].custom)
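+        # The test graph is input -> Acos -> Exp -> TopK; Exp converts to a
+        # native CoreML layer, so only layers[0] (Acos) and layers[2] (TopK)
+        # should be emitted as custom layers backed by the functions above.
+        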
self.assertIsNotNone(layers[2].custom) + self.assertEqual("Acos", layers[0].custom.className) + self.assertEqual("TopK", layers[2].custom.className) + self.assertEqual(0, layers[2].custom.parameters["axis"].intValue) + + def test_node_name_type_custom_functions(self): # type: () -> None + def convert_acos(builder, node, graph, err): + params = NeuralNetwork_pb2.CustomLayerParams() + params.className = node.op_type + params.description = "Custom layer that corresponds to the ONNX op {}".format( + node.op_type, + ) + + builder.add_custom( + name=node.name, + input_names=node.inputs, + output_names=node.outputs, + custom_proto_spec=params, + ) + + def convert_topk_generic(builder, node, graph, err): + params = NeuralNetwork_pb2.CustomLayerParams() + params.className = node.op_type + params.description = "Custom layer that corresponds to the ONNX op {}".format( + node.op_type, + ) + params.parameters["axis"].intValue = node.attrs.get("axis", -1) + params.parameters["k"].intValue = node.attrs["k"] + + builder.add_custom( + name=node.name, + input_names=node.inputs, + output_names=node.outputs, + custom_proto_spec=params, + ) + + def convert_topk_node_specific(builder, node, graph, err): + params = NeuralNetwork_pb2.CustomLayerParams() + params.className = node.op_type + params.description = "Custom layer that corresponds to the ONNX op {}".format( + node.op_type, + ) + params.parameters["axis"].intValue = node.attrs.get("axis", -1) + + builder.add_custom( + name=node.name, + input_names=node.inputs, + output_names=node.outputs, + custom_proto_spec=params, + ) + + onnx_model = _make_model_acos_exp_topk() + coreml_model = convert( + model=onnx_model, + add_custom_layers=True, + custom_conversion_functions={ + "Acos": convert_acos, + "TopK": convert_topk_generic, + "output_values_output_indices": convert_topk_node_specific, + }, + ) + + spec = coreml_model.get_spec() + layers = spec.neuralNetwork.layers + self.assertIsNotNone(layers[0].custom) + self.assertIsNotNone(layers[2].custom) + self.assertEqual("Acos", layers[0].custom.className) + self.assertEqual("TopK", layers[2].custom.className) + self.assertEqual(0, layers[2].custom.parameters["axis"].intValue) + + def test_unsupported_op_attribute(self): # type: () -> None + onnx_model = _make_model_flatten_axis3() + coreml_model = convert(onnx_model, add_custom_layers=True) + + spec = coreml_model.get_spec() + layers = spec.neuralNetwork.layers + self.assertIsNotNone(layers[0].custom) + self.assertEqual("Flatten", layers[0].custom.className) + + def test_unsupported_op_attribute_provide_functions(self): # type: () -> None + def convert_flatten(builder, node, graph, err): + params = NeuralNetwork_pb2.CustomLayerParams() + params.className = node.op_type + params.description = "Custom layer that corresponds to the ONNX op {}".format( + node.op_type, + ) + params.parameters["axis"].intValue = node.attrs["axis"] + + builder.add_custom( + name=node.name, + input_names=node.inputs, + output_names=node.outputs, + custom_proto_spec=params, + ) + + def test_conversion(onnx_model, add_custom_layers=False): + coreml_model = convert( + onnx_model, + add_custom_layers=add_custom_layers, + custom_conversion_functions={"Flatten": convert_flatten}, + ) + + spec = coreml_model.get_spec() + layers = spec.neuralNetwork.layers + self.assertIsNotNone(layers[0].custom) + self.assertEqual("Flatten", layers[0].custom.className) + self.assertEqual(3, layers[0].custom.parameters["axis"].intValue) + + onnx_model = _make_model_flatten_axis3() + # Test with add_custom_layers 
True + convert( + onnx_model, + add_custom_layers=True, + custom_conversion_functions={"Flatten": convert_flatten}, + ) + + # Test with add_custom_layers False + convert( + onnx_model, + add_custom_layers=False, + custom_conversion_functions={"Flatten": convert_flatten}, + ) diff --git a/coremltools/converters/onnx/_tests/test_graph.py b/coremltools/converters/onnx/_tests/test_graph.py new file mode 100644 index 000000000..457ea09c2 --- /dev/null +++ b/coremltools/converters/onnx/_tests/test_graph.py @@ -0,0 +1,81 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import unittest +from coremltools._deps import _HAS_ONNX, MSG_ONNX_NOT_FOUND + +if _HAS_ONNX: + import onnx + from onnx import helper, numpy_helper, TensorProto + from coremltools.converters.onnx._graph import Node, Graph + from ._test_utils import ( + _onnx_create_single_node_model, + _onnx_create_model, + _conv_pool_output_size, + _random_array, + ) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class NodeTest(unittest.TestCase): + def test_create_node(self): # type: () -> None + model = _onnx_create_single_node_model( + "Elu", [(1, 3, 224, 224)], [(1, 3, 224, 224)], alpha=0.5 + ) + graph = model.graph + node = graph.node[0] + node_ = Node.from_onnx(node) + self.assertTrue(len(node_.inputs) == 1) + self.assertTrue(len(node_.outputs) == 1) + self.assertTrue(len(node_.attrs) == 1) + self.assertTrue(node_.attrs["alpha"] == 0.5) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class GraphTest(unittest.TestCase): + def test_create_graph(self): # type: () -> None + kernel_shape = (3, 2) + strides = (2, 3) + pads = (4, 2, 4, 2) + dilations = (1, 2) + group = 1 + weight = numpy_helper.from_array(_random_array((16, 3, 3, 2)), name="weight") + + input_shape = (1, 3, 224, 224) + output_size = _conv_pool_output_size( + input_shape, dilations, kernel_shape, pads, strides + ) + + output_shape = (1, int(weight.dims[0]), output_size[0], output_size[1]) + + inputs = [("input0", input_shape)] + outputs = [("output0", output_shape, TensorProto.FLOAT)] + + conv = helper.make_node( + "Conv", + inputs=[inputs[0][0], "weight"], + outputs=["conv_output"], + dilations=dilations, + group=group, + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + relu = helper.make_node( + "Relu", inputs=[conv.output[0]], outputs=[outputs[0][0]] + ) + + model = _onnx_create_model([conv, relu], inputs, outputs, [weight]) + graph_ = Graph.from_onnx(model.graph, onnx_ir_version=5) + self.assertTrue(len(graph_.inputs) == 1) + self.assertEqual(graph_.inputs[0][2], input_shape) + self.assertTrue(len(graph_.outputs) == 1) + self.assertEqual(graph_.outputs[0][2], output_shape) + self.assertTrue(len(graph_.nodes) == 2) + self.assertEqual(len(graph_.nodes[0].parents), 0) + self.assertEqual(len(graph_.nodes[1].parents), 1) + self.assertEqual(len(graph_.nodes[0].children), 1) + self.assertEqual(len(graph_.nodes[1].children), 0) diff --git a/coremltools/converters/onnx/_tests/test_mlmodel_passes.py b/coremltools/converters/onnx/_tests/test_mlmodel_passes.py new file mode 100644 index 000000000..9252061bc --- /dev/null +++ b/coremltools/converters/onnx/_tests/test_mlmodel_passes.py @@ -0,0 +1,31 @@ +import numpy as np +import unittest +import coremltools.models.datatypes as datatypes +from coremltools.models import neural_network as neural_network +from coremltools.converters.mil.backend.nn.passes.mlmodel_passes import ( + 
remove_disconnected_layers, +) + + +class MLModelPassesTest(unittest.TestCase): + def test_load_constant_remove(self): + input_features = [("data", datatypes.Array(*(3, 4)))] + output_features = [("out", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("relu1", "RELU", "data", "relu1") + builder.add_load_constant_nd( + "const1", "c1", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_activation("relu2", "RELU", "relu1", "out") + builder.add_load_constant_nd( + "const2", "c2", constant_value=np.ones((5,)), shape=(5,) + ) + builder.add_load_constant_nd( + "const3", "c3", constant_value=np.ones((5,)), shape=(5,) + ) + spec = builder.spec + np.testing.assert_equal(5, len(spec.neuralNetwork.layers)) + remove_disconnected_layers(spec) + np.testing.assert_equal(2, len(spec.neuralNetwork.layers)) diff --git a/coremltools/converters/onnx/_tests/test_operators.py b/coremltools/converters/onnx/_tests/test_operators.py new file mode 100644 index 000000000..1d5f2d1c5 --- /dev/null +++ b/coremltools/converters/onnx/_tests/test_operators.py @@ -0,0 +1,469 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +import unittest +import numpy as np +from coremltools._deps import _HAS_ONNX, MSG_ONNX_NOT_FOUND + +if _HAS_ONNX: + import onnx + from onnx.numpy_helper import from_array + from coremltools.converters.onnx import convert + from ._test_utils import ( + _onnx_create_single_node_model, + _test_single_node, + _random_array, + _conv_pool_output_size, + _assert_outputs, + ) + +from typing import Text + +from coremltools.models.utils import _macos_version + +MIN_MACOS_VERSION_10_15 = (10, 15) + +ONNX_SHAPE_INFERENCE_FAILS = True + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class SingleOperatorTest(unittest.TestCase): + def test_conv(self): # type: () -> None + kernel_shape = (3, 2) + strides = (2, 3) + pads = (4, 2, 4, 2) + dilations = (1, 2) + group = 1 + weight = from_array(_random_array((16, 3, 3, 2)), name="weight") + + input_shape = (1, 3, 224, 224) + output_size = _conv_pool_output_size( + input_shape, dilations, kernel_shape, pads, strides + ) + + output_shape = (1, int(weight.dims[0]), output_size[0], output_size[1]) + + _test_single_node( + "Conv", + [input_shape], + [output_shape], + initializer=[weight], + dilations=dilations, + group=group, + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + def test_conv_transpose(self): # type: () -> None + kernel_shape = (3, 3) + pads = (0, 0, 0, 0) + C_in = 3 + C_out = 12 + H_in, W_in = 30, 30 + strides = (2, 2) + + input_shape = (1, C_in, H_in, W_in) + weight = from_array( + _random_array((C_in, C_out, kernel_shape[0], kernel_shape[1])), + name="weight", + ) + + H_out = (H_in - 1) * strides[0] + kernel_shape[0] - pads[0] - pads[2] + W_out = (W_in - 1) * strides[1] + kernel_shape[1] - pads[1] - pads[3] + output_shape = (1, C_out, H_out, W_out) + + _test_single_node( + "ConvTranspose", + [input_shape], + [output_shape], + initializer=[weight], + # Default values for other attributes: dilations=[1, 1], group=1 + strides=strides, + kernel_shape=kernel_shape, + pads=pads, + output_padding=(0, 0), + ) + + def test_conv_without_pads(self): # type: () -> None + kernel_shape = (3, 2) + strides = (2, 3) + dilations = (1, 2) + group = 1 + weight = from_array(_random_array((16, 3, 3, 2)), name="weight") + 
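+        # Rough expected-size check for these settings (no padding): the
+        # effective kernel extent is dilation * (k - 1) + 1 = 3 on both axes,
+        # so H_out = (224 - 3) // 2 + 1 = 111 and W_out = (224 - 3) // 3 + 1 = 74,
+        # i.e. _conv_pool_output_size should yield a (1, 16, 111, 74) output.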
+ input_shape = (1, 3, 224, 224) + output_size = _conv_pool_output_size( + input_shape, dilations, kernel_shape, [0, 0, 0, 0], strides + ) + + output_shape = (1, int(weight.dims[0]), output_size[0], output_size[1]) + _test_single_node( + "Conv", + [input_shape], + [output_shape], + initializer=[weight], + dilations=dilations, + group=group, + kernel_shape=kernel_shape, + strides=strides, + ) + + def test_max_pool(self): # type: () -> None + kernel_shape = (5, 3) + pads = (2, 1, 2, 1) + strides = (1, 2) + + input_shape = (1, 3, 224, 224) + + output_size = _conv_pool_output_size( + input_shape, [1, 1], kernel_shape, pads, strides + ) + + output_shape = (1, 3, output_size[0], output_size[1]) + + _test_single_node( + "MaxPool", + [input_shape], + [output_shape], + test_name="test_max_pool_1", + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + output_size = _conv_pool_output_size( + input_shape, [1, 1], kernel_shape, [0, 0, 0, 0], strides + ) + output_shape = (1, 3, output_size[0], output_size[1]) + _test_single_node( + "MaxPool", + [input_shape], + [output_shape], + test_name="test_max_pool_2", + kernel_shape=kernel_shape, + strides=strides, + ) + + @unittest.skip("Skip due to internal CoreML CPU backend issue") + def test_avg_pool(self): # type: () -> None + kernel_shape = (5, 3) + pads = (2, 1, 2, 1) + strides = (1, 2) + + input_shape = (1, 3, 224, 224) + output_size = _conv_pool_output_size( + input_shape, (1, 1), kernel_shape, pads, strides + ) + output_shape = (1, 3, output_size[0], output_size[1]) + _test_single_node( + "AveragePool", + [input_shape], + [output_shape], + test_name="test_avg_pool_1", + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + output_size = _conv_pool_output_size( + input_shape, (1, 1), kernel_shape, [0, 0, 0, 0], strides + ) + output_shape = (1, 3, output_size[0], output_size[1]) + _test_single_node( + "AveragePool", + [input_shape], + [output_shape], + test_name="test_avg_pool_2", + kernel_shape=kernel_shape, + strides=strides, + ) + + def test_bn(self): # type: () -> None + scale = from_array(_random_array((3,)), name="scale") + bias = from_array(_random_array((3,)), name="bias") + mean = from_array(_random_array((3,)), name="mean") + var = from_array(_random_array((3,)), name="var") + + epsilon = 1e-5 + momentum = 0.001 + + op_types = ["BatchNormalization", "SpatialBN"] + for op_type in op_types: + _test_single_node( + "BatchNormalization", + [(1, 3, 224, 224)], + [(1, 3, 224, 224)], + initializer=[scale, bias, mean, var], + epsilon=epsilon, + momentum=momentum, + ) + + # epsilon by default + _test_single_node( + "BatchNormalization", + [(1, 3, 224, 224)], + [(1, 3, 224, 224)], + initializer=[scale, bias, mean, var], + # epsilon=epsilon, + momentum=momentum, + ) + + def test_gemm(self, minimum_ios_deployment_target="12"): # type: () -> None + input_shape = (1, 2048) + output_shape = (1, 5) + W = from_array(_random_array((output_shape[1], input_shape[1])), name="weight") + b = from_array(_random_array((output_shape[1],)), name="bias") + _test_single_node( + "Gemm", + [input_shape], + [output_shape], + initializer=[W, b], + decimal=3, + transB=1, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. 
Skipping test.", + ) + def test_gemm_ios13(self): + self.test_gemm(minimum_ios_deployment_target="13") + + def test_gemm_transB_off( + self, minimum_ios_deployment_target="12" + ): # type: () -> None + input_shape = (1, 2048) + output_shape = (1, 5) + W = from_array(_random_array((input_shape[1], output_shape[1])), name="weight") + b = from_array(_random_array((output_shape[1],)), name="bias") + _test_single_node( + "Gemm", + [input_shape], + [output_shape], + initializer=[W, b], + decimal=3, + transB=0, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_gemm_transB_off_ios13(self): + self.test_gemm_transB_off(minimum_ios_deployment_target="13") + + def test_lrn(self): # type: () -> None + _test_single_node( + "LRN", + [(1, 3, 224, 224)], + [(1, 3, 224, 224)], + alpha=9.99e-5, + beta=0.75, + bias=1.0, + size=5, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + @unittest.skip("Failing: wrong input type") + def test_split_axis_0_rank_3( + self, minimum_ios_deployment_target="12" + ): # type: () -> None + _test_single_node( + "Split", + [(2, 1, 200)], + [(1, 1, 200), (1, 1, 200)], + axes=0, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_concat(self, minimum_ios_deployment_target="13"): # type: () -> None + _test_single_node( + "Concat", + [(1, 2, 200), (1, 2, 200)], + [(2, 2, 200)], + axis=0, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + @unittest.skip("Failing: wrong input type") + def test_gather(self, minimum_ios_deployment_target="13"): # type: () -> None + _test_single_node( + "Gather", + [(5, 4, 3), (3,)], + [(3, 4, 3)], + axis=0, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + @unittest.skip("Failing: wrong input type") + def test_reshape_same_rank( + self, minimum_ios_deployment_target="13" + ): # type: () -> None + _test_single_node( + "Reshape", + [(5, 4, 3), (3,)], + [(4, 5, 3)], + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + @unittest.skip("Failing: wrong input type") + def test_reshape_same_rank_infer_shape( + self, minimum_ios_deployment_target="13" + ): # type: () -> None + _test_single_node( + "Reshape", + [(5, 4, 3), (3,)], + [(5, 2, 6)], + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + # TODO: add test_reshape_diff_rank_infer_shape where shape is Constant and known + # to test rank-4 into rank-3 reshape with shape inferencing + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. 
Skipping test.",
+    )
+    @unittest.skip("Failing: wrong input type")
+    def test_reshape_dynamic(
+        self, minimum_ios_deployment_target="13"
+    ):  # type: () -> None
+        _test_single_node(
+            "Reshape",
+            [(5, 4, 3, 2), (3,)],
+            [(2, 3, 20)],
+            minimum_ios_deployment_target=minimum_ios_deployment_target,
+        )
+
+    @unittest.skipIf(
+        _macos_version() < MIN_MACOS_VERSION_10_15,
+        "macOS 10.15+ required. Skipping test.",
+    )
+    def test_squeeze(self, minimum_ios_deployment_target="13"):  # type: () -> None
+        _test_single_node(
+            "Squeeze",
+            [(5, 1, 3, 1, 1)],
+            [(5, 3)],
+            axes=[1, 3, 4],
+            minimum_ios_deployment_target=minimum_ios_deployment_target,
+        )
+
+    @unittest.skipIf(
+        _macos_version() < MIN_MACOS_VERSION_10_15,
+        "macOS 10.15+ required. Skipping test.",
+    )
+    def test_transpose_default(
+        self, minimum_ios_deployment_target="13"
+    ):  # type: () -> None
+        _test_single_node(
+            "Transpose",
+            [(5, 3, 4, 6, 2)],
+            [(2, 6, 4, 3, 5)],
+            minimum_ios_deployment_target=minimum_ios_deployment_target,
+        )
+
+    @unittest.skipIf(
+        ONNX_SHAPE_INFERENCE_FAILS,
+        "ONNX shape inference fails to recognize the correct shape",
+    )
+    @unittest.skipIf(
+        _macos_version() < MIN_MACOS_VERSION_10_15,
+        "macOS 10.15+ required. Skipping test.",
+    )
+    def test_transpose_permute(
+        self, minimum_ios_deployment_target="13"
+    ):  # type: () -> None
+        _test_single_node(
+            "Transpose",
+            [(5, 3, 4, 6, 2)],
+            [(2, 3, 4, 6, 5)],
+            axes=[4, 1, 2, 3, 0],
+            minimum_ios_deployment_target=minimum_ios_deployment_target,
+        )
+
+    @unittest.skipIf(
+        ONNX_SHAPE_INFERENCE_FAILS,
+        "ONNX shape inference fails to recognize the correct shape",
+    )
+    @unittest.skipIf(
+        _macos_version() < MIN_MACOS_VERSION_10_15,
+        "macOS 10.15+ required. Skipping test.",
+    )
+    def test_unsqueeze(self, minimum_ios_deployment_target="13"):  # type: () -> None
+        _test_single_node(
+            "Unsqueeze",
+            [(5, 3, 4)],
+            [(1, 5, 1, 3, 4)],
+            axes=[0, 1],
+            minimum_ios_deployment_target=minimum_ios_deployment_target,
+        )
+
+    @unittest.skip(
+        "Error while preparing Caffe2 backend. 
Maybe something is incorrect in ONNX model definition" + ) + def skip_test_lstm(self): # type: () -> None + x = 4 + h = 2 + seq_length = 3 + W = from_array(_random_array((4 * h, x)), name="gate_weights") + R = from_array(_random_array((4 * h, h)), name="recursion_weights") + B = from_array(_random_array((8 * h,)), name="biases") + seq_lens_input = from_array( + np.array([seq_length]).astype(np.int32), name="seq_lens_input" + ) + initial_h = from_array(np.zeros((1, 1, h)).astype(np.float32), name="initial_h") + initial_c = from_array(np.zeros((1, 1, h)).astype(np.float32), name="initial_c") + + input_shape = (seq_length, 1, x) + output_shape_all = (seq_length, 1, h) + output_shape_last = (1, 1, h) + + onnx_model = _onnx_create_single_node_model( + "LSTM", + [input_shape], + [output_shape_all, output_shape_last], + initializer=[W, R, B, seq_lens_input, initial_h, initial_c], + hidden_size=h, + ) + X = np.random.rand(*input_shape).astype("float32") # type: ignore + import caffe2.python.onnx.backend + + prepared_backend = caffe2.python.onnx.backend.prepare(onnx_model) + out = prepared_backend.run({"input0": X}) + caffe2_out_all = out["output0"] + caffe2_out_last = out["output1"] + + coreml_model = convert(onnx_model) + inputdict = {} + inputdict["input0"] = X + inputdict["initial_h"] = np.zeros((h), dtype=np.float32) + inputdict["initial_c"] = np.zeros((h), dtype=np.float32) + coreml_out_dict = coreml_model.predict(inputdict, useCPUOnly=True) + coreml_out_all = coreml_out_dict["output0"] + coreml_out_last = coreml_out_dict["output1"] + + _assert_outputs(caffe2_out_all.flatten(), coreml_out_all.flatten(), decimal=5) + _assert_outputs(caffe2_out_last.flatten(), coreml_out_last.flatten(), decimal=5) diff --git a/coremltools/converters/onnx/_tests/test_pytorch_model.py b/coremltools/converters/onnx/_tests/test_pytorch_model.py new file mode 100644 index 000000000..24549e246 --- /dev/null +++ b/coremltools/converters/onnx/_tests/test_pytorch_model.py @@ -0,0 +1,1011 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +import unittest +from coremltools._deps import _HAS_ONNX, MSG_ONNX_NOT_FOUND, _IS_MACOS + +if _HAS_ONNX: + import onnx + from coremltools.converters.onnx import convert + from coremltools.converters.onnx._converter import SupportedVersion + from ._test_utils import _assert_outputs + +import numpy as np +import torch # type: ignore +import torch.nn as nn # type: ignore +import torch.nn.functional as F +import shutil +import tempfile +import os +import pytest + +from coremltools.models.utils import _macos_version + +np.random.seed(10) +torch.manual_seed(10) + +MIN_MACOS_VERSION_10_15 = (10, 15) + +DEBUG = False + + +def _test_torch_model_single_io( + torch_model, + torch_input_shape, + coreml_input_shape, + minimum_ios_deployment_target="12", + decimal=4, + opset_version=9, +): + # run torch model + torch_input = torch.rand(*torch_input_shape) + torch_out_raw = torch_model(torch_input) + if isinstance(torch_out_raw, tuple): + torch_out = torch_out_raw[0].detach().numpy() + else: + torch_out = torch_out_raw.detach().numpy() + + # convert to onnx model + model_dir = tempfile.mkdtemp() + if DEBUG: + model_dir = "/tmp" + onnx_file = os.path.join(model_dir, "torch_model.onnx") + torch.onnx.export(torch_model, torch_input, onnx_file, opset_version=opset_version) + onnx_model = onnx.load(onnx_file) + + # convert to coreml and run + coreml_model = convert( + onnx_model, 
minimum_ios_deployment_target=minimum_ios_deployment_target + ) + + output_name = [o.name for o in onnx_model.graph.output][0] + initializer_names = {t.name for t in onnx_model.graph.initializer} + input_name = [ + i.name for i in onnx_model.graph.input if i.name not in initializer_names + ][0] + input_numpy = torch_input.detach().numpy() + if SupportedVersion.is_nd_array_supported(minimum_ios_deployment_target): + input_dict = {input_name: input_numpy} # type: ignore + else: + input_dict = {input_name: np.reshape(input_numpy, coreml_input_shape)} # type: ignore + if _IS_MACOS: + coreml_out = coreml_model.predict(input_dict, useCPUOnly=True)[output_name] + if DEBUG: + coreml_model.save(model_dir + "/torch_model.mlmodel") + print("coreml_out") + print(np.squeeze(coreml_out)) + print("torch_out") + print(np.squeeze(torch_out)) + print("coreml out shape ", coreml_out.shape) + print("torch out shape: ", torch_out.shape) + + # compare + _assert_outputs([torch_out], [coreml_out], decimal=decimal) # type: ignore + + # delete onnx model + if not DEBUG: + if os.path.exists(model_dir): + shutil.rmtree(model_dir) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class OnnxModelTest(unittest.TestCase): + def test_functional_average_pool(self, minimum_ios_deployment_target="12"): + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + + def forward(self, x): + y = F.avg_pool2d(x, [15, 18], [15, 18]) + return y + + torch_model = Net() + torch_model.train(False) + if minimum_ios_deployment_target == "12": + coreml_shape = (1, 64, 64) + else: + coreml_shape = (1, 1, 64, 64) + _test_torch_model_single_io( + torch_model, + (1, 1, 64, 64), + coreml_shape, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_functional_average_pool_disable_rank5_mapping(self): + self.test_functional_average_pool(minimum_ios_deployment_target="13") + + def test_linear_no_bias( + self, minimum_ios_deployment_target="12" + ): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.simple_nn = nn.Sequential( + nn.Linear(256, 128, bias=False), nn.ReLU() + ) + + def forward(self, x): + return self.simple_nn(x) + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 256), (256), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. 
Skipping test.", + ) + def test_linear_no_bias_disable_rank5_mapping(self): + self.test_linear_no_bias(minimum_ios_deployment_target="13") + + def test_linear_bias(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.simple_nn = nn.Sequential( + nn.Linear(256, 128, bias=True), nn.ReLU() + ) + + def forward(self, x): + return self.simple_nn(x) + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 256), (256)) # type: ignore + + def test_dynamic_reshape(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv = nn.Conv2d( + in_channels=3, + out_channels=32, + kernel_size=(3, 3), + stride=1, + padding=0, + bias=True, + ) + + def forward(self, x): + x = self.conv(x) + x = x.view(x.size()[0], -1) + return x + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 3, 100, 100), (3, 100, 100), "13") # type: ignore + + def test_const_initializer1(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.ones = torch.nn.Parameter(torch.ones(1,)) + + def forward(self, x): + y = x + self.ones + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 3), (3,)) # type: ignore + + def test_const_initializer2(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + + def forward(self, x): + y = x + torch.nn.Parameter(torch.ones(2, 3), requires_grad=False) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 2, 3), (1, 2, 3)) # type: ignore + + def test_conv2D_transpose(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.convT = torch.nn.ConvTranspose2d( + 1, 1, kernel_size=3, stride=2, output_padding=0, padding=3, groups=1 + ) + + def forward(self, x): + y = self.convT(x) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 1, 64, 64), (1, 64, 64)) # type: ignore + + def test_conv2D_transpose_output_padding(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.convT = torch.nn.ConvTranspose2d( + 1, 1, kernel_size=3, stride=2, output_padding=1, padding=3, groups=1 + ) + + def forward(self, x): + y = self.convT(x) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 1, 64, 64), (1, 64, 64)) # type: ignore + + def test_conv2D_transpose_groups(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.convT = torch.nn.ConvTranspose2d( + 4, 4, kernel_size=3, stride=2, output_padding=1, padding=1, groups=2 + ) + + def forward(self, x): + y = self.convT(x) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 4, 8, 8), (4, 8, 8)) # type: ignore + + def test_conv2D_transpose_2(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.convT = torch.nn.ConvTranspose2d( + 1, 1, kernel_size=3, stride=3, output_padding=2, padding=1, groups=1 + ) + + def forward(self, x): + y = self.convT(x) + return y + + torch_model = Net() # type: ignore + 
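+        # For reference, the standard ConvTranspose2d size formula
+        # (H_in - 1) * stride - 2 * padding + kernel + output_padding
+        # gives (3 - 1) * 3 - 2 * 1 + 3 + 2 = 9 here, so the 3x3 input below
+        # should produce a (1, 1, 9, 9) output.
+        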
torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 1, 3, 3), (1, 3, 3)) # type: ignore + + def test_pow(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + + def forward(self, x): + y = x.pow(3) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (3, 2, 3), (3, 2, 3)) # type: ignore + + @pytest.mark.skip(reason="rdar://64224329") + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_lstm(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.lstm = nn.LSTM(input_size=256, hidden_size=64, num_layers=1) + + def forward(self, x): + y = self.lstm(x) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (3, 1, 256), (3, 1, 256), minimum_ios_deployment_target="13") # type: ignore + + @pytest.mark.skip(reason="rdar://64224329") + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_bidirlstm(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.lstm = nn.LSTM( + input_size=256, hidden_size=64, num_layers=1, bidirectional=True + ) + + def forward(self, x): + y = self.lstm(x) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (3, 1, 256), (3, 1, 256), minimum_ios_deployment_target="13") # type: ignore + + @pytest.mark.skip(reason="rdar://64224329") + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_gru(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.gru = nn.GRU(input_size=256, hidden_size=64, num_layers=1) + + def forward(self, x): + y = self.gru(x) + return y + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (3, 1, 256), (3, 1, 256), minimum_ios_deployment_target="13", decimal=1) # type: ignore + + def test_1d_conv(self): + class Net(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=1, + groups=1, + bias=True, + ): + super(Net, self).__init__() + + self.conv = torch.nn.Conv1d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=0, + dilation=dilation, + groups=groups, + bias=bias, + ) + + self.__padding = (kernel_size - 1) * dilation + + def forward(self, x): + result = self.conv(x) + if self.__padding != 0: + return result[:, :, : -self.__padding] + return result + + B = 1 + Cin = 5 + Cout = 11 + k = 3 + Win = 15 + torch_model = Net(Cin, Cout, k) # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, Cin, Win), (Cin, 1, Win)) # type: ignore + + def test_conv1d_after_reshape(self): # type: () -> None + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv = torch.nn.Conv1d( + in_channels=300, + out_channels=32, + kernel_size=3, + stride=1, + padding=0, + bias=True, + ) + + def forward(self, x): + x = x.view(1, 300, 100) + x = self.conv(x) + return x + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 3, 100, 100), (3, 100, 100)) # type: ignore + + def test_conv2d_stride(self): + 
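+        # Exercises asymmetric kernels with per-dimension stride and padding
+        # tuples; note that only conv2 participates in the forward pass.
+        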
class TestModule(torch.nn.Module):
+            def __init__(self):
+                in_channels = 1
+                out_channels = 1
+                bsz = 1  # batch size
+                super(TestModule, self).__init__()
+                self.conv1 = torch.nn.Conv2d(
+                    in_channels, out_channels, kernel_size=(3, 4), stride=1
+                )
+                self.conv2 = torch.nn.Conv2d(
+                    in_channels,
+                    out_channels,
+                    kernel_size=(3, 5),
+                    stride=(2, 1),
+                    padding=(1, 2),
+                )
+
+            def forward(self, x):
+                return (self.conv2(x),)
+
+        torch_model = TestModule()  # type: ignore
+        torch_model.train(False)
+        H, W = 6, 3
+        _test_torch_model_single_io(torch_model, (1, 1, H, W), (1, H, W))  # type: ignore
+
+    def test_conv2d_dilation(self):
+        class TestModule(torch.nn.Module):
+            def __init__(self):
+                in_channels = 1
+                out_channels = 3
+                bsz = 1  # batch size
+                super(TestModule, self).__init__()
+                self.conv1 = torch.nn.Conv2d(
+                    in_channels, out_channels, kernel_size=(3, 4), stride=2, dilation=2
+                )
+
+            def forward(self, x):
+                return self.conv1(x)
+
+        torch_model = TestModule()  # type: ignore
+        torch_model.train(False)
+        H, W = 64, 64
+        _test_torch_model_single_io(torch_model, (1, 1, H, W), (1, H, W))  # type: ignore
+
+    def test_batchnorm_after_reshape(self):  # type: () -> None
+        class Net(nn.Module):
+            def __init__(self):
+                super(Net, self).__init__()
+                self.conv = torch.nn.Conv1d(
+                    in_channels=300,
+                    out_channels=32,
+                    kernel_size=3,
+                    stride=1,
+                    padding=0,
+                    bias=True,
+                )
+                self.batchnorm = nn.BatchNorm1d(32)
+
+            def forward(self, x):
+                x = x.view(1, 300, 100)
+                x = self.conv(x)
+                x = self.batchnorm(x)
+                return x
+
+        torch_model = Net()  # type: ignore
+        torch_model.train(False)
+        _test_torch_model_single_io(torch_model, (1, 3, 100, 100), (3, 100, 100))  # type: ignore
+
+    def test_res_connect_downsampling_after_reshape(self):  # type: () -> None
+        class Net(nn.Module):
+            def __init__(self):
+                super(Net, self).__init__()
+                self.conv = torch.nn.Conv1d(
+                    in_channels=300,
+                    out_channels=32,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    bias=True,
+                )
+                self.downsample = torch.nn.Conv1d(
+                    in_channels=300,
+                    out_channels=32,
+                    kernel_size=1,
+                    stride=1,
+                    padding=0,
+                    bias=True,
+                )
+
+            def forward(self, x):
+                x = x.view(1, 300, 100)
+                y = self.conv(x)
+                res = self.downsample(x)
+                return y + res
+
+        torch_model = Net()  # type: ignore
+        torch_model.train(False)
+        _test_torch_model_single_io(torch_model, (1, 3, 100, 100), (3, 100, 100))  # type: ignore
+
+    def test_fc_plus_convenet(self):  # type: () -> None
+        class Net(nn.Module):
+            def __init__(
+                self,
+                channel_size=1,
+                output_h=16,
+                output_w=16,
+                filter_num=32,
+                latent_size=16,
+            ):
+                super(Net, self).__init__()
+                self.channel_size = channel_size
+                self.output_h = output_h
+                self.output_w = output_w
+                self.filter_num = filter_num
+                self.latent_size = latent_size
+
+                self.fc3 = nn.Linear(latent_size, 128)
+                self.fc4 = nn.Linear(128, 256)
+
+                self.relu = nn.ReLU()
+
+                self.convt = nn.Sequential(
+                    nn.ConvTranspose2d(256, self.filter_num * 4, 4, 1),
+                    nn.BatchNorm2d(self.filter_num * 4),
+                    nn.ReLU(inplace=True),
+                    nn.ConvTranspose2d(self.filter_num * 4, self.filter_num * 2, 4, 1),
+                    nn.BatchNorm2d(self.filter_num * 2),
+                    nn.ReLU(inplace=True),
+                    nn.ConvTranspose2d(self.filter_num * 2, self.filter_num, 4, 1),
+                    nn.BatchNorm2d(self.filter_num),
+                    nn.ReLU(inplace=True),
+                    nn.ConvTranspose2d(self.filter_num, self.filter_num, 4, 1),
+                    nn.BatchNorm2d(self.filter_num),
+                    nn.ReLU(inplace=True),
+                    nn.ConvTranspose2d(self.filter_num, 1, 4, 1),
+                    nn.Sigmoid(),
+                )
+
+            def forward(self, z):
+                x = self.relu(self.fc3(z))
+                deconv_input = self.fc4(x)
deconv_input = deconv_input.view(-1, 256, 1, 1) + x = self.convt(deconv_input) + return x + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (1, 16), (1, 1, 16)) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_conv1d_pool1d(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv1d( + in_channels=4, out_channels=32, kernel_size=3, stride=1, padding=1 + ) + self.conv2 = nn.Conv1d( + in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + x = x.permute(0, 2, 1) + x = self.conv1(x) + x = F.relu(x) + x = F.max_pool1d(x, 2) + x = self.conv2(x) + x = F.relu(x) + return x + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io( + torch_model, + (2, 10, 4), + (2, 10, 4), + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_slice(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + + def forward(self, x): + x = x[:, :5] + x[:, 5:] + return x + + torch_model = Net() # type: ignore + torch_model.train(False) + + # opset <= 9 + _test_torch_model_single_io( + torch_model, + (10, 10), + (10, 10), + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + # opset > 9 + _test_torch_model_single_io( + torch_model, + (10, 10), + (10, 10), + opset_version=10, + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class ReshapeTransposeTests(unittest.TestCase): + """ + tests for models that have patterns like: + rank(4) ---> reshape (rank 6) ----> transpose (rank 6) ----> reshape(4) + """ + + @pytest.mark.xfail + # is pytorch to onnx conversion correct? 
+    def test_pixel_shuffle_not_working(self):
+        """
+        (1, c, h, w) --> reshape ---> (1, sh, sw, c/(sh*sw), h, w)
+        --> transpose [0,1,4,2,5,3] ---> (1, sh, h, sw, w, c/(sh*sw))
+        --> reshape ---> (1, c/(sh*sw), sh*h, sw*w)
+        """
+
+        class Net(nn.Module):
+            def __init__(self, upscale_factor=3):
+                super(Net, self).__init__()
+                self.upscale_factor = upscale_factor
+                self.ps = nn.PixelShuffle(self.upscale_factor)
+
+            def forward(self, x):
+                return self.ps(x)
+
+        torch_model = Net()  # type: ignore
+        torch_model.train(False)
+        _test_torch_model_single_io(torch_model, (1, 18, 4, 5), (18, 4, 5))  # type: ignore
+
+    def test_pixel_shuffle_working(self):
+        """
+        (1, c, h, w) --> reshape ---> (1, c/(sh*sw), sh, sw, h, w)
+        --> transpose [0,1,4,2,5,3] ---> (1, c/(sh*sw), h, sh, w, sw)
+        --> reshape ---> (1, c/(sh*sw), sh*h, sw*w)
+        """
+
+        class Net(nn.Module):
+            def __init__(self, C=12, H=4, W=6, sh=3, sw=2):
+                super(Net, self).__init__()
+                self.C = C
+                self.H = H
+                self.W = W
+                self.sh = sh
+                self.sw = sw
+
+            def forward(self, x):
+                y1 = x.view(
+                    1, self.C // (self.sh * self.sw), self.sh, self.sw, self.H, self.W
+                ).contiguous()
+                y2 = y1.permute(0, 1, 4, 2, 5, 3).contiguous()
+                y3 = y2.view(
+                    1, self.C // (self.sh * self.sw), self.sh * self.H, self.sw * self.W
+                ).contiguous()
+                return y3
+
+        torch_model = Net()  # type: ignore
+        torch_model.train(False)
+        _test_torch_model_single_io(torch_model, (1, 12, 4, 6), (12, 4, 6))  # type: ignore
+
+    def test_reorganize_1(self):
+        """
+        (1, c, h, w) --> reshape ---> (1, c/(sh*sw), h, sh, w, sw)
+        --> transpose [0,3,5,1,2,4] ---> (1, sh, sw, c/(sh*sw), h, w)
+        --> reshape ---> (1, c*sh*sw, h/sh, w/sw)
+        """
+
+        class Net(nn.Module):
+            def __init__(self, C=12, H=4, W=6, sh=2, sw=3):
+                super(Net, self).__init__()
+                self.C = C
+                self.H = H
+                self.W = W
+                self.sh = sh
+                self.sw = sw
+
+            def forward(self, x):
+                y1 = x.view(
+                    1, self.C // (self.sh * self.sw), self.H, self.sh, self.W, self.sw
+                ).contiguous()
+                y2 = y1.permute(0, 3, 5, 1, 2, 4).contiguous()
+                y3 = y2.view(
+                    1,
+                    self.C * (self.sh * self.sw),
+                    self.H // self.sh,
+                    self.W // self.sw,
+                ).contiguous()
+                return y3
+
+        torch_model = Net()  # type: ignore
+        torch_model.train(False)
+        _test_torch_model_single_io(torch_model, (1, 12, 4, 6), (12, 4, 6))  # type: ignore
+
+    def test_reorganize_2(self):
+        """
+        (1, c, h, w) --> reshape ---> (1, c, h/sh, sh, w/sw, sw)
+        --> transpose [0,1,2,4,3,5] ---> (1, c, h/sh, w/sw, sh, sw)
+        --> reshape ---> (1, c*sh*sw, h/sh, w/sw)
+        """
+
+        class Net(nn.Module):
+            def __init__(self, C=12, H=4, W=6, sh=2, sw=3):
+                super(Net, self).__init__()
+                self.C = C
+                self.H = H
+                self.W = W
+                self.sh = sh
+                self.sw = sw
+
+            def forward(self, x):
+                y1 = x.view(
+                    1, self.C, self.H // self.sh, self.sh, self.W // self.sw, self.sw
+                ).contiguous()
+                y2 = y1.transpose(4, 3).contiguous()
+                y3 = y2.view(
+                    1,
+                    self.C * (self.sh * self.sw),
+                    self.H // self.sh,
+                    self.W // self.sw,
+                ).contiguous()
+                return y3
+
+        torch_model = Net()  # type: ignore
+        torch_model.train(False)
+        _test_torch_model_single_io(torch_model, (1, 12, 4, 6), (12, 4, 6))  # type: ignore
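The two pixel-shuffle docstrings above differ only in where the channel factor sits after the first reshape; the "working" ordering is exactly PyTorch's PixelShuffle decomposition. A quick standalone sanity check of that claim (illustrative only, not part of the test file):

```python
import torch

C, H, W, s = 12, 4, 6, 2  # channels must be divisible by s * s
x = torch.rand(1, C, H, W)

y_ref = torch.nn.PixelShuffle(s)(x)
y_manual = (
    x.view(1, C // (s * s), s, s, H, W)
    .permute(0, 1, 4, 2, 5, 3)            # (1, c', h, sh, w, sw)
    .reshape(1, C // (s * s), s * H, s * W)
)
assert torch.equal(y_ref, y_manual)
```

+
+
+@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND)
+class UnaryOperationTests(unittest.TestCase):
+    """
+    Unary Operation Test cases
+    """
+
+    ## Sqrt tests
+    @unittest.skipIf(
+        _macos_version() < MIN_MACOS_VERSION_10_15,
+        "macOS 10.15+ required. 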
Skipping test.", + ) + def test_sqrt_tensor(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.sqrt(x) + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class OperatorTests(unittest.TestCase): + """ + Operator test for Operator + """ + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_repeat(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return x.repeat([2, 3, 1]) + + torch_model = Net() + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class BinaryOperationTests(unittest.TestCase): + """ + Binary Operation Test cases + """ + + ## Addition tests + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_add_same_shape(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.add(x, y) + + y = torch.rand((18, 4, 5)) + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_add_same_shape_multiple(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return x + y + y1 + y2 + y3 + + y = torch.rand((18, 4, 5)) + y1 = torch.rand((4, 5)) + y2 = torch.rand((18, 4, 5)) + y3 = 7.234 + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_add_tensor_scalar(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.add(x, y) + + y = 5 + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_add_diff_shape(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.add(x, y) + + y = torch.rand((4, 5)) + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + ## Subtraction tests + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. 
Skipping test.", + ) + def test_sub_same_shape(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.sub(x, y) + + y = torch.rand((18, 4, 5)) + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_sub_same_shape_multiple(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return x - y - y1 - y2 - y3 + + y = torch.rand((18, 4, 5)) + y1 = torch.rand((4, 5)) + y2 = torch.rand((18, 4, 5)) + y3 = 7.234 + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_sub_tensor_scalar(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.sub(x, y) + + y = 5 + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_sub_diff_shape(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return torch.sub(x, y) + + y = torch.rand((4, 5)) + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_bianry_ops_mix_test(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return ((x * g + a) - d * (c + b) + (a * e - g) / e) / f + + a = torch.rand((18, 4, 5)) + b = torch.rand((4, 5)) + c = torch.rand((18, 4, 5)) + d = 7.234 + e = torch.rand((5)) + f = 8.234 + g = 5 + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io(torch_model, (18, 4, 5), (18, 4, 5), minimum_ios_deployment_target=minimum_ios_deployment_target) # type: ignore + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class ReduceOperationTests(unittest.TestCase): + """ + Reduction Operation Test cases + """ + + ## Reduction tests + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + def test_reducesum(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return x.sum(dim=0) + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io( + torch_model, + (18, 4, 5), + (4, 5), + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. 
Skipping test.", + ) + def test_reducemean(self, minimum_ios_deployment_target="13"): + class Net(nn.Module): + def forward(self, x): + return x.mean(dim=1) + + torch_model = Net() # type: ignore + torch_model.train(False) + _test_torch_model_single_io( + torch_model, + (18, 4, 5), + (18, 5), + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class TransformationTests(unittest.TestCase): + """ + Test cases for validating transformations + """ + + # Upsample Test case + # Upsample with scalar factor is splited in Floor -> Cast -> Div -> Concat + # Hence, is a good measure to test Costant Propagation and removal transformation + @unittest.skipIf( + _macos_version() < MIN_MACOS_VERSION_10_15, + "macOS 10.15+ required. Skipping test.", + ) + @pytest.mark.skip(reason="test failure: ") + def test_cast_removal_transformation(self, minimum_ios_deployment_target="13"): + torch_model = nn.Upsample(scale_factor=2) + torch_model.train(False) + _test_torch_model_single_io( + torch_model, + (1, 18, 4, 5), + (1, 18, 8, 10), + minimum_ios_deployment_target=minimum_ios_deployment_target, + ) diff --git a/coremltools/converters/onnx/_tests/test_transformers.py b/coremltools/converters/onnx/_tests/test_transformers.py new file mode 100644 index 000000000..e69e50f79 --- /dev/null +++ b/coremltools/converters/onnx/_tests/test_transformers.py @@ -0,0 +1,274 @@ +from __future__ import absolute_import as _ +from __future__ import division as _ +from __future__ import print_function as _ +from __future__ import unicode_literals as _ + +import pytest + +onnx = pytest.importorskip("onnx") + +import unittest +import numpy as np +import numpy.testing as npt # type: ignore + +from coremltools._deps import _HAS_ONNX, MSG_ONNX_NOT_FOUND + +if _HAS_ONNX: + import onnx + from onnx import helper, numpy_helper, TensorProto + + from coremltools.converters.onnx import convert + from coremltools.converters.onnx._graph import Graph + from coremltools.converters.onnx._transformers import ( + ConvAddFuser, + DropoutRemover, + ImageScalerRemover, + ) + from ._test_utils import ( + _onnx_create_model, + _test_onnx_model, + _conv_pool_output_size, + _random_array, + ) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class ConvAddFuserTest(unittest.TestCase): + def test_fuse_conv_without_bias(self): # type: () -> None + kernel_shape = (3, 2) + strides = (2, 3) + pads = (4, 2, 4, 2) + dilations = (1, 2) + group = 1 + weight = numpy_helper.from_array(_random_array((16, 3, 3, 2)), name="weight") + + input_shape = (1, 3, 224, 224) + output_size = _conv_pool_output_size( + input_shape, dilations, kernel_shape, pads, strides + ) + + output_shape = (1, int(weight.dims[0]), output_size[0], output_size[1]) + + inputs = [("input0", input_shape)] + outputs = [("output0", output_shape, TensorProto.FLOAT)] + + conv = helper.make_node( + "Conv", + inputs=[inputs[0][0], "weight"], + outputs=["conv_output"], + dilations=dilations, + group=group, + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + b = _random_array((int(weight.dims[0]),)) + bias = numpy_helper.from_array(b, name="bias") + + add = helper.make_node( + "Add", + inputs=[conv.output[0], "bias"], + outputs=[outputs[0][0]], + broadcast=1, + axis=1, + ) + + model = _onnx_create_model([conv, add], inputs, outputs, [weight, bias]) + graph_ = Graph.from_onnx(model.graph, onnx_ir_version=5) + fused_graph = graph_.transformed([ConvAddFuser()]) + + self.assertEqual(len(fused_graph.nodes), 1) + node = 
fused_graph.nodes[0] + self.assertEqual(len(node.inputs), 3) + npt.assert_equal(node.input_tensors[node.inputs[2]], b) + self.assertEqual(fused_graph.nodes[0].outputs[0], outputs[0][0]) + + def test_fuse_conv_with_bias(self): # type: () -> None + kernel_shape = (3, 2) + strides = (2, 3) + pads = (4, 2, 4, 2) + dilations = (1, 2) + group = 1 + weight = numpy_helper.from_array(_random_array((16, 3, 3, 2)), name="weight") + b = _random_array((int(weight.dims[0]),)) + bias = numpy_helper.from_array(b, name="bias") + + input_shape = (1, 3, 224, 224) + output_size = _conv_pool_output_size( + input_shape, dilations, kernel_shape, pads, strides + ) + + output_shape = (1, int(weight.dims[0]), output_size[0], output_size[1]) + + inputs = [("input0", input_shape)] + outputs = [("output0", output_shape, TensorProto.FLOAT)] + + conv = helper.make_node( + "Conv", + inputs=[inputs[0][0], "weight", "bias"], + outputs=["conv_output"], + dilations=dilations, + group=group, + kernel_shape=kernel_shape, + pads=pads, + strides=strides, + ) + + add = helper.make_node( + "Add", + inputs=[conv.output[0], "bias"], + outputs=[outputs[0][0]], + broadcast=1, + axis=1, + ) + + model = _onnx_create_model([conv, add], inputs, outputs, [weight, bias]) + graph_ = Graph.from_onnx(model.graph, onnx_ir_version=5) + fused_graph = graph_.transformed([ConvAddFuser()]) + + self.assertEqual(len(fused_graph.nodes), 1) + node = fused_graph.nodes[0] + self.assertEqual(len(node.inputs), 3) + npt.assert_equal(node.input_tensors[node.inputs[2]], b * 2) + self.assertEqual(fused_graph.nodes[0].outputs[0], outputs[0][0]) + + +@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND) +class NodeRemoverTests(unittest.TestCase): + def test_dropout_remover(self): # type: () -> None + inputs = [("input", (1, 3, 50, 50))] + outputs = [("out", (1, 5, 50, 50), TensorProto.FLOAT)] + weight = numpy_helper.from_array(_random_array((5, 3, 1, 1)), name="weight") + conv = helper.make_node( + "Conv", + inputs=["input", "weight"], + outputs=["conv_output"], + kernel_shape=(1, 1), + strides=(1, 1), + ) + drop = helper.make_node( + "Dropout", inputs=["conv_output"], outputs=["drop_output"], + ) + exp = helper.make_node("Exp", inputs=["drop_output"], outputs=["out"]) + + onnx_model = _onnx_create_model([conv, drop, exp], inputs, outputs) + + graph = Graph.from_onnx(onnx_model.graph, onnx_ir_version=5) + new_graph = graph.transformed([DropoutRemover()]) + self.assertEqual(len(graph.nodes), 3) + self.assertEqual(len(new_graph.nodes), 2) + self.assertEqual(new_graph.nodes[0].inputs[0], "input") + self.assertEqual(new_graph.nodes[1].inputs[0], new_graph.nodes[0].outputs[0]) + self.assertEqual(new_graph.nodes[1].outputs[0], "out") + + def test_image_scaler_remover(self): # type: () -> None + inputs = [("input", (1, 3, 50, 50))] + outputs = [("out", (1, 3, 50, 50), TensorProto.FLOAT)] + + im_scaler = helper.make_node( + "ImageScaler", + inputs=["input"], + outputs=["scaler_out"], + bias=[10, -6, 20], + scale=3.0, + ) + + exp = helper.make_node("Exp", inputs=["scaler_out"], outputs=["out"]) + + onnx_model = _onnx_create_model([im_scaler, exp], inputs, outputs) + + graph = Graph.from_onnx(onnx_model.graph, onnx_ir_version=5) + new_graph = graph.transformed([ImageScalerRemover()]) + self.assertEqual(len(graph.nodes), 2) + self.assertEqual(len(new_graph.nodes), 1) + self.assertEqual(new_graph.nodes[0].inputs[0], "input") + self.assertEqual(new_graph.nodes[0].outputs[0], "out") + + coreml_model = convert(onnx_model) + spec = coreml_model.get_spec() + + 
self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.channelScale, 3.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.blueBias, 20.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.greenBias, -6.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.redBias, 10.0)
+
+    def test_multiple_image_scaler(self):  # type: () -> None
+        inputs = [("input_color", (1, 3, 10, 10)), ("input_gray", (1, 1, 10, 10))]
+        outputs = [("out", (1, 4, 10, 10), TensorProto.FLOAT)]
+
+        im_scaler1 = helper.make_node(
+            "ImageScaler",
+            inputs=["input_color"],
+            outputs=["scaler_out_1"],
+            bias=[10, -6, 20],
+            scale=3.0,
+        )
+
+        im_scaler2 = helper.make_node(
+            "ImageScaler",
+            inputs=["input_gray"],
+            outputs=["scaler_out_2"],
+            bias=[-13],
+            scale=5.0,
+        )
+
+        concat = helper.make_node(
+            "Concat", inputs=["scaler_out_1", "scaler_out_2"], outputs=["out"], axis=1
+        )
+
+        onnx_model = _onnx_create_model(
+            [im_scaler1, im_scaler2, concat], inputs, outputs
+        )
+
+        spec = convert(onnx_model).get_spec()
+        self.assertEqual(len(spec.neuralNetwork.layers), 1)
+        self.assertEqual(len(spec.neuralNetwork.preprocessing), 2)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.channelScale, 3.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.blueBias, 20.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.greenBias, -6.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[0].scaler.redBias, 10.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[1].scaler.channelScale, 5.0)
+        self.assertEqual(spec.neuralNetwork.preprocessing[1].scaler.grayBias, -13.0)
+
+
+@unittest.skipUnless(_HAS_ONNX, MSG_ONNX_NOT_FOUND)
+class PixelShuffleFuserTest(unittest.TestCase):
+    def test_pixel_shuffle(self):  # type: () -> None
+        scale_factor = 2
+        input_shape = (1, 8, 2, 2)
+        output_shape = (
+            input_shape[0],
+            int(input_shape[1] / (scale_factor ** 2)),
+            input_shape[2] * scale_factor,
+            input_shape[3] * scale_factor,
+        )
+
+        inputs = [("input0", input_shape)]
+        outputs = [("output0", output_shape, TensorProto.FLOAT)]
+
+        shape1 = [
+            output_shape[0],
+            output_shape[1],
+            scale_factor,
+            scale_factor,
+            input_shape[2],
+            input_shape[3],
+        ]
+
+        shape1 = numpy_helper.from_array(np.asarray(shape1), name="shape1")
+        shape2 = numpy_helper.from_array(np.asarray(list(output_shape)), name="shape2")
+
+        node_0 = helper.make_node(
+            "Reshape", inputs=[inputs[0][0], "shape1"], outputs=["node0"],
+        )
+        node_1 = helper.make_node(
+            "Transpose", inputs=["node0"], outputs=["node1"], perm=[0, 1, 4, 2, 5, 3]
+        )
+        node_2 = helper.make_node(
+            "Reshape", inputs=["node1", "shape2"], outputs=[outputs[0][0]],
+        )
+        model = _onnx_create_model(
+            [node_0, node_1, node_2], inputs, outputs, initializer=[shape1, shape2]
+        )
+        _test_onnx_model(model, decimal=7)
diff --git a/coremltools/converters/onnx/_transformers.py b/coremltools/converters/onnx/_transformers.py
new file mode 100644
index 000000000..ae2958aa1
--- /dev/null
+++ b/coremltools/converters/onnx/_transformers.py
@@ -0,0 +1,944 @@
+from __future__ import absolute_import as _
+from __future__ import division as _
+from __future__ import print_function as _
+from __future__ import unicode_literals as _
+
+from typing import Sequence, Text, Dict, List, Tuple
+import numpy as np
+
+from onnx import TensorProto
+
+from ._graph import Graph, Node
+
+
+def _get_fully_defined_shape(shape, blob_name, graph):
+    if not np.any(shape == -1):
+        return shape
+    if blob_name not in graph.shape_dict:
+        return shape
+    else:
+        return 
graph.shape_dict[blob_name] + + +def _remove_single_input_output_node(node): + for child in node.children: + for i, child_input in enumerate(child.inputs): + if child_input == node.outputs[0]: + # Pass input to child + child.inputs[i] = node.inputs[0] + # If input tensor is known, pass down the input tensor value + if node.inputs[0] in node.input_tensors: + child.input_tensors[node.inputs[0]] = node.input_tensors[ + node.inputs[0] + ] + # Remove link as a parent from child node + child.parents.remove(node) + # Link current nodes parent and current child + for parent in node.parents: + child.parents.append(parent) + parent.children.append(child) + break + + for parent in node.parents: + parent.children.remove(node) + + +class NodesFuser(object): + """ + An abstract helper for merging nodes + """ + + def __init__( + self, num_nodes, # type: int + ): + # type: (...) -> None + assert num_nodes >= 2, "Algorithm only works if fusing multiple nodes" + self.num_nodes = num_nodes + + def __call__(self, graph): # type: (Graph) -> Graph + nodes = graph.nodes + merged_nodes = {} + for node in nodes: + nodes_window = [] # type: List[Node] + n = node + for _ in range(self.num_nodes - 1): + if len(n.parents) != 1: + # We're only fusing nodes with single parents + break + p = n.get_only_parent() + if len(p.children) != 1: + # We can only fuse a node if its parent's + # value isn't used by any other node. + break + nodes_window.insert(0, n) + n = p + if len(nodes_window) > 0: + # add parent of chained nodes + first = nodes_window[0] + p = first.get_only_parent() + if len(p.children) == 1: + nodes_window.insert(0, p) + if len(nodes_window) != self.num_nodes: + continue + if not self.is_eligible(graph, nodes_window): + continue + merged = self.merge(graph, nodes_window) + first, last = nodes_window[0], nodes_window[-1] + for parent in first.parents: + parent.children.remove(first) + if merged[0] not in parent.children: + parent.add_child(merged[0]) + for child in last.children: + child.parents.remove(last) + if merged[-1] not in child.parents: + child.add_parent(merged[-1]) + for n in nodes_window: + merged_nodes[n.name] = merged + + transformed_nodes = [] + added_merged = [] # type: List[Node] + for node in nodes: + if node.name in merged_nodes: + merged = merged_nodes[node.name] + if merged[0] not in added_merged: + for n in merged: + transformed_nodes.append(n) + added_merged.append(merged[0]) + else: + transformed_nodes.append(node) + return graph.create_graph(nodes=transformed_nodes) + + def is_eligible(self, graph, nodes): # type: (Graph, Sequence[Node]) -> bool + """Returns true if this subset of nodes is eligible for fusion.""" + raise NotImplementedError("Must be implemented by subclass.") + + def merge(self, graph, nodes): # type: (Graph, Sequence[Node]) -> Sequence[Node] + """Merge nodes""" + nodes[0].outputs = nodes[-1].outputs + return [nodes[0]] + + +class ConvAddFuser(NodesFuser): + """ + Fuses Add layer into parent convolution layer. 
+ """ + + def __init__(self): # type: () -> None + super(ConvAddFuser, self).__init__(2) + + def is_eligible(self, graph, nodes): # type: (Graph, Sequence[Node]) -> bool + parent, child = nodes[0], nodes[1] + if parent.op_type != "Conv": + return False + if child.op_type != "Add": + return False + if "broadcast" not in child.attrs: + return False + if "axis" not in child.attrs: + return False + if parent.inputs[1] not in parent.input_tensors: + return False + if len(parent.inputs) > 2 and parent.inputs[2] not in parent.input_tensors: + return False + if child.inputs[1] not in child.input_tensors: + return False + + broadcast = child.attrs["broadcast"] + if broadcast != 1: + return False + + axis = child.attrs["axis"] + if axis != 1: + return False + + return True + + def merge(self, graph, nodes): # type: (Graph, Sequence[Node]) -> Sequence[Node] + parent, child = nodes[0], nodes[1] + output_channels = parent.input_tensors[parent.inputs[1]].shape[0] + if len(parent.inputs) > 2: + bias_input_name = parent.inputs[2] + bias = parent.input_tensors[bias_input_name] + else: + bias_input_name = "{}_bias".format(parent.name,) + parent.inputs.append(bias_input_name) + bias = np.zeros((output_channels,), dtype=np.float32) + parent.input_tensors[bias_input_name] = bias + bias = bias + child.input_tensors[child.inputs[1]] + parent.input_tensors[bias_input_name] = bias + parent.outputs = child.outputs + parent.children.remove(child) + child.parents.remove(parent) + return [parent] + + +class BNBroadcastedMulFuser(NodesFuser): + """ + Fuses Mul into BatchNorm + """ + + def __init__(self): # type: () -> None + super(BNBroadcastedMulFuser, self).__init__(2) + + def is_eligible(self, graph, nodes): # type: (Graph, Sequence[Node]) -> bool + parent, child = nodes[0], nodes[1] + if parent.op_type != "BatchNormalization": + return False + if child.op_type != "Mul": + return False + if len(child.inputs) != 2: + return False + if child.inputs[1] not in child.input_tensors: + return False + t = child.input_tensors[child.inputs[1]] + if len(np.squeeze(t).shape) != 1: + return False + if parent.inputs[1] not in parent.input_tensors: + return False + if parent.inputs[2] not in parent.input_tensors: + return False + return True + + def merge(self, graph, nodes): # type: (Graph, Sequence[Node]) -> Sequence[Node] + parent, child = nodes[0], nodes[1] + weight = parent.input_tensors[parent.inputs[1]] + bias = parent.input_tensors[parent.inputs[2]] + W = np.squeeze(child.input_tensors[child.inputs[1]]) + parent.input_tensors[parent.inputs[1]] = np.multiply(weight, W) + parent.input_tensors[parent.inputs[2]] = np.multiply(bias, W) + parent.outputs = child.outputs + parent.children.remove(child) + child.parents.remove(parent) + return [parent] + + +class BNBroadcastedAddFuser(NodesFuser): + """ + Fuses Add into BatchNorm + """ + + def __init__(self): # type: () -> None + super(BNBroadcastedAddFuser, self).__init__(2) + + def is_eligible(self, graph, nodes): # type: (Graph, Sequence[Node]) -> bool + parent, child = nodes[0], nodes[1] + if parent.op_type != "BatchNormalization": + return False + if child.op_type != "Add": + return False + if len(child.inputs) != 2: + return False + if child.inputs[1] not in child.input_tensors: + return False + t = child.input_tensors[child.inputs[1]] + if len(np.squeeze(t).shape) != 1: + return False + if parent.inputs[1] not in parent.input_tensors: + return False + if parent.inputs[2] not in parent.input_tensors: + return False + return True + + def merge(self, graph, nodes): # type: 
(Graph, Sequence[Node]) -> Sequence[Node]
+        parent, child = nodes[0], nodes[1]
+        bias = parent.input_tensors[parent.inputs[2]]
+        b = np.squeeze(child.input_tensors[child.inputs[1]])
+        parent.input_tensors[parent.inputs[2]] = bias + b
+        parent.outputs = child.outputs
+        parent.children.remove(child)
+        child.parents.remove(parent)
+        return [parent]
+
+
+class DropoutRemover(NodesFuser):
+    """
+    Removes Dropout layer
+    """
+
+    def __init__(self):  # type: () -> None
+        super(DropoutRemover, self).__init__(2)
+
+    def is_eligible(self, graph, nodes):  # type: (Graph, Sequence[Node]) -> bool
+        child = nodes[1]
+        return child.op_type == "Dropout"
+
+    def merge(self, graph, nodes):  # type: (Graph, Sequence[Node]) -> Sequence[Node]
+        parent, child = nodes[0], nodes[1]
+        parent.children.remove(child)
+        child.parents.remove(parent)
+        parent.outputs = [child.outputs[0]]
+        return [parent]
+
+
+class ReshapeInitTensorFuser(object):
+    """
+    Fuses Reshape operator if it is used only to reshape blob in
+    graph initializer. We can reshape here instead of runtime.
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        nodes = graph.nodes
+        removed = []
+        for node in nodes:
+            if node.op_type != "Reshape":
+                continue
+            if len(node.input_tensors) not in (1, 2):
+                continue
+            tensor_name = node.inputs[0]
+            if tensor_name not in node.input_tensors:
+                continue
+            if len(node.inputs) > 1:
+                shape_name = node.inputs[1]
+                if shape_name not in node.input_tensors:
+                    continue
+            is_non_constant_parent = False
+            if len(node.parents) > 0:
+                for parent in node.parents:
+                    if parent.op_type != "Constant":
+                        is_non_constant_parent = True
+                        break
+            if is_non_constant_parent:
+                continue
+
+            tensor = node.input_tensors[tensor_name]
+            if "shape" in node.attrs:
+                shape = tuple(node.attrs["shape"])
+            else:
+                shape = node.input_tensors[shape_name]  # type: ignore
+
+            # ONNX spec supports setting dimension to '0', in which case
+            # it should be taken from old dimension.
+            # This isn't supported in numpy, so don't transform.
+            # TODO Should we support this case?
+            if any([s == 0 for s in shape]):
+                continue
+
+            # Only mark the node as removed once we know the reshape will
+            # actually be folded; cast via int() so this works whether
+            # `shape` is a tuple from the attribute or a numpy array.
+            removed.append(node)
+            output_name = node.outputs[0]
+            reshaped_tensor = tensor.reshape([int(s) for s in shape])
+
+            for child in node.children:
+                child.parents.remove(node)
+                child.input_tensors[output_name] = reshaped_tensor
+
+        transformed_nodes = [node for node in nodes if node not in removed]
+        return graph.create_graph(nodes=transformed_nodes)
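The TODO above concerns ONNX Reshape's convention that a 0 in the target shape means "copy the corresponding input dimension". A possible numpy-side resolution, sketched here as a hypothetical helper (`onnx_style_reshape` does not exist in the codebase):

```python
import numpy as np

def onnx_style_reshape(tensor, shape):
    # 0 copies the input dimension at that position; -1 is inferred by numpy.
    resolved = [tensor.shape[i] if s == 0 else int(s) for i, s in enumerate(shape)]
    return tensor.reshape(resolved)

x = np.zeros((2, 3, 4))
assert onnx_style_reshape(x, [0, -1]).shape == (2, 12)
```

+
+
+class OutputRenamer(object):
+    """
+    Rename outputs according to mapping
+    """
+
+    def __init__(
+        self, mapping,  # type: Dict[Text, Text]
+    ):
+        # type: (...) 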
-> None + self.mapping = mapping + + def __call__(self, graph): # type: (Graph) -> Graph + mapping = self.mapping.copy() + nodes = graph.nodes + for node in nodes: + for i in range(len(node.outputs)): + output = node.outputs[i] + if output not in mapping: + continue + node.outputs[i] = mapping[output] + for child in node.children: + for j in range(len(child.inputs)): + input_ = child.inputs[j] + if input_ != output: + continue + child.inputs[j] = mapping[output] + del mapping[output] + if len(mapping) == 0: + break + return graph + + +class ReshapeTransposeReshape_pattern1(NodesFuser): + """ + Detects certain types of patterns of "reshape-> (rank 6) -> transpose (rank 6) -> reshape (rank 4)" that can be converted + """ + + def __init__(self): # type: () -> None + super(ReshapeTransposeReshape_pattern1, self).__init__(3) + self.num_added = 0 + + def is_eligible(self, graph, nodes): # type: (Graph, Sequence[Node]) -> bool + if not ( + nodes[0].op_type == "Reshape" + and nodes[1].op_type == "Transpose" + and nodes[2].op_type == "Reshape" + ): + return False + if len(nodes[0].inputs) == 1 or len(nodes[2].inputs) == 1: + return False # it's an old version of onnx Reshape op that had shape as an attribute + if nodes[0].inputs[1] not in nodes[0].input_tensors: + return False + if nodes[2].inputs[1] not in nodes[2].input_tensors: + return False + + shape_1 = nodes[0].input_tensors[nodes[0].inputs[1]] + shape_final = nodes[2].input_tensors[nodes[2].inputs[1]] + + shape_1 = _get_fully_defined_shape(shape_1, nodes[0].outputs[0], graph) + shape_final = _get_fully_defined_shape(shape_final, nodes[2].outputs[0], graph) + + if len(shape_1) != 6 or shape_1[0] != 1 or len(shape_final) != 4: + return False + + # check if coreml can convert this sequence using 1 transpose layer + perm = nodes[1].attrs.get("perm", []) + if len(perm) != 6: + return False + if perm[0] != 0: + return False + + consecutive_indices = False + perm = perm[1:] + for i in range(1, 5): + if perm[i] - perm[i - 1] == 1: + consecutive_indices = True + break + + if not consecutive_indices: + return False + + return True + + def get_unique_edge_name(self, graph, name): # type: (Graph, Text) -> Text + self.num_added += 1 + return graph.get_unique_edge_name(name + "_" + str(self.num_added)) + + def merge(self, graph, nodes): # type: (Graph, Sequence[Node]) -> Sequence[Node] + """ + In general, CoreML Reshape and Transpose layers don't support tensors with more + than 4 dimensions. However, certain patterns in onnx like + "reshape-> (rank 6) -> transpose (rank 6) -> reshape (rank 4)" + can be translated to CoreML as (i.e. 
without going to rank 6) + "reshape-> (rank 4) -> transpose (rank 4) -> reshape (rank 4)" + """ + reshape_1 = nodes[0] + transpose_1 = nodes[1] + final_reshape = nodes[2] + + shape_1 = reshape_1.input_tensors[reshape_1.inputs[1]] + shape_1 = _get_fully_defined_shape(shape_1, nodes[0].outputs[0], graph) + shape_1 = shape_1[1:] + perm = nodes[1].attrs.get("perm", []) + perm = perm[1:] + perm = [x - 1 for x in perm] + # now perm is length 5 list + + new_perm = [] + new_shape = [1, 1, 1, 1] + i = 0 + found_consecutive_pair = False + while i < 5: + if not found_consecutive_pair and i < 4 and perm[i + 1] - perm[i] == 1: + new_perm.append(perm[i]) + new_shape[perm[i]] = shape_1[perm[i]] * shape_1[perm[i + 1]] + i = i + 2 + found_consecutive_pair = True + continue + else: + new_perm.append(perm[i] - 1) + new_shape[perm[i] - 1] = shape_1[perm[i]] + i += 1 + + reshape_1.input_tensors[reshape_1.inputs[1]] = np.asarray(new_shape) + transpose_1.attrs["perm"] = new_perm + + return [reshape_1, transpose_1, final_reshape] + + +class PixelShuffleFuser(NodesFuser): + def __init__(self): # type: () -> None + super(PixelShuffleFuser, self).__init__(3) + self.num_added = 0 + + def is_eligible(self, graph, nodes): # type: (Graph, Sequence[Node]) -> bool + if not ( + nodes[0].op_type == "Reshape" + and nodes[1].op_type == "Transpose" + and nodes[2].op_type == "Reshape" + ): + return False + if len(nodes[0].inputs) == 1 or len(nodes[2].inputs) == 1: + return False # it's an old version of onnx Reshape op that had shape as an attribute + if nodes[0].inputs[1] not in nodes[0].input_tensors: + return False + if nodes[2].inputs[1] not in nodes[2].input_tensors: + return False + + shape_1 = nodes[0].input_tensors[nodes[0].inputs[1]] + shape_final = nodes[2].input_tensors[nodes[2].inputs[1]] + + shape_1 = _get_fully_defined_shape(shape_1, nodes[0].outputs[0], graph) + shape_final = _get_fully_defined_shape(shape_final, nodes[2].outputs[0], graph) + + if len(shape_1) != 6 or shape_1[0] != 1 or len(shape_final) != 4: + return False + + if nodes[1].attrs.get("perm", []) != [0, 1, 4, 2, 5, 3]: + return False + + return True + + def get_unique_edge_name(self, graph, name): # type: (Graph, Text) -> Text + self.num_added += 1 + return graph.get_unique_edge_name(name + "_" + str(self.num_added)) + + def merge(self, graph, nodes): # type: (Graph, Sequence[Node]) -> Sequence[Node] + """ + Pixel shuffle is implemented using 3 operators: + - Reshape --> rank 6 (1, x1, x2, x3, x4, x5) + - Transpose(0, 1, 4, 2, 5, 3) --> (1, x1, x4, x2, x5, x3) + - Reshape ---> rank 4 + CoreML Reshape and Transpose layers don't support tensors with more + than 4 dimensions. 
Thus we change the above sequence of operators to the
+        following equivalent sequence:
+        - Reshape --> (x1, x2, x3, x4 * x5)
+        - Transpose(0, 3, 1, 2) --> (x1, x4 * x5, x2, x3)
+        - Reshape --> (x1 * x4, x5, x2, x3)
+        - Transpose(0, 2, 1, 3) --> (x1 * x4, x2, x5, x3)
+        - Reshape --> rank 4
+        """
+        reshape_1 = nodes[0]
+        transpose_1 = nodes[1]
+        final_reshape = nodes[2]
+
+        # first reshape
+        shape_1 = reshape_1.input_tensors[reshape_1.inputs[1]]
+        shape_1 = _get_fully_defined_shape(shape_1, nodes[0].outputs[0], graph)
+        x1 = shape_1[1]
+        x2 = shape_1[2]
+        x3 = shape_1[3]
+        x4 = shape_1[4]
+        x5 = shape_1[5]
+        reshape_1.input_tensors[reshape_1.inputs[1]] = np.asarray([x1, x2, x3, x4 * x5])
+
+        # first transpose
+        transpose_1.children = []
+        transpose_1.attrs["perm"] = [0, 3, 1, 2]
+
+        reshape_output_name = final_reshape.name + "_pixel_shuffle_reshape"
+        transpose_output_name = final_reshape.name + "_pixel_shuffle_transpose"
+
+        transpose_1.outputs = [self.get_unique_edge_name(graph, transpose_output_name)]
+
+        shape_name_second_reshape = self.get_unique_edge_name(
+            graph, reshape_output_name
+        )
+        output_name_second_reshape = self.get_unique_edge_name(
+            graph, reshape_output_name
+        )
+
+        # second reshape
+        reshape_2 = Node(
+            reshape_output_name,
+            "Reshape",
+            {},
+            [transpose_1.outputs[0], shape_name_second_reshape],
+            [output_name_second_reshape],
+        )
+        reshape_2.input_tensors[shape_name_second_reshape] = np.asarray(
+            [x1 * x4, x5, x2, x3]
+        )
+        transpose_1.add_child(reshape_2)
+
+        # second transpose
+        transpose_2 = Node(
+            transpose_output_name,
+            "Transpose",
+            {"perm": [0, 2, 1, 3]},
+            reshape_2.outputs,
+            [self.get_unique_edge_name(graph, transpose_output_name)],
+        )
+        reshape_2.add_child(transpose_2)
+
+        # third reshape
+        final_reshape.inputs = [transpose_2.outputs[0], nodes[2].inputs[1]]
+        final_reshape.parents = []
+        transpose_2.add_child(final_reshape)
+
+        return [reshape_1, transpose_1, reshape_2, transpose_2, final_reshape]
+
+
+class AddModelInputsOutputs(object):
+    """
+    Expose hidden states of recurrent layers as model inputs and outputs
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        input_names = [str(input_[0]) for input_ in graph.inputs]
+        output_names = [str(output_[0]) for output_ in graph.outputs]
+        for node in graph.nodes:
+            if str(node.op_type) == "LSTM":
+                input_h = (
+                    node.inputs[5]
+                    if len(node.inputs) > 5
+                    else node.inputs[0] + "_h_input"
+                )
+                input_c = (
+                    node.inputs[6]
+                    if len(node.inputs) > 6
+                    else node.inputs[0] + "_c_input"
+                )
+                output_h = (
+                    node.outputs[1]
+                    if len(node.outputs) > 1
+                    else node.outputs[0] + "_h_output"
+                )
+                output_c = (
+                    node.outputs[2]
+                    if len(node.outputs) > 2
+                    else node.outputs[0] + "_c_output"
+                )
+                h = node.attrs["hidden_size"]
+                for input_ in [str(input_h), str(input_c)]:
+                    if input_ not in input_names:
+                        graph.inputs.append(tuple((input_, TensorProto.FLOAT, (h,))))  # type: ignore
+                    if input_ not in graph.blob_to_op_type:
+                        graph.blob_to_op_type[input_] = ["LSTM"]
+                for output_ in [str(output_h), str(output_c)]:
+                    if output_ not in output_names:
+                        graph.outputs.append(tuple((output_, TensorProto.FLOAT, (h,))))  # type: ignore
+                    graph.blob_from_op_type[output_] = "LSTM"
+        return graph
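That the rank-4-only route really matches the rank-6 transpose is easy to check numerically. A minimal numpy verification of the equivalence claimed in the docstring above (illustrative only; x1..x5 follow the docstring's names):

```python
import numpy as np

x1, x2, x3, x4, x5 = 2, 3, 4, 5, 6
t = np.random.rand(1, x1, x2, x3, x4, x5)

# Original rank-6 route.
a = t.transpose(0, 1, 4, 2, 5, 3).reshape(1, x1, x4 * x2, x5 * x3)

# Rank-4-only route used by PixelShuffleFuser.
b = t.reshape(x1, x2, x3, x4 * x5)
b = b.transpose(0, 3, 1, 2)
b = b.reshape(x1 * x4, x5, x2, x3)
b = b.transpose(0, 2, 1, 3)
b = b.reshape(1, x1, x4 * x2, x5 * x3)

assert np.array_equal(a, b)
```

+
+
+class ConstantsToInitializers(object):
+    """
+    Takes onnx Constant nodes and puts their tensors into the graph initializers instead.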
+ """ + + def __call__(self, graph): # type: (Graph) -> Graph + output_names = [str(output_[0]) for output_ in graph.outputs] + nodes_to_be_removed = [] + for node in graph.nodes: + if node.op_type == "Constant" and (node.name not in output_names): + nodes_to_be_removed.append(node) + x = node.attrs["value"] + for child in node.children: + child.input_tensors[node.outputs[0]] = x + child.parents.remove(node) + graph.shape_dict[node.outputs[0]] = x.shape + + transformed_nodes = [] + for node in graph.nodes: + if node not in nodes_to_be_removed: + transformed_nodes.append(node) + return graph.create_graph(nodes=transformed_nodes) + + +class ConstantFillToInitializers(object): + """ + Takes onnx ConstantFill nodes and puts the tensor into graph initializers instead, for simple cases only. + """ + + def __call__(self, graph): # type: (Graph) -> Graph + output_names = [str(output_[0]) for output_ in graph.outputs] + nodes_to_be_removed = [] + for node in graph.nodes: + if ( + node.op_type == "ConstantFill" + and (node.name not in output_names) + and node.attrs.get("input_as_shape", 0) + and node.inputs[0] in node.input_tensors + and node.attrs.get("extra_shape", None) is None + ): + + s = node.input_tensors[node.inputs[0]] + x = np.ones(tuple(s.astype(int))) * node.attrs.get("value", 0.0) + nodes_to_be_removed.append(node) + for child in node.children: + child.input_tensors[node.outputs[0]] = x + child.parents.remove(node) + graph.shape_dict[node.outputs[0]] = x.shape + + transformed_nodes = [] + for node in graph.nodes: + if node not in nodes_to_be_removed: + transformed_nodes.append(node) + return graph.create_graph(nodes=transformed_nodes) + + +class ShapeOpRemover(object): + """ + remove shape op, if the input shape is fully known + """ + + def __call__(self, graph): # type: (Graph) -> Graph + nodes_to_be_removed = [] + output_names = [str(output_[0]) for output_ in graph.outputs] + for node in graph.nodes: + if ( + node.op_type == "Shape" + and (node.name not in output_names) + and node.inputs[0] in graph.shape_dict + ): + x_tuple = graph.shape_dict[node.inputs[0]] # type: Tuple[int, ...] 
is_well_defined = True
+                for i in x_tuple:
+                    if not (isinstance(i, int) and i > 0):
+                        is_well_defined = False
+                        break
+                if is_well_defined:
+                    x = np.asarray(x_tuple, dtype=np.float32)
+                    nodes_to_be_removed.append(node)
+                    for child in node.children:
+                        child.input_tensors[node.outputs[0]] = x
+                        child.parents.remove(node)
+                    for parent in node.parents:
+                        parent.children.remove(node)
+                    graph.shape_dict[node.outputs[0]] = x.shape
+
+        transformed_nodes = []
+        for node in graph.nodes:
+            if node not in nodes_to_be_removed:
+                transformed_nodes.append(node)
+        return graph.create_graph(nodes=transformed_nodes)
+
+
+class CastOpRemover(object):
+    """
+    Remove Cast ops: onnx-coreml treats every tensor as float, so Cast
+    operators can simply be removed.
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        nodes_to_be_removed = []
+        output_names = [str(output_[0]) for output_ in graph.outputs]
+        for node in graph.nodes:
+            if (
+                node.op_type == "Cast"
+                and (node.name not in output_names)
+                and node.inputs[0] in graph.shape_dict
+            ):
+                nodes_to_be_removed.append(node)
+                _remove_single_input_output_node(node)
+
+        transformed_nodes = []
+        for node in graph.nodes:
+            if node not in nodes_to_be_removed:
+                transformed_nodes.append(node)
+        return graph.create_graph(nodes=transformed_nodes)
+
+
+class PaddingOpRemover(object):
+    """
+    Remove Pad Op if all the pad values are 0
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        nodes_to_be_removed = []
+        output_names = [str(output_[0]) for output_ in graph.outputs]
+        for node in graph.nodes:
+            if (
+                node.op_type == "Pad"
+                and (node.name not in output_names)
+                and node.inputs[0] in graph.shape_dict
+            ):
+                pads = node.attrs.get("pads", [])
+                if len(pads) > 0 and sum(pads) == 0:
+                    nodes_to_be_removed.append(node)
+                    _remove_single_input_output_node(node)
+
+        transformed_nodes = []
+        for node in graph.nodes:
+            if node not in nodes_to_be_removed:
+                transformed_nodes.append(node)
+        return graph.create_graph(nodes=transformed_nodes)
+
+
+class ImageScalerRemover(object):
+    """
+    Removes an ImageScaler layer if it is connected directly to a model input
+    (i.e. it has no parent nodes).
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        input_names = [str(input_[0]) for input_ in graph.inputs]
+        nodes_to_be_removed = []
+        for node in graph.nodes:
+            if (
+                (node.op_type != "ImageScaler")
+                or (len(node.parents) != 0)
+                or (node.inputs[0] not in input_names)
+            ):
+                continue
+            nodes_to_be_removed.append(node.name)
+            for child in node.children:
+                for i, child_input in enumerate(child.inputs):
+                    if child_input == node.outputs[0]:
+                        child.inputs[i] = node.inputs[0]
+                        child.parents.remove(node)
+                        break
+
+        transformed_nodes = []
+        for node in graph.nodes:
+            if node.name not in nodes_to_be_removed:
+                transformed_nodes.append(node)
+        return graph.create_graph(nodes=transformed_nodes)
+
+
+class ConstantRemover(object):
+    """
+    Removes an op if all of its inputs are constant.
+    Currently supports: Gather, Floor, Div, Mul, Slice, Transpose, Concat,
+    Unsqueeze, Squeeze, Gemm
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        nodes_to_be_removed = []
+        for node in graph.nodes:
+            are_all_inputs_constant = True
+            for input_ in node.inputs:
+                if input_ not in node.input_tensors:
+                    are_all_inputs_constant = False
+                    break
+
+            transformation_performed = False
+            if len(node.parents) != 0 or not are_all_inputs_constant:
+                continue
+            # TODO: Replace If -> ElIf with more general transformation block
+            if node.op_type == "Gather":
+                data = node.input_tensors[node.inputs[0]]
+                idx = node.input_tensors[node.inputs[1]]
+                axis = node.attrs.get("axis", 0)
+                output = np.take(data, idx, axis=axis)
+                transformation_performed = True
+            elif node.op_type == "Floor":
+                input = node.input_tensors[node.inputs[0]]
+                output = np.floor(input)
+                transformation_performed = True
+            elif node.op_type == "Div" or node.op_type == "Mul":
+                x = node.input_tensors[node.inputs[0]]
+                y = node.input_tensors[node.inputs[1]]
+                output = x / y if node.op_type == "Div" else x * y
+                transformation_performed = True
+            elif node.op_type == "Slice":
+                x = node.input_tensors[node.inputs[0]]
+                ends = node.attrs["ends"]
+                starts = node.attrs["starts"]
+                axes = node.attrs.get("axes", range(len(starts)))
+                output = x
+                for i, a in enumerate(axes):
+                    s = starts[i]
+                    e = ends[i]
+                    n = output.shape[a]
+                    if s < 0:
+                        s += n
+                    if e < 0:
+                        e += n
+                    # Slice each axis of the running result; taking from the
+                    # original input would apply only the last axis.
+                    output = np.take(output, range(s, e), axis=a)  # type: ignore
+                transformation_performed = True
+            elif node.op_type == "Transpose":
+                x = node.input_tensors[node.inputs[0]]
+                perm = node.attrs.get("perm", None)
+                output = np.transpose(x, axes=perm)  # type: ignore
+                transformation_performed = True
+            elif node.op_type == "Concat":
+                x_arr = []
+                for input_ in node.inputs:
+                    x_arr.append(node.input_tensors[input_])
+                axis = node.attrs.get("axis", 0)
+                output = np.concatenate(x_arr, axis=axis)  # type: ignore
+                transformation_performed = True
+            elif node.op_type == "Unsqueeze" or node.op_type == "Squeeze":
+                x = node.input_tensors[node.inputs[0]]
+                if node.op_type == "Unsqueeze":
+                    axes = node.attrs["axes"]
+                    axes.sort()
+                    output = x
+                    for axis in axes:
+                        # Accumulate the expansion; expanding the original
+                        # input each time would apply only the last axis.
+                        output = np.expand_dims(output, axis=axis)  # type: ignore
+                else:
+                    axes = node.attrs.get("axes", None)
+                    output = np.squeeze(x, axis=None if axes is None else tuple(axes))
+                transformation_performed = True
+            elif node.op_type == "Gemm":
+                alpha = node.attrs.get("alpha", 1.0)
+                beta = node.attrs.get("beta", 1.0)
+                transA = node.attrs.get("transA", False)
+                transB = node.attrs.get("transB", False)
+
+                A_tensor = node.input_tensors[node.inputs[0]]
+                B_tensor = node.input_tensors[node.inputs[1]]
+                C_tensor = node.input_tensors[node.inputs[2]]
+
+                A_tensor = np.transpose(A_tensor) if transA else A_tensor
+                B_tensor = np.transpose(B_tensor) if transB else B_tensor
+
+                output = alpha * np.dot(A_tensor, B_tensor) + beta * C_tensor
+                transformation_performed = True
+
+            if transformation_performed:
+                nodes_to_be_removed.append(node)
+                graph.shape_dict[node.outputs[0]] = output.shape
+                for child_node in node.children:
+                    child_node.parents.remove(node)
+                    child_node.input_tensors[node.outputs[0]] = output
+        transformed_nodes = []
+        for node in graph.nodes:
+            if node not in nodes_to_be_removed:
+                transformed_nodes.append(node)
+        return graph.create_graph(nodes=transformed_nodes)
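The Gemm branch above folds Y = alpha * A·B + beta * C at conversion time, matching ONNX Gemm's definition with the transpose flags resolved first. A tiny numpy sketch of that formula (illustrative only; shapes chosen arbitrarily):

```python
import numpy as np

alpha, beta = 0.5, 2.0
A = np.random.rand(4, 3)  # stored transposed; a set transA flag resolves it
B = np.random.rand(4, 2)
C = np.random.rand(3, 2)

A_t = np.transpose(A)  # what the transformer does when transA is set
folded = alpha * np.dot(A_t, B) + beta * C
assert folded.shape == (3, 2)
```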
+class DeadCodeElimination(object):
+    """
+    Removes nodes with unused outputs
+    """
+
+    def __call__(self, graph):  # type: (Graph) -> Graph
+        input_names = [str(input_[0]) for input_ in graph.inputs]
+        output_names = set([str(output_[0]) for output_ in graph.outputs])
+
+        nodes_to_be_removed = []
+        uses = {}
+
+        for _output in output_names:
+            uses[_output] = uses.get(_output, 0) + 1
+
+        for node in graph.nodes:
+            for _input in node.inputs:
+                uses[_input] = uses.get(_input, 0) + 1
+
+        for node in reversed(graph.nodes):
+            output_used = False
+            for _output in node.outputs:
+                if _output in uses:
+                    output_used = True
+                    break
+
+            if not output_used:
+                # Remove current node
+                for _input in node.inputs:
+                    uses[_input] -= 1
+                    if uses[_input] == 0:
+                        del uses[_input]
+                nodes_to_be_removed.append(node.name)
+                for parent in node.parents:
+                    parent.children.remove(node)
+
+        transformed_nodes = []
+        for node in graph.nodes:
+            if node.name not in nodes_to_be_removed:
+                transformed_nodes.append(node)
+
+        for _input in input_names:
+            if _input not in uses:
+                for i in range(len(graph.inputs)):
+                    # Compare names with ==; identity (`is`) is not reliable
+                    # for strings.
+                    if graph.inputs[i][0] == _input:
+                        graph.inputs.remove(graph.inputs[i])
+                        break
+
+        return graph.create_graph(nodes=transformed_nodes)
diff --git a/coremltools/converters/sklearn/_LinearSVC.py b/coremltools/converters/sklearn/_LinearSVC.py
index 89b147792..433c9b78a 100644
--- a/coremltools/converters/sklearn/_LinearSVC.py
+++ b/coremltools/converters/sklearn/_LinearSVC.py
@@ -3,16 +3,19 @@
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from ..._deps import HAS_SKLEARN as _HAS_SKLEARN
+from ..._deps import _HAS_SKLEARN
 from ...models import MLModel as _MLModel
 
 if _HAS_SKLEARN:
     from sklearn.svm import LinearSVC as _LinearSVC
+
+    sklearn_class = _LinearSVC
 
     from . import _sklearn_util
     from . import _logistic_regression
 
-model_type = 'classifier'
+
+model_type = "classifier"
+
 
 def convert(model, feature_names, target):
     """Convert a LinearSVC model to the protobuf spec.
@@ -32,19 +35,24 @@ def convert(model, feature_names, target):
     model_spec: An object of type Model_pb.
     Protobuf representation of the model
     """
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
 
     _sklearn_util.check_expected_type(model, _LinearSVC)
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_'))
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_"))
     return _MLModel(_logistic_regression._convert(model, feature_names, target))
 
+
 def supports_output_scores(model):
     return True
 
+
 def get_output_classes(model):
     return _logistic_regression.get_output_classes(model)
 
+
 def get_input_dimension(model):
     return _logistic_regression.get_input_dimension(model)
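For orientation, these per-model sklearn converter modules are normally reached through the top-level `coremltools.converters.sklearn.convert` entry point, which dispatches on the model class. A hedged end-to-end sketch (the dataset and feature names are made up for illustration):

```python
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

import coremltools

X, y = make_classification(n_features=4, random_state=0)
clf = LinearSVC().fit(X, y)

# Dispatches to _LinearSVC.convert under the hood; "target" names the output.
mlmodel = coremltools.converters.sklearn.convert(
    clf, ["f0", "f1", "f2", "f3"], "target"
)
```

diff --git a/coremltools/converters/sklearn/_LinearSVR.py b/coremltools/converters/sklearn/_LinearSVR.py
index ab60b475f..301eb9f5b 100644
--- a/coremltools/converters/sklearn/_LinearSVR.py
+++ b/coremltools/converters/sklearn/_LinearSVR.py
@@ -3,18 +3,20 @@
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 
-from ..._deps import HAS_SKLEARN as _HAS_SKLEARN
+from ..._deps import _HAS_SKLEARN
 from ...models import MLModel as _MLModel
 
 if _HAS_SKLEARN:
     from sklearn.svm import LinearSVR as _LinearSVR
     import sklearn
     from . import _sklearn_util
+
     sklearn_class = sklearn.svm.LinearSVR
 
     from . import _linear_regression
 
-model_type = 'regressor'
+model_type = "regressor"
+
 
 def convert(model, features, target):
     """Convert a LinearSVR model to the protobuf spec.
@@ -34,12 +36,14 @@ def convert(model, features, target):
     model_spec: An object of type Model_pb.
     Protobuf representation of the model
     """
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled." 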
+ ) + # Check the scikit learn model _sklearn_util.check_expected_type(model, _LinearSVR) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_")) return _MLModel(_linear_regression._convert(model, features, target)) diff --git a/coremltools/converters/sklearn/_NuSVC.py b/coremltools/converters/sklearn/_NuSVC.py index c3b2578df..eec250e96 100644 --- a/coremltools/converters/sklearn/_NuSVC.py +++ b/coremltools/converters/sklearn/_NuSVC.py @@ -5,16 +5,18 @@ from . import _SVC as _SVC -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN if _HAS_SKLEARN: from ._sklearn_util import check_fitted from . import _sklearn_util from sklearn.svm import NuSVC as _NuSVC + sklearn_class = _NuSVC model_type = "classifier" + def convert(model, feature_names, target): """Convert a Nu-Support Vector Classification (NuSVC) model to the protobuf spec. Parameters @@ -33,26 +35,34 @@ def convert(model, feature_names, target): model_spec: An object of type Model_pb. Protobuf representation of the model """ - - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_expected_type(model, _NuSVC) return _SVC.convert(model, feature_names, target) + def supports_output_scores(model): return _SVC.supports_output_scores(model) + def get_output_classes(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) - return _SVC.get_output_classes(model) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + check_fitted(model, lambda m: hasattr(m, "support_vectors_")) + return _SVC.get_output_classes(model) + def get_input_dimension(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - - check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) - return _SVC.get_input_dimension(model) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + check_fitted(model, lambda m: hasattr(m, "support_vectors_")) + return _SVC.get_input_dimension(model) diff --git a/coremltools/converters/sklearn/_NuSVR.py b/coremltools/converters/sklearn/_NuSVR.py index 72a282f57..b0e422262 100644 --- a/coremltools/converters/sklearn/_NuSVR.py +++ b/coremltools/converters/sklearn/_NuSVR.py @@ -5,16 +5,17 @@ from . import _SVR as _SVR -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: from ._sklearn_util import check_fitted from sklearn.svm import NuSVR as _NuSVR from . import _sklearn_util + sklearn_class = _NuSVR -model_type = 'regressor' +model_type = "regressor" def convert(model, feature_names, target): @@ -35,16 +36,20 @@ def convert(model, feature_names, target): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." 
+    )
+
     _sklearn_util.check_expected_type(model, _NuSVR)
     return _SVR.convert(model, feature_names, target)
 
+
 def get_input_dimension(model):
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
-    check_fitted(model, lambda m: hasattr(m, 'support_vectors_'))
-    return _SVR.get_input_dimension(model)
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
+    check_fitted(model, lambda m: hasattr(m, "support_vectors_"))
+    return _SVR.get_input_dimension(model)
diff --git a/coremltools/converters/sklearn/_SVC.py b/coremltools/converters/sklearn/_SVC.py
index add064378..bcdd5b4a8 100644
--- a/coremltools/converters/sklearn/_SVC.py
+++ b/coremltools/converters/sklearn/_SVC.py
@@ -5,40 +5,43 @@
 from ...proto import Model_pb2 as _Model_pb2
 from ...proto import SVM_pb2 as _SVM_pb2
-from ... import SPECIFICATION_VERSION
+from ... import SPECIFICATION_VERSION as _SPECIFICATION_VERSION
 from ...models._interface_management import set_classifier_interface_params
-from ..._deps import HAS_SKLEARN as _HAS_SKLEARN
+from ..._deps import _HAS_SKLEARN
 from ...models import MLModel as _MLModel
 
 if _HAS_SKLEARN:
     from ._sklearn_util import check_fitted
     from sklearn.svm import SVC as _SVC
-    sklearn_class = _SVC
-model_type = 'classifier'
+
+    sklearn_class = _SVC
+model_type = "classifier"
 
 from ._svm_common import _set_kernel
 
+
 def _generate_base_svm_classifier_spec(model):
     """
     Takes an SVM classifier and produces a starting spec using the parts
     that are shared between all SVMs.
     """
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
-    check_fitted(model, lambda m: hasattr(m, 'support_vectors_'))
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
+
+    check_fitted(model, lambda m: hasattr(m, "support_vectors_"))
 
     spec = _Model_pb2.Model()
-    spec.specificationVersion = SPECIFICATION_VERSION
+    spec.specificationVersion = _SPECIFICATION_VERSION
     svm = spec.supportVectorClassifier
 
     _set_kernel(model, svm)
 
     for cur_rho in model.intercept_:
-        if(len(model.classes_) == 2):
+        if len(model.classes_) == 2:
             # For some reason Scikit Learn doesn't negate for binary classification
             svm.rho.append(cur_rho)
         else:
@@ -55,6 +58,7 @@ def _generate_base_svm_classifier_spec(model):
             cur_dest_vector.values.append(i)
     return spec
 
+
 def convert(model, feature_names, target):
     """Convert a Support Vector Classification (SVC) model to the protobuf spec.
     Parameters
     ----------
@@ -73,18 +77,28 @@ def convert(model, feature_names, target):
     model_spec: An object of type Model_pb. Protobuf representation of the model
     """
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+ ) spec = _generate_base_svm_classifier_spec(model) - spec = set_classifier_interface_params(spec, feature_names, model.classes_, 'supportVectorClassifier', output_features = target) + spec = set_classifier_interface_params( + spec, + feature_names, + model.classes_, + "supportVectorClassifier", + output_features=target, + ) svm = spec.supportVectorClassifier for i in model.n_support_: svm.numberOfSupportVectorsPerClass.append(int(i)) if len(model.probA_) != 0 and len(model.classes_) == 2: - print("[WARNING] Scikit Learn uses a technique to normalize pairwise probabilities even for binary classification. " - "This can cause differences in predicted probabilities, usually less than 0.5%.") + print( + "[WARNING] Scikit Learn uses a technique to normalize pairwise probabilities even for binary classification. " + "This can cause differences in predicted probabilities, usually less than 0.5%." + ) # If this is an empty list, then model.probA_ will be an empty list. if len(model.probA_) != 0: @@ -96,17 +110,24 @@ def convert(model, feature_names, target): return _MLModel(spec) + def supports_output_scores(model): - return (len(model.probA_) != 0) + return len(model.probA_) != 0 + def get_output_classes(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + check_fitted(model, lambda m: hasattr(m, "support_vectors_")) return list(model.classes_) + def get_input_dimension(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + check_fitted(model, lambda m: hasattr(m, "support_vectors_")) return len(model.support_vectors_[0]) diff --git a/coremltools/converters/sklearn/_SVR.py b/coremltools/converters/sklearn/_SVR.py index e197d69cc..04ff36e55 100644 --- a/coremltools/converters/sklearn/_SVR.py +++ b/coremltools/converters/sklearn/_SVR.py @@ -7,27 +7,31 @@ from ...models._interface_management import set_regressor_interface_params from ... import SPECIFICATION_VERSION -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: from ._sklearn_util import check_fitted from sklearn.svm import SVR as _SVR from . import _sklearn_util + sklearn_class = _SVR -model_type = 'regressor' +model_type = "regressor" from ._svm_common import _set_kernel + def _generate_base_svm_regression_spec(model): """ Takes an SVM regression model produces a starting spec using the parts. that are shared between all SVMs. """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + spec = _Model_pb2.Model() spec.specificationVersion = SPECIFICATION_VERSION svm = spec.supportVectorRegressor @@ -45,6 +49,7 @@ def _generate_base_svm_regression_spec(model): cur_dest_vector.values.append(i) return spec + def convert(model, features, target): """Convert a Support Vector Regressor (SVR) model to the protobuf spec. 
Parameters @@ -67,8 +72,11 @@ def convert(model, features, target): spec = set_regressor_interface_params(spec, features, target) return _MLModel(spec) + def get_input_dimension(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - check_fitted(model, lambda m: hasattr(m, 'support_vectors_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + check_fitted(model, lambda m: hasattr(m, "support_vectors_")) return len(model.support_vectors_[0]) diff --git a/coremltools/converters/sklearn/__init__.py b/coremltools/converters/sklearn/__init__.py index e98ea9282..77268c40c 100644 --- a/coremltools/converters/sklearn/__init__.py +++ b/coremltools/converters/sklearn/__init__.py @@ -6,4 +6,3 @@ # A single function to manage the importing. from ._converter import convert - diff --git a/coremltools/converters/sklearn/_converter.py b/coremltools/converters/sklearn/_converter.py index d198283f0..aeab54414 100644 --- a/coremltools/converters/sklearn/_converter.py +++ b/coremltools/converters/sklearn/_converter.py @@ -3,12 +3,15 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +from coremltools import __version__ as ct_version +from coremltools.models import _METADATA_VERSION, _METADATA_SOURCE + """ Defines the primary function for converting scikit-learn models. """ -def convert(sk_obj, input_features = None, - output_feature_names = None): + +def convert(sk_obj, input_features=None, output_feature_names=None): """ Convert scikit-learn pipeline, classifier, or regressor to Core ML format. @@ -47,12 +50,12 @@ def convert(sk_obj, input_features = None, input_features: str | dict | list - + Optional name(s) that can be given to the inputs of the scikit-learn model. Defaults to 'input'. - + Input features can be specified in a number of forms. - + - Single string: In this case, the input is assumed to be a single array, with the number of dimensions set using num_dimensions. @@ -139,12 +142,20 @@ class prediction and the array of scores for each class (defaults to # that sklearn isn't actually imported unless this function is called from ...models import MLModel - # NOTE: Providing user-defined class labels will be enabled when + # NOTE: Providing user-defined class labels will be enabled when # several issues with the ordering of the classes are worked out. For now, - # to use custom class labels, directly import the internal function below. + # to use custom class labels, directly import the internal function below. 
from ._converter_internal import _convert_sklearn_model + spec = _convert_sklearn_model( - sk_obj, input_features, output_feature_names, class_labels = None) + sk_obj, input_features, output_feature_names, class_labels=None + ) - return MLModel(spec) + model = MLModel(spec) + from sklearn import __version__ as sklearn_version + model.user_defined_metadata[_METADATA_VERSION] = ct_version + model.user_defined_metadata[_METADATA_SOURCE] = "scikit-learn=={0}".format( + sklearn_version + ) + return model diff --git a/coremltools/converters/sklearn/_converter_internal.py b/coremltools/converters/sklearn/_converter_internal.py index 6b1c83795..5bfdf0448 100644 --- a/coremltools/converters/sklearn/_converter_internal.py +++ b/coremltools/converters/sklearn/_converter_internal.py @@ -14,9 +14,9 @@ from ...models.feature_vectorizer import create_feature_vectorizer from ...models.pipeline import Pipeline, PipelineRegressor, PipelineClassifier -from ..._deps import HAS_SKLEARN +from ..._deps import _HAS_SKLEARN -if HAS_SKLEARN: +if _HAS_SKLEARN: import sklearn as _sklearn from sklearn.pipeline import Pipeline as sk_Pipeline @@ -48,76 +48,89 @@ _PIPELINE_INTERNAL_FEATURE_NAME = "__feature_vector__" _converter_module_list = [ - _dict_vectorizer, - _one_hot_encoder, - _normalizer, - _standard_scaler, - _imputer, - _NuSVC, - _NuSVR, - _SVC, - _SVR, - _linear_regression, - _LinearSVC, - _LinearSVR, - _logistic_regression, - _random_forest_classifier, - _random_forest_regressor, - _decision_tree_classifier, - _decision_tree_regressor, - _gradient_boosting_classifier, - _gradient_boosting_regressor, - _k_neighbors_classifier] + _dict_vectorizer, + _one_hot_encoder, + _normalizer, + _standard_scaler, + _imputer, + _NuSVC, + _NuSVR, + _SVC, + _SVR, + _linear_regression, + _LinearSVC, + _LinearSVR, + _logistic_regression, + _random_forest_classifier, + _random_forest_regressor, + _decision_tree_classifier, + _decision_tree_regressor, + _gradient_boosting_classifier, + _gradient_boosting_regressor, + _k_neighbors_classifier, +] + def _test_module(m): assert m.model_type in ["transformer", "regressor", "classifier"], m.__name__ if m.model_type == "transformer": - assert hasattr(m, 'update_dimension'), m.__name__ + assert hasattr(m, "update_dimension"), m.__name__ if m.model_type == "classifier": assert hasattr(m, "supports_output_scores"), m.__name__ assert hasattr(m, "get_output_classes"), m.__name__ - assert hasattr(m, 'sklearn_class'), m.__name__ + assert hasattr(m, "sklearn_class"), m.__name__ assert hasattr(m, "get_input_dimension"), m.__name__ return True + assert all(_test_module(m) for m in _converter_module_list) -_converter_lookup = dict( (md.sklearn_class, i) for i, md in enumerate(_converter_module_list)) +_converter_lookup = dict( + (md.sklearn_class, i) for i, md in enumerate(_converter_module_list) +) _converter_functions = [md.convert for md in _converter_module_list] + def _get_converter_module(sk_obj): """ - Returns the module holding the conversion functions for a + Returns the module holding the conversion functions for a particular model). """ try: cv_idx = _converter_lookup[sk_obj.__class__] except KeyError: raise ValueError( - "Transformer '%s' not supported; supported transformers are %s." - % (repr(sk_obj), - ",".join(k.__name__ for k in _converter_module_list))) + "Transformer '%s' not supported; supported transformers are %s." 
+        % (repr(sk_obj), ",".join(k.__name__ for k in _converter_module_list))
+    )
     return _converter_module_list[cv_idx]
 
+
 def _is_sklearn_model(sk_obj):
-    if not(HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
     from sklearn.pipeline import Pipeline as sk_Pipeline
-    return (isinstance(sk_obj, sk_Pipeline)
-            or sk_obj.__class__ in _converter_lookup)
 
-def _convert_sklearn_model(input_sk_obj, input_features = None,
-                           output_feature_names = None, class_labels = None):
+    return isinstance(sk_obj, sk_Pipeline) or sk_obj.__class__ in _converter_lookup
+
+
+def _convert_sklearn_model(
+    input_sk_obj, input_features=None, output_feature_names=None, class_labels=None
+):
     """
-    Converts a generic sklearn pipeline, transformer, classifier, or regressor
+    Converts a generic sklearn pipeline, transformer, classifier, or regressor
     into a Core ML specification.
     """
-    if not(HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
 
     from sklearn.pipeline import Pipeline as sk_Pipeline
-    
+
     if input_features is None:
         input_features = "input"
 
@@ -129,26 +142,28 @@ def _convert_sklearn_model(input_sk_obj, input_features = None,
     if len(sk_obj_list) == 0:
         raise ValueError("No SKLearn transformers supplied.")
 
-    # Put the transformers into a pipeline list to hold them so that they can
-    # later be added to a pipeline object.  (Hold off adding them to the
+    # Put the transformers into a pipeline list to hold them so that they can
+    # later be added to a pipeline object. (Hold off adding them to the
     # pipeline now in case it's a single model at the end, in which case it
     # gets returned as is.)
     #
-    # Each member of the pipeline list is a tuple of the proto spec for that
+    # Each member of the pipeline list is a tuple of the proto spec for that
     # model, the input features, and the output features.
     pipeline_list = []
 
-    # These help us keep track of what's going on a bit easier.
-    Input = _namedtuple('InputTransformer', ['name', 'sk_obj', 'module'])
-    Output = _namedtuple('CoreMLTransformer', ['spec', 'input_features', 'output_features'])
+    # These help us keep track of what's going on a bit more easily.
+    Input = _namedtuple("InputTransformer", ["name", "sk_obj", "module"])
+    Output = _namedtuple(
+        "CoreMLTransformer", ["spec", "input_features", "output_features"]
+    )
 
-
-    # Get a more information rich representation of the list for convenience.
-    # obj_list is a list of tuples of (name, sk_obj, and the converter module for
+    # Get a more information-rich representation of the list for convenience.
+    # obj_list is a list of (name, sk_obj, converter module) tuples, one for each
     # step in the list.
-    obj_list = [ Input(sk_obj_name, sk_obj, _get_converter_module(sk_obj))
-                 for sk_obj_name, sk_obj in sk_obj_list]
-
+    obj_list = [
+        Input(sk_obj_name, sk_obj, _get_converter_module(sk_obj))
+        for sk_obj_name, sk_obj in sk_obj_list
+    ]
 
     # Various preprocessing steps.
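(Aside, not part of the patch: the registry above is duck-typed, so a module enters `_converter_module_list` simply by exposing the attributes `_test_module` asserts on. A minimal illustrative skeleton follows; `StandardScaler` is a stand-in only, and this is not the project's actual `_standard_scaler` module. Classifier modules would additionally expose `supports_output_scores` and `get_output_classes`.)

```python
# Illustrative skeleton of a converter module satisfying _test_module();
# hypothetical, for orientation only.
from sklearn.preprocessing import StandardScaler

model_type = "transformer"      # one of "transformer", "regressor", "classifier"
sklearn_class = StandardScaler  # the sklearn class this module converts


def get_input_dimension(model):
    # Return the expected input dimension, or None if it cannot be inferred.
    return getattr(model, "n_features_in_", None)


def update_dimension(model, input_dimension):
    # Transformers must map an input dimension to their output dimension.
    return input_dimension


def convert(model, input_features, output_features):
    # A real module would build and return an MLModel wrapping the proto spec.
    raise NotImplementedError("illustrative stub")
```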
@@ -160,16 +175,18 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, dv_obj = obj_list[0].sk_obj output_dim = len(_dict_vectorizer.get_input_feature_names(dv_obj)) - + if not isinstance(input_features, _string_types): - raise TypeError("If the first transformer in a pipeline is a " - "DictVectorizer, then the input feature must be the name " - "of the input dictionary.") + raise TypeError( + "If the first transformer in a pipeline is a " + "DictVectorizer, then the input feature must be the name " + "of the input dictionary." + ) input_features = [(input_features, datatypes.Dictionary(str))] - + if len(obj_list) > 1: - output_feature_name = _PIPELINE_INTERNAL_FEATURE_NAME + output_feature_name = _PIPELINE_INTERNAL_FEATURE_NAME else: if output_feature_names is None: @@ -177,42 +194,45 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, elif isinstance(output_feature_names, _string_types): output_feature_name = output_feature_names - + else: raise TypeError( "For a transformer pipeline, the " "output_features needs to be None or a string " - "for the predicted value.") - + "for the predicted value." + ) + output_features = [(output_feature_name, datatypes.Array(output_dim))] spec = _dict_vectorizer.convert(dv_obj, input_features, output_features)._spec - pipeline_list.append(Output(spec, input_features, output_features) ) + pipeline_list.append(Output(spec, input_features, output_features)) # Set up the environment for the rest of the pipeline current_input_features = output_features current_num_dimensions = output_dim - + # In the corner case that it's only the dict vectorizer here, just return - # and exit with that at this point. + # and exit with that at this point. if len(obj_list) == 1: return spec else: del obj_list[0] - else: + else: # First, we need to resolve the input feature types as the sklearn pipeline # expects just an array as input, but what we want to expose to the coreML - # user is an interface with named variables. This resolution has to handle - # a number of cases. + # user is an interface with named variables. This resolution has to handle + # a number of cases. # Can we get the number of features from the model? If so, pass that - # information into the feature resolution function. If we can't, then this + # information into the feature resolution function. If we can't, then this # function should return None. first_sk_obj = obj_list[0].sk_obj - num_dimensions = _get_converter_module(first_sk_obj).get_input_dimension(first_sk_obj) - # Resolve the input features. + num_dimensions = _get_converter_module(first_sk_obj).get_input_dimension( + first_sk_obj + ) + # Resolve the input features. 
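+    # (Feature "resolution" here normalizes whatever form the caller's
+    # input_features took -- e.g. a single name or a list of names -- into
+    # the canonical [(name, datatype)] pairs consumed by the assembly below.)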
features = _fm.process_or_validate_features(input_features, num_dimensions) current_num_dimensions = _fm.dimension_of_array_features(features) @@ -224,41 +244,47 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, current_input_features = features else: spec, _output_dimension = create_feature_vectorizer( - features, _PIPELINE_INTERNAL_FEATURE_NAME) + features, _PIPELINE_INTERNAL_FEATURE_NAME + ) assert _output_dimension == current_num_dimensions - ft_out_features = [(_PIPELINE_INTERNAL_FEATURE_NAME, - datatypes.Array(current_num_dimensions))] - pipeline_list.append( Output(spec, features, ft_out_features) ) + ft_out_features = [ + ( + _PIPELINE_INTERNAL_FEATURE_NAME, + datatypes.Array(current_num_dimensions), + ) + ] + pipeline_list.append(Output(spec, features, ft_out_features)) current_input_features = ft_out_features # Now, validate the sequence of transformers to make sure we have something # that can work with all of this. for i, (_, _, m) in enumerate(obj_list[:-1]): if m.model_type != "transformer": - raise ValueError("Only a sequence of transformer classes followed by a " - "single transformer, regressor, or classifier is currently supported. " - "(object in position %d interpreted as %s)" % (i, m.model_type)) + raise ValueError( + "Only a sequence of transformer classes followed by a " + "single transformer, regressor, or classifier is currently supported. " + "(object in position %d interpreted as %s)" % (i, m.model_type) + ) overall_mode = obj_list[-1].module.model_type - assert overall_mode in ('transformer', 'regressor', 'classifier') + assert overall_mode in ("transformer", "regressor", "classifier") # Now, go through each transformer in the sequence of transformers and add # it to the pipeline. - for _, sk_obj, sk_m in obj_list[: -1]: - + for _, sk_obj, sk_m in obj_list[:-1]: next_dimension = sk_m.update_dimension(sk_obj, current_num_dimensions) - output_features = [(_PIPELINE_INTERNAL_FEATURE_NAME, - datatypes.Array(next_dimension))] + output_features = [ + (_PIPELINE_INTERNAL_FEATURE_NAME, datatypes.Array(next_dimension)) + ] spec = sk_m.convert(sk_obj, current_input_features, output_features)._spec - pipeline_list.append( Output(spec, current_input_features, output_features)) + pipeline_list.append(Output(spec, current_input_features, output_features)) current_input_features = output_features current_num_dimensions = next_dimension - # Now, handle the final transformer. This is where we need to have different # behavior depending on whether it's a classifier, transformer, or regressor. _, last_sk_obj, last_sk_m = obj_list[-1] @@ -271,7 +297,8 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, class_labels = _internal_output_classes output_features = _fm.process_or_validate_classifier_output_features( - output_feature_names, class_labels, supports_output_scores) + output_feature_names, class_labels, supports_output_scores + ) elif overall_mode == "regressor": if output_feature_names is None: @@ -279,27 +306,38 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, elif isinstance(output_feature_names, _string_types): output_features = [(output_feature_names, datatypes.Double())] else: - raise TypeError("For a regressor object or regressor pipeline, the " - "output_features needs to be None or a string for the predicted value.") + raise TypeError( + "For a regressor object or regressor pipeline, the " + "output_features needs to be None or a string for the predicted value." 
+ ) - else: # transformer - final_output_dimension = last_sk_m.update_dimension(last_sk_obj, current_num_dimensions) + else: # transformer + final_output_dimension = last_sk_m.update_dimension( + last_sk_obj, current_num_dimensions + ) if output_feature_names is None: - output_features = [("transformed_features", - datatypes.Array(final_output_dimension))] + output_features = [ + ("transformed_features", datatypes.Array(final_output_dimension)) + ] elif isinstance(output_feature_names, _string_types): - output_features = [(output_feature_names, datatypes.Array(final_output_dimension))] + output_features = [ + (output_feature_names, datatypes.Array(final_output_dimension)) + ] else: - raise TypeError("For a transformer object or transformer pipeline, the " - "output_features needs to be None or a string for the " - "name of the transformed value.") + raise TypeError( + "For a transformer object or transformer pipeline, the " + "output_features needs to be None or a string for the " + "name of the transformed value." + ) - last_spec = last_sk_m.convert(last_sk_obj, current_input_features, output_features)._spec + last_spec = last_sk_m.convert( + last_sk_obj, current_input_features, output_features + )._spec - pipeline_list.append( Output(last_spec, current_input_features, output_features) ) + pipeline_list.append(Output(last_spec, current_input_features, output_features)) # Now, create the pipeline and return the spec for it. @@ -309,12 +347,13 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, original_input_features = pipeline_list[0].input_features - if overall_mode == 'regressor': + if overall_mode == "regressor": pipeline = PipelineRegressor(original_input_features, output_features) - elif overall_mode == 'classifier': - pipeline = PipelineClassifier(original_input_features, - class_labels, output_features) + elif overall_mode == "classifier": + pipeline = PipelineClassifier( + original_input_features, class_labels, output_features + ) else: pipeline = Pipeline(original_input_features, output_features) @@ -324,4 +363,3 @@ def _convert_sklearn_model(input_sk_obj, input_features = None, pipeline.add_model(spec) return pipeline.spec - diff --git a/coremltools/converters/sklearn/_decision_tree_classifier.py b/coremltools/converters/sklearn/_decision_tree_classifier.py index 12e79c674..5876f38e1 100644 --- a/coremltools/converters/sklearn/_decision_tree_classifier.py +++ b/coremltools/converters/sklearn/_decision_tree_classifier.py @@ -5,16 +5,17 @@ from ._tree_ensemble import convert_tree_ensemble -from ..._deps import HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel -if HAS_SKLEARN: +if _HAS_SKLEARN: import sklearn.tree as _tree from . import _sklearn_util -model_type = 'classifier' +model_type = "classifier" sklearn_class = _tree.DecisionTreeClassifier + def convert(model, input_name, output_features): """Convert a decision tree model to protobuf format. @@ -34,23 +35,34 @@ def convert(model, input_name, output_features): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." 
+ ) + _sklearn_util.check_expected_type(model, _tree.DecisionTreeClassifier) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'tree_') and model.tree_ is not None) + _sklearn_util.check_fitted( + model, lambda m: hasattr(m, "tree_") and model.tree_ is not None + ) + + return _MLModel( + convert_tree_ensemble( + model, + input_name, + output_features, + mode="classifier", + class_labels=model.classes_, + ) + ) - return _MLModel(convert_tree_ensemble(model, input_name, output_features, - mode = 'classifier', - class_labels = model.classes_)) def supports_output_scores(model): return True + def get_output_classes(model): return list(model.classes_) + def get_input_dimension(model): return model.n_features_ - - diff --git a/coremltools/converters/sklearn/_decision_tree_regressor.py b/coremltools/converters/sklearn/_decision_tree_regressor.py index 08631d7e7..79dee1d1f 100644 --- a/coremltools/converters/sklearn/_decision_tree_regressor.py +++ b/coremltools/converters/sklearn/_decision_tree_regressor.py @@ -5,16 +5,17 @@ from ._tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: import sklearn.tree as _tree from . import _sklearn_util -model_type = 'regressor' +model_type = "regressor" sklearn_class = _tree.DecisionTreeRegressor + def convert(model, feature_names, target): """Convert a decision tree model to protobuf format. @@ -34,14 +35,17 @@ def convert(model, feature_names, target): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) _sklearn_util.check_expected_type(model, _tree.DecisionTreeRegressor) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'tree_') and model.tree_ is not None) + _sklearn_util.check_fitted( + model, lambda m: hasattr(m, "tree_") and model.tree_ is not None + ) return _MLModel(_convert_tree_ensemble(model, feature_names, target)) + def get_input_dimension(model): return model.n_features_ - - diff --git a/coremltools/converters/sklearn/_dict_vectorizer.py b/coremltools/converters/sklearn/_dict_vectorizer.py index 062e0e73b..409ae9d70 100644 --- a/coremltools/converters/sklearn/_dict_vectorizer.py +++ b/coremltools/converters/sklearn/_dict_vectorizer.py @@ -15,15 +15,17 @@ from ...models.feature_vectorizer import create_feature_vectorizer from ...models import MLModel as _MLModel -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN + if _HAS_SKLEARN: from sklearn.feature_extraction import DictVectorizer + sklearn_class = DictVectorizer from ...models import datatypes from ...models.pipeline import Pipeline -model_type = 'transformer' +model_type = "transformer" def convert(model, input_features, output_features): @@ -54,9 +56,9 @@ def convert(model, input_features, output_features): # Ensure that the output_features are also solid. output_features = process_or_validate_features(output_features, n_dimensions) - # The DictVectorizer in the framework outputs a sparse dictionary - # of index to value due to other considerations, but we are expecting - # the output of this to be a dense feature vector. 
To make that happen,
+    # put a feature_vectorizer immediately after the dict vectorizer.
 
     pline = Pipeline(input_features, output_features)
@@ -71,41 +73,47 @@ def convert(model, input_features, output_features):
         if isinstance(feature_name, _six.string_types):
             if is_str == False:
                 raise ValueError("Mapping of DictVectorizer mixes int and str types.")
-
+
             tr_spec.stringToIndex.vector.append(feature_name)
             is_str = True
 
         if isinstance(feature_name, _six.integer_types):
             if is_str == True:
                 raise ValueError("Mapping of DictVectorizer mixes int and str types.")
-
+
             tr_spec.int64ToIndex.vector.append(feature_name)
             is_str = False
 
-    intermediate_features = [(_INTERMEDIATE_FEATURE_NAME,
-            datatypes.Dictionary(key_type = int))]
+    intermediate_features = [
+        (_INTERMEDIATE_FEATURE_NAME, datatypes.Dictionary(key_type=int))
+    ]
 
-    # Set the interface for the dict vectorizer with the input and the
+    # Set the interface for the dict vectorizer with the input and the
     # intermediate output
-    set_transform_interface_params(
-            dv_spec, input_features, intermediate_features)
+    set_transform_interface_params(dv_spec, input_features, intermediate_features)
 
     pline.add_model(dv_spec)
 
-    # Follow the dict vectorizer by a feature_vectorizer to change the sparse
+    # Follow the dict vectorizer by a feature_vectorizer to change the sparse
     # output layer into a dense vector as expected.
-    fvec, _num_out_dim = create_feature_vectorizer(intermediate_features,
-            output_features[0][0], {"__sparse_vector_features__" : n_dimensions})
+    fvec, _num_out_dim = create_feature_vectorizer(
+        intermediate_features,
+        output_features[0][0],
+        {"__sparse_vector_features__": n_dimensions},
+    )
 
-    pline.add_model(fvec)
+    pline.add_model(fvec)
 
     return _MLModel(pline.spec)
 
-def update_dimension(m, current_num_dimensions):
+
+def update_dimension(m, current_num_dimensions):
     return len(m.feature_names_)
 
+
 def get_input_dimension(m):
     return None
 
-def get_input_feature_names(m):
+
+def get_input_feature_names(m):
     return m.feature_names_
diff --git a/coremltools/converters/sklearn/_gradient_boosting_classifier.py b/coremltools/converters/sklearn/_gradient_boosting_classifier.py
index 6795b0af6..2ae0c3b58 100644
--- a/coremltools/converters/sklearn/_gradient_boosting_classifier.py
+++ b/coremltools/converters/sklearn/_gradient_boosting_classifier.py
@@ -6,15 +6,17 @@
 from ._tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble
 from ._tree_ensemble import get_input_dimension
-from ..._deps import HAS_SKLEARN as _HAS_SKLEARN
+from ..._deps import _HAS_SKLEARN
 from ...models import MLModel as _MLModel
 
 if _HAS_SKLEARN:
     import sklearn.ensemble as _ensemble
     from . import _sklearn_util
+
     sklearn_class = _ensemble.GradientBoostingClassifier
 
-model_type = 'classifier'
+model_type = "classifier"
+
 
 def convert(model, feature_names, target):
     """Convert a boosted tree model to protobuf format.
@@ -35,36 +37,48 @@ def convert(model, feature_names, target):
     model_spec: An object of type Model_pb. Protobuf representation of the model
     """
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+ ) + _sklearn_util.check_expected_type(model, _ensemble.GradientBoostingClassifier) + def is_gbr_model(m): if len(m.estimators_) == 0: return False - if hasattr(m, 'estimators_') and m.estimators_ is not None: + if hasattr(m, "estimators_") and m.estimators_ is not None: for t in m.estimators_.flatten(): - if not hasattr(t, 'tree_') or t.tree_ is None: + if not hasattr(t, "tree_") or t.tree_ is None: return False return True else: return False + _sklearn_util.check_fitted(model, is_gbr_model) post_evaluation_transform = None if model.n_classes_ == 2: base_prediction = [model.init_.prior] - post_evaluation_transform = 'Regression_Logistic' + post_evaluation_transform = "Regression_Logistic" else: base_prediction = list(model.init_.priors) - post_evaluation_transform = 'Classification_SoftMax' - return _MLModel(_convert_tree_ensemble(model, feature_names, target, mode = 'classifier', - base_prediction = base_prediction, class_labels = model.classes_, - post_evaluation_transform = post_evaluation_transform)) + post_evaluation_transform = "Classification_SoftMax" + return _MLModel( + _convert_tree_ensemble( + model, + feature_names, + target, + mode="classifier", + base_prediction=base_prediction, + class_labels=model.classes_, + post_evaluation_transform=post_evaluation_transform, + ) + ) + def supports_output_scores(model): return True + def get_output_classes(model): return list(model.classes_) - - diff --git a/coremltools/converters/sklearn/_gradient_boosting_regressor.py b/coremltools/converters/sklearn/_gradient_boosting_regressor.py index 7b927005a..2a13614d3 100644 --- a/coremltools/converters/sklearn/_gradient_boosting_regressor.py +++ b/coremltools/converters/sklearn/_gradient_boosting_regressor.py @@ -6,15 +6,17 @@ from ._tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble from ._tree_ensemble import get_input_dimension -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: import sklearn.ensemble as _ensemble from . import _sklearn_util + sklearn_class = _ensemble.GradientBoostingRegressor - -model_type = 'regressor' + +model_type = "regressor" + def convert(model, input_features, output_features): """Convert a boosted tree model to protobuf format. @@ -35,16 +37,19 @@ def convert(model, input_features, output_features): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." 
+    )
 
     _sklearn_util.check_expected_type(model, _ensemble.GradientBoostingRegressor)
+
     def is_gbr_model(m):
         if len(m.estimators_) == 0:
             return False
-        if hasattr(m, 'estimators_') and m.estimators_ is not None:
+        if hasattr(m, "estimators_") and m.estimators_ is not None:
             for t in m.estimators_.flatten():
-                if not hasattr(t, 'tree_') or t.tree_ is None:
+                if not hasattr(t, "tree_") or t.tree_ is None:
                     return False
             return True
         else:
@@ -54,6 +59,8 @@ def is_gbr_model(m):
 
     base_prediction = model.init_.mean
 
-    return _MLModel(_convert_tree_ensemble(model, input_features, output_features,
-            base_prediction = base_prediction))
-
+    return _MLModel(
+        _convert_tree_ensemble(
+            model, input_features, output_features, base_prediction=base_prediction
+        )
+    )
diff --git a/coremltools/converters/sklearn/_imputer.py b/coremltools/converters/sklearn/_imputer.py
index 48373904f..5507e49d5 100644
--- a/coremltools/converters/sklearn/_imputer.py
+++ b/coremltools/converters/sklearn/_imputer.py
@@ -10,20 +10,24 @@
 from ...models import datatypes
 from ...models import MLModel as _MLModel
-from ..._deps import HAS_SKLEARN as _HAS_SKLEARN
+from ..._deps import _HAS_SKLEARN
 
 if _HAS_SKLEARN:
     import sklearn
+
     try:
         # scikit-learn >= 0.21
         from sklearn.impute import SimpleImputer as Imputer
+
         sklearn_class = sklearn.impute.SimpleImputer
     except ImportError:
         # scikit-learn < 0.21
         from sklearn.preprocessing import Imputer
+
         sklearn_class = sklearn.preprocessing.Imputer
-
-    model_type = 'transformer'
+
+    model_type = "transformer"
+
 
 def convert(model, input_features, output_features):
     """Convert an Imputer model to the protobuf spec.
     Parameters
     ----------
@@ -44,9 +48,11 @@ def convert(model, input_features, output_features):
     model_spec: An object of type Model_pb. Protobuf representation of the model
     """
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
+
     # Set the interface params.
     spec = _Model_pb2.Model()
     spec.specificationVersion = SPECIFICATION_VERSION
@@ -59,15 +65,14 @@ def convert(model, input_features, output_features):
 
     # Test the scikit-learn model
     _sklearn_util.check_expected_type(model, Imputer)
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'statistics_'))
-
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "statistics_"))
+
     if model.axis != 0:
         raise ValueError("Imputation is only supported along axis = 0.")
 
-    # The imputer in our framework only works on single columns, so
-    # we need to translate that over.  The easiest way to do that is to
-    # put it in a nested pipeline with a feature extractor and a
 
     tr_spec = spec.imputer
 
@@ -77,8 +82,9 @@ def convert(model, input_features, output_features):
     try:
         tr_spec.replaceDoubleValue = float(model.missing_values)
     except ValueError:
-        raise ValueError("Only scalar values or NAN as missing_values "
-                         "in _imputer are supported.")
+        raise ValueError(
+            "Only scalar values or NAN as missing_values " "in _imputer are supported."
+        )
 
     return _MLModel(spec)
 
@@ -92,11 +98,12 @@ def update_dimension(model, input_dimension):
     # This doesn't expand anything.
     return input_dimension
 
-def get_input_dimension(model):
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found.
scikit-learn conversion API is disabled.') - - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'statistics_')) - return len(model.statistics_) +def get_input_dimension(model): + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "statistics_")) + return len(model.statistics_) diff --git a/coremltools/converters/sklearn/_k_neighbors_classifier.py b/coremltools/converters/sklearn/_k_neighbors_classifier.py index b75284d7e..bb2f28df4 100644 --- a/coremltools/converters/sklearn/_k_neighbors_classifier.py +++ b/coremltools/converters/sklearn/_k_neighbors_classifier.py @@ -3,13 +3,14 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from ..._deps import HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel import coremltools + # from coremltools.proto import Model_pb2 from coremltools.proto import FeatureTypes_pb2 -if HAS_SKLEARN: +if _HAS_SKLEARN: import sklearn.neighbors as _neighbors from . import _sklearn_util @@ -17,9 +18,10 @@ import scipy as sp import six as _six -model_type = 'classifier' +model_type = "classifier" sklearn_class = _neighbors.KNeighborsClassifier + def convert(model, input_name, output_name): """Convert a scikit KNeighborsClassifier to protobuf format. @@ -39,8 +41,10 @@ def convert(model, input_name, output_name): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not (HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) _sklearn_util.check_expected_type(model, sklearn_class) @@ -51,15 +55,18 @@ def convert(model, input_name, output_name): return _MLModel(_convert_k_neighbors_classifier(model, input_name, output_name)) + def supports_output_scores(model): """KNeighborsClassifier models do not support output scores.""" return False + def get_output_classes(model): """Get the candidate classes for the model.""" _check_fitted(model) return list(model.classes_) + def _convert_k_neighbors_classifier(model, input_name, output_name): """Convert the scikit KNeighborsClassifier to CoreML. Assumes initial validation of the scikit model has been done.""" @@ -68,27 +75,43 @@ def _convert_k_neighbors_classifier(model, input_name, output_name): spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue = model.n_neighbors spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue = 1 - spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue = _number_of_samples(model, spec) # is there a better heuristic to use here? + spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue = _number_of_samples( + model, spec + ) # is there a better heuristic to use here? 
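+    # (Capping the range at the stored sample count is deliberate: k can
+    # never usefully exceed the number of training points held in the index.)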
number_of_dimensions = 0 if _is_algorithm_brute(model): number_of_dimensions = model._fit_X.shape[1] - spec.kNearestNeighborsClassifier.nearestNeighborsIndex.linearIndex.MergeFromString(b'') + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.linearIndex.MergeFromString( + b"" + ) elif _is_algorithm_kd_tree(model): npdata = np.asarray(model._tree.data) number_of_dimensions = get_input_dimension(model) - spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize = model.leaf_size + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize = ( + model.leaf_size + ) else: - raise TypeError('KNeighbors algorithm not supported for CoreML conversion: {}'.format(model.algorithm)) - spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions = number_of_dimensions + raise TypeError( + "KNeighbors algorithm not supported for CoreML conversion: {}".format( + model.algorithm + ) + ) + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions = ( + number_of_dimensions + ) # Make sure the distance function is set - spec.kNearestNeighborsClassifier.nearestNeighborsIndex.squaredEuclideanDistance.MergeFromString(b'') + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.squaredEuclideanDistance.MergeFromString( + b"" + ) input_features = spec.description.input.add() input_features.name = input_name[0][0] input_features.type.multiArrayType.shape.extend([number_of_dimensions]) - input_features.type.multiArrayType.dataType = FeatureTypes_pb2.ArrayFeatureType.FLOAT32 + input_features.type.multiArrayType.dataType = ( + FeatureTypes_pb2.ArrayFeatureType.FLOAT32 + ) output_label = spec.description.output.add() output_label.name = output_name[0][0] @@ -97,13 +120,14 @@ def _convert_k_neighbors_classifier(model, input_name, output_name): spec.description.predictedFeatureName = output_label.name # Need to confirm if scikit only accepts integer labels - output_label.type.int64Type.MergeFromString(b'') - spec.kNearestNeighborsClassifier.uniformWeighting.MergeFromString(b'') + output_label.type.int64Type.MergeFromString(b"") + spec.kNearestNeighborsClassifier.uniformWeighting.MergeFromString(b"") _extract_training_data(model, spec) return spec + def _number_of_samples(model, spec): """Get the number of samples the model is fitted to.""" @@ -113,6 +137,7 @@ def _number_of_samples(model, spec): return len(np.asarray(model._tree.data)) return 0 + def _extract_training_data(model, spec): """Extract the training data from the scikit model and add it to the CoreML spec""" @@ -122,7 +147,9 @@ def _extract_training_data(model, spec): X = _unpack_sparse(X) for sample in X: - coreml_sample = spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples.add() + coreml_sample = ( + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples.add() + ) for feature in sample: coreml_sample.vector.append(feature) @@ -130,13 +157,16 @@ def _extract_training_data(model, spec): # sklearn guarantees that tree data is not stored in a sparse format npdata = np.asarray(model._tree.data) for sample in npdata: - coreml_sample = spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples.add() + coreml_sample = ( + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples.add() + ) for feature in sample: coreml_sample.vector.append(feature) for label in model._y: spec.kNearestNeighborsClassifier.int64ClassLabels.vector.append(label) + def get_input_dimension(model): """Get the input dimension for the model""" 
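+    # (Brute-force indexes keep the raw training matrix in model._fit_X,
+    # while kd-tree indexes expose it through model._tree.data; either way,
+    # the width of one stored sample gives the input dimension.)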
_check_fitted(model) @@ -147,81 +177,112 @@ def get_input_dimension(model): npdata = np.asarray(model._tree.data) number_of_dimensions = len(npdata[0]) else: - raise TypeError('KNeighbors algorithm not supported for CoreML conversion: {}'.format(model.algorithm)) + raise TypeError( + "KNeighbors algorithm not supported for CoreML conversion: {}".format( + model.algorithm + ) + ) return number_of_dimensions + def _check_fitted(model): """Simple wrapper to check if the KNeighborsClassifier has been fitted.""" - return _sklearn_util.check_fitted(model, lambda m: hasattr(m, '_fit_method') or hasattr(m, '_fit_X')) + return _sklearn_util.check_fitted( + model, lambda m: hasattr(m, "_fit_method") or hasattr(m, "_fit_X") + ) + def _check_algorithm(model): """Ensure the kNeighbors algorithm for the given scikit model is a supported type""" is_valid = False - print_name = '' - if model.algorithm == 'brute' or model.algorithm == 'kd_tree': + print_name = "" + if model.algorithm == "brute" or model.algorithm == "kd_tree": is_valid = True print_name = model.algorithm - elif model.algorithm == 'auto' and model._fit_method == 'kd_tree': + elif model.algorithm == "auto" and model._fit_method == "kd_tree": is_valid = True - print_name = 'kd_tree' - elif model.algorithm == 'auto' and model._fit_method == 'brute': + print_name = "kd_tree" + elif model.algorithm == "auto" and model._fit_method == "brute": is_valid = True - print_name = 'brute' + print_name = "brute" if not is_valid: - raise TypeError('KNeighbors algorithm not supported for CoreML conversion: {}'.format(print_name)) + raise TypeError( + "KNeighbors algorithm not supported for CoreML conversion: {}".format( + print_name + ) + ) + def _check_weighting_scheme(model): """Simple wrapper to ensure the weighting scheme is valid for CoreML conversion""" is_valid = False - if model.weights == 'uniform': + if model.weights == "uniform": is_valid = True # Other cases CoreML doesn't support include weighting by distance or a user-provided 'callable' object. if not is_valid: - print_name = '' + print_name = "" if _is_printable(model.weights): print_name = model.weights else: - print_name = getattr(model.weights, '__name__', repr(model.weights)) - raise TypeError('KNeighbors weight function not supported for CoreML conversion: {}'.format(print_name)) + print_name = getattr(model.weights, "__name__", repr(model.weights)) + raise TypeError( + "KNeighbors weight function not supported for CoreML conversion: {}".format( + print_name + ) + ) + def _check_distance_metric(model): """Simple wrapper to ensure the distance metric is valid for CoreML conversion""" is_valid = False - if model.metric == 'euclidean': + if model.metric == "euclidean": is_valid = True - elif model.metric == 'minkowski' and model.p == 2: + elif model.metric == "minkowski" and model.p == 2: is_valid = True # There are a number of other distance metrics supported by scikit that CoreML doesn't currently support. 
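+    # (Only plain Euclidean geometry survives conversion, since the spec's
+    # index is built on squaredEuclideanDistance; hence "euclidean", or
+    # "minkowski" with p == 2, are the only metrics accepted here.)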
if not is_valid: - print_name = '' + print_name = "" if _is_printable(model.metric): print_name = model.metric else: - print_name = getattr(model.metric, '__name__', repr(model.metric)) - raise TypeError('KNeighbors distance metric not supported for CoreML conversion: {}'.format(print_name)) + print_name = getattr(model.metric, "__name__", repr(model.metric)) + raise TypeError( + "KNeighbors distance metric not supported for CoreML conversion: {}".format( + print_name + ) + ) + def _is_algorithm_brute(model): """Checks if the algorithm for the scikit model is set to 'brute'.""" - return model.algorithm == 'brute' or (model.algorithm == 'auto' and model._fit_method == 'brute') + return model.algorithm == "brute" or ( + model.algorithm == "auto" and model._fit_method == "brute" + ) + def _is_algorithm_kd_tree(model): """Checks if the algorithm for the scikit model is set to 'kd_tree'.""" - return model.algorithm == 'kd_tree' or (model.algorithm == 'auto' and model._fit_method == 'kd_tree') + return model.algorithm == "kd_tree" or ( + model.algorithm == "auto" and model._fit_method == "kd_tree" + ) + def _is_printable(obj): """Check if the object is a valid text type.""" return isinstance(obj, _six.string_types) + def _is_valid_sparse_format(obj): """Check if the object is in CSR sparse format (the only valid type for KNeighborsClassifier)""" return isinstance(obj, sp.sparse.csr_matrix) + def _unpack_sparse(obj): """Unpack the sparse matrix into a format that we can easily iterate over for insertion into a CoreML model.""" if not sp.sparse.issparse(obj): - raise TypeError('Object {} is not a scipy sparse matrix type'.format(type(obj))) + raise TypeError("Object {} is not a scipy sparse matrix type".format(type(obj))) return obj.toarray() diff --git a/coremltools/converters/sklearn/_linear_regression.py b/coremltools/converters/sklearn/_linear_regression.py index 7631d6045..26ba2b387 100644 --- a/coremltools/converters/sklearn/_linear_regression.py +++ b/coremltools/converters/sklearn/_linear_regression.py @@ -10,18 +10,19 @@ import numpy as _np -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: from . import _sklearn_util import sklearn from sklearn.linear_model import LinearRegression - model_type = 'regressor' + + model_type = "regressor" sklearn_class = sklearn.linear_model.LinearRegression -def convert(model, features, target): +def convert(model, features, target): """Convert a linear regression model to the protobuf spec. Parameters ---------- @@ -39,12 +40,14 @@ def convert(model, features, target): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + # Check the scikit learn model _sklearn_util.check_expected_type(model, LinearRegression) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_")) return _MLModel(_convert(model, features, target)) @@ -58,8 +61,8 @@ def _convert(model, features, target): # Add parameters for the linear regression. 
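+    # (glmRegressor computes y = offset + weights . x, so sklearn's
+    # intercept_ maps to the offset and coef_ to the single weight vector.)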
lr = spec.glmRegressor - if(isinstance(model.intercept_, _np.ndarray)): - assert(len(model.intercept_) == 1) + if isinstance(model.intercept_, _np.ndarray): + assert len(model.intercept_) == 1 lr.offset.append(model.intercept_[0]) else: lr.offset.append(model.intercept_) @@ -69,8 +72,11 @@ def _convert(model, features, target): weights.value.append(i) return spec + def get_input_dimension(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_")) return model.coef_.size diff --git a/coremltools/converters/sklearn/_logistic_regression.py b/coremltools/converters/sklearn/_logistic_regression.py index 8e2e827ab..2a823125c 100644 --- a/coremltools/converters/sklearn/_logistic_regression.py +++ b/coremltools/converters/sklearn/_logistic_regression.py @@ -5,18 +5,21 @@ from collections import Iterable -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel + if _HAS_SKLEARN: from sklearn.linear_model import LogisticRegression from . import _sklearn_util + sklearn_class = LogisticRegression from ... import SPECIFICATION_VERSION from ...models._interface_management import set_classifier_interface_params from ...proto import Model_pb2 as _Model_pb2 -model_type = 'classifier' +model_type = "classifier" + def convert(model, feature_names, target): """Convert a Logistic Regression model to the protobuf spec. @@ -36,11 +39,13 @@ def convert(model, feature_names, target): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_expected_type(model, LogisticRegression) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_")) return _MLModel(_convert(model, feature_names, target)) @@ -49,14 +54,18 @@ def _convert(model, feature_names, target): spec = _Model_pb2.Model() spec.specificationVersion = SPECIFICATION_VERSION - set_classifier_interface_params(spec, feature_names, model.classes_, 'glmClassifier', output_features=target) + set_classifier_interface_params( + spec, feature_names, model.classes_, "glmClassifier", output_features=target + ) glmClassifier = spec.glmClassifier - + if model.multi_class == "ovr": glmClassifier.classEncoding = glmClassifier.OneVsRest else: - print('[ERROR] Currently "One Vs Rest" is the only supported multiclass option.') + print( + '[ERROR] Currently "One Vs Rest" is the only supported multiclass option.' + ) return None glmClassifier.postEvaluationTransform = glmClassifier.Logit @@ -69,23 +78,30 @@ def _convert(model, feature_names, target): glmClassifier.offset.append(model.intercept_) for cur_in_row in model.coef_: - cur_out_row = glmClassifier.weights.add() + cur_out_row = glmClassifier.weights.add() for val in cur_in_row: cur_out_row.value.append(val) return spec + def supports_output_scores(model): return True + def get_output_classes(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. 
scikit-learn conversion API is disabled.') - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_")) return list(model.classes_) + def get_input_dimension(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'coef_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "coef_")) return len(model.coef_[0]) diff --git a/coremltools/converters/sklearn/_normalizer.py b/coremltools/converters/sklearn/_normalizer.py index c7da84d7d..bb67b5365 100644 --- a/coremltools/converters/sklearn/_normalizer.py +++ b/coremltools/converters/sklearn/_normalizer.py @@ -5,20 +5,22 @@ from ... import SPECIFICATION_VERSION -from ...models._interface_management import set_transform_interface_params as \ - _set_transform_interface_params +from ...models._interface_management import ( + set_transform_interface_params as _set_transform_interface_params, +) from ...proto import Model_pb2 as _Model_pb2 from ...proto.Normalizer_pb2 import Normalizer as _proto__normalizer -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: from . import _sklearn_util from sklearn.preprocessing import Normalizer + sklearn_class = Normalizer -model_type = 'transformer' +model_type = "transformer" def convert(model, input_features, output_features): @@ -40,13 +42,15 @@ def convert(model, input_features, output_features): model_spec: An object of type Model_pb. Protobuf representation of the model """ - - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + # Test the scikit-learn model _sklearn_util.check_expected_type(model, Normalizer) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'norm')) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "norm")) # Set the interface params. spec = _Model_pb2.Model() @@ -55,14 +59,15 @@ def convert(model, input_features, output_features): # Set the one hot encoder parameters _normalizer_spec = spec.normalizer - if model.norm == 'l1': + if model.norm == "l1": _normalizer_spec.normType = _proto__normalizer.L1 - elif model.norm == 'l2': + elif model.norm == "l2": _normalizer_spec.normType = _proto__normalizer.L2 - elif model.norm == 'max': + elif model.norm == "max": _normalizer_spec.normType = _proto__normalizer.LMax return _MLModel(spec) + def update_dimension(model, input_dimension): """ Given a model that takes an array of dimension input_dimension, returns @@ -72,7 +77,7 @@ def update_dimension(model, input_dimension): # No change return input_dimension + def get_input_dimension(model): # Cannot determine this now. 
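+    # (A fitted Normalizer accepts vectors of any length, so there is no
+    # intrinsic input dimension to report here; the pipeline machinery
+    # instead infers the width from neighboring steps.)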
return None - diff --git a/coremltools/converters/sklearn/_one_hot_encoder.py b/coremltools/converters/sklearn/_one_hot_encoder.py index e034ce2a8..7838c518a 100644 --- a/coremltools/converters/sklearn/_one_hot_encoder.py +++ b/coremltools/converters/sklearn/_one_hot_encoder.py @@ -16,19 +16,19 @@ from ...models.feature_vectorizer import create_feature_vectorizer from ...models.array_feature_extractor import create_array_feature_extractor - from ...models.pipeline import Pipeline -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN as _HAS_SKLEARN if _HAS_SKLEARN: import sklearn from sklearn.preprocessing import OneHotEncoder - sklearn_class = OneHotEncoder + sklearn_class = OneHotEncoder # model type determines the behavior of this module. -model_type = "transformer" +model_type = "transformer" + def convert(model, input_features, output_features): """Convert a one-hot-encoder model to the protobuf spec. @@ -49,40 +49,44 @@ def convert(model, input_features, output_features): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + # Make sure the model is fitted. _sklearn_util.check_expected_type(model, OneHotEncoder) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'active_features_')) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'n_values_')) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "active_features_")) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "n_values_")) - input_dimension = get_input_dimension(model) + input_dimension = get_input_dimension(model) if input_dimension is not None: - # Make sure that our starting dimensions are correctly managed. + # Make sure that our starting dimensions are correctly managed. assert len(input_features) == 1 assert input_features[0][1] == datatypes.Array(input_dimension) input_dimension = input_features[0][1].num_elements - expected_output_dimension = update_dimension(model, input_dimension) + expected_output_dimension = update_dimension(model, input_dimension) assert output_features[0][1] == datatypes.Array(expected_output_dimension) - # Create a pipeline that can do all of the subsequent feature extraction. + # Create a pipeline that can do all of the subsequent feature extraction. feature_vectorizer_input_features = [] feature_vectorizer_size_map = {} - if model.categorical_features == 'all': + if model.categorical_features == "all": _categorical_features = set(range(input_dimension)) - _cat_feature_idx_mapping = dict( (i, i) for i in range(input_dimension)) + _cat_feature_idx_mapping = dict((i, i) for i in range(input_dimension)) else: _categorical_features = set(model.categorical_features) - _cat_feature_idx_mapping = dict( (_idx, i) for i, _idx in enumerate(sorted(model.categorical_features))) + _cat_feature_idx_mapping = dict( + (_idx, i) for i, _idx in enumerate(sorted(model.categorical_features)) + ) pline = Pipeline(input_features, output_features) - # Track the overall packing index, which determines the output ordering. + # Track the overall packing index, which determines the output ordering. pack_idx = 0 # First, go through all the columns that are encoded. 
The sklearn OHE puts @@ -94,30 +98,37 @@ def convert(model, input_features, output_features): # This input column is one hot encoded feature_extractor_spec = create_array_feature_extractor( - input_features, f_name, idx, output_type = 'Int64') + input_features, f_name, idx, output_type="Int64" + ) pline.add_model(feature_extractor_spec) _cat_feature_idx = _cat_feature_idx_mapping[idx] ohe_input_features = [(f_name, datatypes.Int64())] - ohe_output_features = [(f_name, datatypes.Dictionary('Int64'))] + ohe_output_features = [(f_name, datatypes.Dictionary("Int64"))] # Create a one hot encoder per column o_spec = _Model_pb2.Model() o_spec.specificationVersion = SPECIFICATION_VERSION - o_spec = set_transform_interface_params(o_spec, ohe_input_features, ohe_output_features) + o_spec = set_transform_interface_params( + o_spec, ohe_input_features, ohe_output_features + ) ohe_spec = o_spec.oneHotEncoder ohe_spec.outputSparse = True - if model.handle_unknown == 'error': - ohe_spec.handleUnknown = _OHE_pb2.OneHotEncoder.HandleUnknown.Value('ErrorOnUnknown') + if model.handle_unknown == "error": + ohe_spec.handleUnknown = _OHE_pb2.OneHotEncoder.HandleUnknown.Value( + "ErrorOnUnknown" + ) else: - ohe_spec.handleUnknown = _OHE_pb2.OneHotEncoder.HandleUnknown.Value('IgnoreUnknown') - - # Need to do a quick search to find the part of the active_features_ mask - # that represents the categorical variables in our part. Could do this + ohe_spec.handleUnknown = _OHE_pb2.OneHotEncoder.HandleUnknown.Value( + "IgnoreUnknown" + ) + + # Need to do a quick search to find the part of the active_features_ mask + # that represents the categorical variables in our part. Could do this # with binary search, but we probably don't need speed so much here. def bs_find(a, i): lb, k = 0, len(a) @@ -126,17 +137,17 @@ def bs_find(a, i): if a[_idx] < i: lb = _idx + 1 k -= 1 - k = (k // 2) + k = k // 2 return lb # Here are the indices we are looking fo f_idx_bottom = model.feature_indices_[_cat_feature_idx] f_idx_top = model.feature_indices_[_cat_feature_idx + 1] - - # Now find where in the active features list we should look. + + # Now find where in the active features list we should look. cat_feat_idx_bottom = bs_find(model.active_features_, f_idx_bottom) - cat_feat_idx_top = bs_find(model.active_features_, f_idx_top) + cat_feat_idx_top = bs_find(model.active_features_, f_idx_top) n_cat_values = cat_feat_idx_top - cat_feat_idx_bottom for i in range(cat_feat_idx_bottom, cat_feat_idx_top): @@ -148,38 +159,43 @@ def bs_find(a, i): pline.add_model(o_spec) # Add the result to the feature_vectorizer at the end. - feature_vectorizer_input_features.append( (f_name, datatypes.Dictionary('Int64')) ) + feature_vectorizer_input_features.append( + (f_name, datatypes.Dictionary("Int64")) + ) feature_vectorizer_size_map[f_name] = n_cat_values pack_idx += 1 - # Now go through all the columns that are not encoded as the sklearn OHE puts + # Now go through all the columns that are not encoded as the sklearn OHE puts # these after the encoded ones. 
For speed, we can put these all in a single - # ArrayFeatureExtractor + # ArrayFeatureExtractor # - pass_through_features = [idx for idx in range(input_dimension) - if idx not in _categorical_features] + pass_through_features = [ + idx for idx in range(input_dimension) if idx not in _categorical_features + ] if pass_through_features: - f_name = "__OHE_pass_through__" - # This input column is not one hot encoded feature_extractor_spec = create_array_feature_extractor( - input_features, f_name, pass_through_features) + input_features, f_name, pass_through_features + ) pline.add_model(feature_extractor_spec) - feature_vectorizer_input_features.append( - (f_name, datatypes.Array(len(pass_through_features))) ) - + feature_vectorizer_input_features.append( + (f_name, datatypes.Array(len(pass_through_features))) + ) # Finally, add the feature vectorizer to the pipeline. - output_feature_name = output_features[0][0] - output_feature_dimension = output_features[0][1].num_elements + output_feature_name = output_features[0][0] + output_feature_dimension = output_features[0][1].num_elements - fvec, _num_out_dim = create_feature_vectorizer(feature_vectorizer_input_features, - output_features[0][0], feature_vectorizer_size_map) + fvec, _num_out_dim = create_feature_vectorizer( + feature_vectorizer_input_features, + output_features[0][0], + feature_vectorizer_size_map, + ) # Make sure that the feature vectorizer input actually matches up with the assert _num_out_dim == output_features[0][1].num_elements @@ -194,32 +210,36 @@ def update_dimension(model, input_dimension): Given a model that takes an array of dimension input_dimension, returns the output dimension. """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'active_features_')) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'n_values_')) + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "active_features_")) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "n_values_")) - if model.categorical_features == 'all': + if model.categorical_features == "all": return len(model.active_features_) else: - out_dimension = (len(model.active_features_) - + (input_dimension - len(model.n_values_))) + out_dimension = len(model.active_features_) + ( + input_dimension - len(model.n_values_) + ) return out_dimension def get_input_dimension(model): - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'active_features_')) - _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'n_values_')) - - if model.categorical_features == 'all': + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." 
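To make the `update_dimension` rule above concrete, a small worked example with assumed attribute sizes (the numbers are illustrative, not taken from this diff):

```python
# Hypothetical one-hot-encoder bookkeeping: 2 of 5 input columns are
# categorical (len(n_values_) == 2) and together expand to 7 active one-hot
# outputs (len(active_features_) == 7); the other 3 columns pass through.
active_features_count = 7
n_values_count = 2
input_dimension = 5

out_dimension = active_features_count + (input_dimension - n_values_count)
assert out_dimension == 10  # 7 one-hot outputs + 3 pass-through columns
```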
+ ) + + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "active_features_")) + _sklearn_util.check_fitted(model, lambda m: hasattr(m, "n_values_")) + + if model.categorical_features == "all": return len(model.feature_indices_) - 1 else: - # This can't actually be determined from the model as indices after the + # This can't actually be determined from the model as indices after the # rest of the categorical values don't seem to be tracked return None - diff --git a/coremltools/converters/sklearn/_random_forest_classifier.py b/coremltools/converters/sklearn/_random_forest_classifier.py index 8abe0f1e5..009dda3e0 100644 --- a/coremltools/converters/sklearn/_random_forest_classifier.py +++ b/coremltools/converters/sklearn/_random_forest_classifier.py @@ -6,15 +6,17 @@ from ._tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble from ._tree_ensemble import get_input_dimension -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: import sklearn.ensemble as _ensemble from . import _sklearn_util + sklearn_class = _ensemble.RandomForestClassifier -model_type = 'classifier' +model_type = "classifier" + def convert(model, feature_names, target): """Convert a boosted tree model to protobuf format. @@ -35,28 +37,35 @@ def convert(model, feature_names, target): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_expected_type(model, _ensemble.RandomForestClassifier) + def is_rf_model(m): if len(m.estimators_) == 0: return False - if hasattr(m, 'estimators_') and m.estimators_ is not None: + if hasattr(m, "estimators_") and m.estimators_ is not None: for t in m.estimators_: - if not hasattr(t, 'tree_') or t.tree_ is None: + if not hasattr(t, "tree_") or t.tree_ is None: return False return True else: return False + _sklearn_util.check_fitted(model, is_rf_model) - return _MLModel(_convert_tree_ensemble(model, feature_names, target, mode = 'classifier', - class_labels = model.classes_)) + return _MLModel( + _convert_tree_ensemble( + model, feature_names, target, mode="classifier", class_labels=model.classes_ + ) + ) + def supports_output_scores(model): return True + def get_output_classes(model): return list(model.classes_) - - diff --git a/coremltools/converters/sklearn/_random_forest_regressor.py b/coremltools/converters/sklearn/_random_forest_regressor.py index 3eb8a0e6a..ebf4968d4 100644 --- a/coremltools/converters/sklearn/_random_forest_regressor.py +++ b/coremltools/converters/sklearn/_random_forest_regressor.py @@ -6,15 +6,17 @@ from ._tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble from ._tree_ensemble import get_input_dimension -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: import sklearn.ensemble as _ensemble from . import _sklearn_util + sklearn_class = _ensemble.RandomForestRegressor -model_type = 'regressor' +model_type = "regressor" + def convert(model, feature_names, target): """Convert a boosted tree model to protobuf format. @@ -35,21 +37,23 @@ def convert(model, feature_names, target): model_spec: An object of type Model_pb. 
Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + _sklearn_util.check_expected_type(model, _ensemble.RandomForestRegressor) + def is_rf_model(m): if len(m.estimators_) == 0: return False - if hasattr(m, 'estimators_') and m.estimators_ is not None: + if hasattr(m, "estimators_") and m.estimators_ is not None: for t in m.estimators_: - if not hasattr(t, 'tree_') or t.tree_ is None: + if not hasattr(t, "tree_") or t.tree_ is None: return False return True else: return False + _sklearn_util.check_fitted(model, is_rf_model) return _MLModel(_convert_tree_ensemble(model, feature_names, target)) - - diff --git a/coremltools/converters/sklearn/_sklearn_util.py b/coremltools/converters/sklearn/_sklearn_util.py index 3b0fea336..e313de1ea 100644 --- a/coremltools/converters/sklearn/_sklearn_util.py +++ b/coremltools/converters/sklearn/_sklearn_util.py @@ -3,6 +3,7 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause + def check_fitted(model, func): """Check if a model is fitted. Raise error if not. @@ -17,6 +18,7 @@ def check_fitted(model, func): if not func(model): raise TypeError("Expected a 'fitted' model for conversion") + def check_expected_type(model, expected_type): """Check if a model is of the right type. Raise error if not. @@ -28,7 +30,8 @@ def check_expected_type(model, expected_type): expected_type: Type Expected type of the scikit-learn. """ - if (model.__class__.__name__ != expected_type.__name__): - raise TypeError("Expected model of type '%s' (got %s)" % \ - (expected_type.__name__, model.__class__.__name__)) - + if model.__class__.__name__ != expected_type.__name__: + raise TypeError( + "Expected model of type '%s' (got %s)" + % (expected_type.__name__, model.__class__.__name__) + ) diff --git a/coremltools/converters/sklearn/_standard_scaler.py b/coremltools/converters/sklearn/_standard_scaler.py index 35af3f87a..86b0473d0 100644 --- a/coremltools/converters/sklearn/_standard_scaler.py +++ b/coremltools/converters/sklearn/_standard_scaler.py @@ -5,21 +5,24 @@ from ... import SPECIFICATION_VERSION -from ...models._interface_management import set_transform_interface_params as \ - _set_transform_interface_params +from ...models._interface_management import ( + set_transform_interface_params as _set_transform_interface_params, +) from ...proto import Model_pb2 as _Model_pb2 from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN +from ..._deps import _HAS_SKLEARN from ...models import MLModel as _MLModel if _HAS_SKLEARN: from . import _sklearn_util import sklearn from sklearn.preprocessing import StandardScaler + sklearn_class = StandardScaler -model_type = 'transformer' +model_type = "transformer" + def convert(model, input_features, output_features): """Convert a _imputer model to the protobuf spec. @@ -40,13 +43,15 @@ def convert(model, input_features, output_features): model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." 
+    )
+
     # Test the scikit-learn model
     _sklearn_util.check_expected_type(model, StandardScaler)
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'mean_'))
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'scale_'))
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "mean_"))
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "scale_"))
 
     # Set the interface params.
     spec = _Model_pb2.Model()
@@ -63,20 +68,25 @@ def convert(model, input_features, output_features):
 
     return _MLModel(spec)
 
+
 def update_dimension(model, input_dimension):
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'mean_'))
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'scale_'))
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
+
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "mean_"))
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "scale_"))
     # Nothing to do for this model
     return input_dimension
 
+
 def get_input_dimension(model):
-    if not(_HAS_SKLEARN):
-        raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.')
-
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'mean_'))
-    _sklearn_util.check_fitted(model, lambda m: hasattr(m, 'scale_'))
-    return len(model.mean_)
+    if not (_HAS_SKLEARN):
+        raise RuntimeError(
+            "scikit-learn not found. scikit-learn conversion API is disabled."
+        )
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "mean_"))
+    _sklearn_util.check_fitted(model, lambda m: hasattr(m, "scale_"))
+    return len(model.mean_)
diff --git a/coremltools/converters/sklearn/_svm_common.py b/coremltools/converters/sklearn/_svm_common.py
index 22d3fdd34..1ba96caca 100644
--- a/coremltools/converters/sklearn/_svm_common.py
+++ b/coremltools/converters/sklearn/_svm_common.py
@@ -12,27 +12,31 @@ def _set_kernel(model, spec):
     """
     Takes the sklearn SVM model and returns the spec with the protobuf kernel for that model.
     """
+
     def gamma_value(model):
-        if(model.gamma == 'auto' or model.gamma == 'auto_deprecated'):
+        if model.gamma == "auto" or model.gamma == "auto_deprecated":
             # auto gamma value is 1/num_features
-            return 1/float(len(model.support_vectors_[0]))
+            return 1 / float(len(model.support_vectors_[0]))
         else:
             return model.gamma
 
-    result = None
-    if(model.kernel == 'linear'):
-        spec.kernel.linearKernel.MergeFromString(b'') # hack to set kernel to an empty type
-    elif(model.kernel == 'rbf'):
-        spec.kernel.rbfKernel.gamma = gamma_value(model)
-    elif(model.kernel == 'poly'):
+    result = None
+    if model.kernel == "linear":
+        spec.kernel.linearKernel.MergeFromString(
+            b""
+        )  # hack to set kernel to an empty type
+    elif model.kernel == "rbf":
+        spec.kernel.rbfKernel.gamma = gamma_value(model)
+    elif model.kernel == "poly":
         spec.kernel.polyKernel.gamma = gamma_value(model)
         spec.kernel.polyKernel.c = model.coef0
         spec.kernel.polyKernel.degree = model.degree
-    elif(model.kernel == 'sigmoid'):
+    elif model.kernel == "sigmoid":
         spec.kernel.sigmoidKernel.gamma = gamma_value(model)
         spec.kernel.sigmoidKernel.c = model.coef0
     else:
-        raise ValueError('Unsupported kernel. The following kernel are supported: linear, RBF, polynomial and sigmoid.')
+        raise ValueError(
+            "Unsupported kernel. The following kernels are supported: linear, RBF, polynomial and sigmoid."
+ ) return result - diff --git a/coremltools/converters/sklearn/_tree_ensemble.py b/coremltools/converters/sklearn/_tree_ensemble.py index f0300c7d6..2affcf5be 100644 --- a/coremltools/converters/sklearn/_tree_ensemble.py +++ b/coremltools/converters/sklearn/_tree_ensemble.py @@ -6,18 +6,19 @@ from ...models.tree_ensemble import TreeEnsembleRegressor, TreeEnsembleClassifier from ...models._feature_management import process_or_validate_features -from ..._deps import HAS_SKLEARN +from ..._deps import _HAS_SKLEARN -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.tree import _tree import numpy as _np -def _get_value(scikit_value, mode = 'regressor', scaling = 1.0, n_classes = 2, tree_index = 0): + +def _get_value(scikit_value, mode="regressor", scaling=1.0, n_classes=2, tree_index=0): """ Get the right value from the scikit-tree """ # Regression - if mode == 'regressor': + if mode == "regressor": return scikit_value[0] * scaling # Binary classification @@ -41,65 +42,105 @@ def _get_value(scikit_value, mode = 'regressor', scaling = 1.0, n_classes = 2, t value = {tree_index: scikit_value[0] * scaling} return value -def _recurse(coreml_tree, scikit_tree, tree_id, node_id, scaling = 1.0, mode = 'regressor', - n_classes = 2, tree_index = 0): + +def _recurse( + coreml_tree, + scikit_tree, + tree_id, + node_id, + scaling=1.0, + mode="regressor", + n_classes=2, + tree_index=0, +): """Traverse through the tree and append to the tree spec. """ - if not(HAS_SKLEARN): - raise RuntimeError('scikit-learn not found. scikit-learn conversion API is disabled.') - + if not (_HAS_SKLEARN): + raise RuntimeError( + "scikit-learn not found. scikit-learn conversion API is disabled." + ) + ## Recursion should not be called on the leaf node. if node_id == _tree.TREE_LEAF: raise ValueError("Invalid node_id %s" % _tree.TREE_LEAF) # Add a branch node to the tree if scikit_tree.children_left[node_id] != _tree.TREE_LEAF: - branch_mode = 'BranchOnValueLessThanEqual' + branch_mode = "BranchOnValueLessThanEqual" feature_index = scikit_tree.feature[node_id] feature_value = scikit_tree.threshold[node_id] left_child_id = scikit_tree.children_left[node_id] right_child_id = scikit_tree.children_right[node_id] # Add a branch node - coreml_tree.add_branch_node(tree_id, node_id, feature_index, - feature_value, branch_mode, left_child_id, right_child_id) + coreml_tree.add_branch_node( + tree_id, + node_id, + feature_index, + feature_value, + branch_mode, + left_child_id, + right_child_id, + ) # Now recurse - _recurse(coreml_tree, scikit_tree, tree_id, left_child_id, scaling, mode, n_classes, tree_index) - _recurse(coreml_tree, scikit_tree, tree_id, right_child_id, scaling, mode, n_classes, tree_index) + _recurse( + coreml_tree, + scikit_tree, + tree_id, + left_child_id, + scaling, + mode, + n_classes, + tree_index, + ) + _recurse( + coreml_tree, + scikit_tree, + tree_id, + right_child_id, + scaling, + mode, + n_classes, + tree_index, + ) # Add a leaf node to the tree else: # Get the scikit-learn value if scikit_tree.n_outputs != 1: - raise ValueError('Expected only 1 output in the scikit-learn tree.') - value = _get_value(scikit_tree.value[node_id], mode, scaling, n_classes, tree_index) + raise ValueError("Expected only 1 output in the scikit-learn tree.") + value = _get_value( + scikit_tree.value[node_id], mode, scaling, n_classes, tree_index + ) coreml_tree.add_leaf_node(tree_id, node_id, value) def get_input_dimension(model): - - if hasattr(model, 'n_features_'): + if hasattr(model, "n_features_"): return model.n_features_ - elif 
hasattr(model, 'n_estimators'): + elif hasattr(model, "n_estimators"): if model.n_estimators == 0: raise ValueError("model not trained.") try: - return model.estimators_[0,0].n_features_ + return model.estimators_[0, 0].n_features_ except IndexError: raise ValueError("Model not trained or invalid model.") else: raise ValueError("Unable to obtain input dimension from model.") -def convert_tree_ensemble(model, input_features, - output_features = ('predicted_class', float), - mode = 'regressor', - base_prediction = None, - class_labels = None, - post_evaluation_transform = None): +def convert_tree_ensemble( + model, + input_features, + output_features=("predicted_class", float), + mode="regressor", + base_prediction=None, + class_labels=None, + post_evaluation_transform=None, +): """ Convert a generic tree regressor model to the protobuf spec. @@ -142,19 +183,23 @@ def convert_tree_ensemble(model, input_features, num_dimensions = get_input_dimension(model) features = process_or_validate_features(input_features, num_dimensions) - + n_classes = None - if mode == 'classifier': + if mode == "classifier": n_classes = model.n_classes_ if class_labels is None: class_labels = range(n_classes) else: if len(class_labels) != n_classes: - raise ValueError("Number of classes in model (%d) does not match " - "length of supplied class list (%d)." - % (n_classes, len(class_labels))) - - coreml_tree = TreeEnsembleClassifier(input_features, class_labels, output_features) + raise ValueError( + "Number of classes in model (%d) does not match " + "length of supplied class list (%d)." + % (n_classes, len(class_labels)) + ) + + coreml_tree = TreeEnsembleClassifier( + input_features, class_labels, output_features + ) if post_evaluation_transform is not None: coreml_tree.set_post_evaluation_transform(post_evaluation_transform) @@ -172,29 +217,48 @@ def convert_tree_ensemble(model, input_features, coreml_tree.set_default_prediction_value(base_prediction) # Single tree - if hasattr(model, 'tree_'): - _recurse(coreml_tree, model.tree_, tree_id = 0, node_id = 0, - mode = mode, n_classes = n_classes) + if hasattr(model, "tree_"): + _recurse( + coreml_tree, + model.tree_, + tree_id=0, + node_id=0, + mode=mode, + n_classes=n_classes, + ) # Multiple trees - elif hasattr(model, 'estimators_'): + elif hasattr(model, "estimators_"): is_ensembling_in_separate_trees = False if type(model.estimators_) != list: - is_ensembling_in_separate_trees = len(model.estimators_.shape) > 0 and model.estimators_.shape[1] > 1 + is_ensembling_in_separate_trees = ( + len(model.estimators_.shape) > 0 and model.estimators_.shape[1] > 1 + ) estimators = model.estimators_.flatten() else: estimators = model.estimators_ - scaling = model.learning_rate if hasattr(model, 'learning_rate') else 1.0 / len(estimators) + scaling = ( + model.learning_rate + if hasattr(model, "learning_rate") + else 1.0 / len(estimators) + ) for tree_id, base_model in enumerate(estimators): if is_ensembling_in_separate_trees: tree_index = tree_id % n_classes else: tree_index = 0 - _recurse(coreml_tree, base_model.tree_, tree_id, node_id = 0, - scaling = scaling, mode = mode, n_classes = n_classes, tree_index = tree_index) + _recurse( + coreml_tree, + base_model.tree_, + tree_id, + node_id=0, + scaling=scaling, + mode=mode, + n_classes=n_classes, + tree_index=tree_index, + ) else: - raise TypeError('Unknown scikit-learn tree model type.') + raise TypeError("Unknown scikit-learn tree model type.") return coreml_tree.spec - diff --git 
a/coremltools/converters/tensorflow/_tf_converter.py b/coremltools/converters/tensorflow/_tf_converter.py deleted file mode 100644 index fc035ae32..000000000 --- a/coremltools/converters/tensorflow/_tf_converter.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) 2019, Apple Inc. All rights reserved. -# -# Use of this source code is governed by a BSD-3-clause license that can be -# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause - - -import os.path -from ...models import MLModel - - -def convert(filename, - inputs=None, - outputs=None, - image_input_names=None, - tf_image_format=None, - is_bgr=False, - red_bias=0.0, - green_bias=0.0, - blue_bias=0.0, - gray_bias=0.0, - image_scale=1.0, - class_labels=None, - predicted_feature_name=None, - predicted_probabilities_output='', - add_custom_layers=False, # type: bool - custom_conversion_functions=None, # type: dict{text, any} - custom_shape_functions=None, # type: dict{text, any} - **kwargs): - """ - Convert TensorFlow model to Core ML format. - - Parameters - ---------- - filename: str - Path to the TensorFlow model. Takes in one of the following formats: - - - TensorFlow frozen graph (.pb) model file name - - TensorFlow tf.keras HDF5 (.h5) model file name - - TensorFlow SavedModel directory path - - TensorFlow concrete functions(s) - - inputs: dict(str: list or tuple) - Model input name and shape pairs. - - outputs: [str] - Model output names. - - image_input_names: [str] | str - Input names (a subset of the keys of inputs) - that can be treated as images by Core ML. All other inputs - are treated as MultiArrays. - tf_image_format: str - Optional and valid if image_input_names is also set. Specify either 'NCHW' or 'NHWC' to set or - override the image format. If not set, tries to use hints from the graph which may be present in convolution or - other image-specific layers. Ultimately defaults to NHWC. - is_bgr: bool | dict(): - Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys and booleans as values. - red_bias: float | dict() - Bias value to be added to the red channel of the input image, after applying scale. - Defaults to 0.0 - Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. - blue_bias: float | dict() - Bias value to be added to the blue channel of the input image, after applying scale. - Defaults to 0.0 - Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. - green_bias: float | dict() - Bias value to be added to the green channel of the input image, after applying scale. - Defaults to 0.0 - Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. - gray_bias: float | dict() - Bias value to be added to the input image (in grayscale), after applying scale. - Defaults to 0.0 - Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. - image_scale: float | dict() - Value by which input images will be scaled before bias is added and - Core ML model makes a prediction. Defaults to 1.0. - Applicable only if image_input_names is specified. - To specify different values for each image input provide a dictionary with input names as keys. 
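The image-related options in this (now removed) docstring compose per input; a hedged illustration with placeholder paths and names, assuming the pre-removal API (the remaining parameters continue below):

```python
# Hypothetical call against the removed converter: each image option accepts a
# scalar applied to all image inputs, or a dict keyed by input name; the scale
# is applied before the per-channel biases.
import coremltools

mlmodel = coremltools.converters.tensorflow.convert(
    "frozen_graph.pb",                    # placeholder model path
    inputs={"image": (1, 224, 224, 3)},
    outputs=["Softmax"],                  # placeholder output name
    image_input_names=["image"],
    is_bgr={"image": False},
    image_scale={"image": 1.0 / 255.0},
    red_bias={"image": -0.5},
    green_bias={"image": -0.5},
    blue_bias={"image": -0.5},
)
```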
- class_labels: list[int or str] | str - Class labels (applies to classifiers only) that map the index of the - output of a neural network to labels in a classifier. - If the provided class_labels is a string, it is assumed to be a - file path where classes are parsed as a list of newline separated - strings. - predicted_feature_name: str - Name of the output feature for the class labels exposed in the Core ML - model (applies to classifiers only). Defaults to 'classLabel' - predicted_probabilities_output: str - Name of the neural network output to be interpreted as the predicted - probabilities of the resulting classes. Typically the output of a - softmax function. - add_custom_layers: bool - Flag to turn on addition of custom CoreML layers for unsupported TF ops or attributes within - a supported op. - custom_conversion_functions: dict(): {Text: func(**kwargs)} - Argument to provide user-defined functions for converting Tensorflow operations (op, for short). - A dictionary with keys corresponding to the names or types of the TF ops and values as handle to user-defined functions. - The keys can be either the type of the op or the name of the op. If former, then the function is called whenever the op - of that type is encountered during conversion. By using op names, specific ops can be targeted which is - useful for handling unsupported configuration in an op. - The function receives multiple arguments: TF operation, the CoreML Neural network builder object, - dictionary containing the op's inputs that are constants and their values (as numpy arrays). - The function can add custom layers or any other combination of CoreML layers to translate the TF op. - See "examples/custom_layer_examples.ipynb" jupyter-notebook for examples on using this argument. - custom_shape_functions: dict(): {Text: func()} - Argument to provide user-defined functions to compute shape for given op. - A dictionary with keys corresponding to the type of TF Op and value as handled to user-defined function. - Function receives `layer specification` and `input shape` as a input. - output of the function must be output shape for give op. (generally List). - Custom shape function is required for adding custom layer in Core ML 3. - - Returns - ------- - model: MLModel - Returns an MLModel instance representing a Core ML model. - - Examples - -------- - .. 
code-block:: python - - import coremltools - from tensorflow.keras.applications import ResNet50 - - model = coremltools.converters.tensorflow.convert( - './model.h5', - inputs={'input_1': (1, 224, 224, 3)}, - outputs=['Identity'] - ) - - For more examples, see: https://github.com/apple/coremltools/blob/master/docs/NeuralNetworkGuide.md - """ - use_cpu_only = kwargs.get('use_cpu_only') - use_cpu_only = use_cpu_only if use_cpu_only is not None else False - - optional_inputs = kwargs.get('optional_inputs') - optional_inputs = optional_inputs if optional_inputs is not None else [] - - # `tf_model_path` takes in one of the following formats: - # 1) TensorFlow frozen graph (.pb) model file name - # 2) TensorFlow tf.keras HDF5 (.h5) model file name - # 3) TensorFlow SavedModel directory path - # 4) TensorFlow concrete functions(s) - - invalid_filename_message = ('invalid input tf_model_path: {}!\n' - 'Supported tf_model_path input format includes:\n' - '- Path to TensorFlow frozen graph (.pb) file\n' - '- Path to TensorFlow tf.keras model (.h5) file\n' - '- Path to TensorFlow SavedModel directory\n' - '- List of TensorFlow concrete functions'.format(filename)) - - if isinstance(filename, str) and not os.path.exists(filename): - raise ValueError('invalid input tf_model_path \'{}\' does not exist.'.format(filename)) - - if isinstance(filename, str) and os.path.isfile(filename): - # path to the model file must end with either .pb or .h5 format - if not (filename.endswith('.pb') or filename.endswith('.h5')): - raise ValueError(invalid_filename_message) - - if filename.endswith('.h5'): - filename = _graph_def_from_saved_model_or_keras_model(filename) - - elif isinstance(filename, str) and os.path.isdir(filename): - filename = _graph_def_from_saved_model_or_keras_model(filename) - - elif isinstance(filename, list): - filename = _graph_def_from_concrete_function(filename) - else: - raise ValueError(invalid_filename_message) - - # convert from TensorFlow to SSA IR - from ..nnssa.frontend.tensorflow import load as frontend_load - ssa = frontend_load(filename, resume_on_errors=False, inputs=inputs, outputs=outputs, **kwargs) - - # convert from SSA IR to Core ML - from ..nnssa.coreml.ssa_converter import ssa_convert - model_spec = ssa_convert(ssa, - top_func='main', - inputs=inputs, - outputs=outputs, - image_input_names=image_input_names, - image_format=tf_image_format, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale, - class_labels=class_labels, - predicted_feature_name=predicted_feature_name, - predicted_probabilities_output=predicted_probabilities_output, - add_custom_layers=add_custom_layers, - custom_conversion_functions=custom_conversion_functions, - custom_shape_functions=custom_shape_functions, - optional_inputs=optional_inputs) - - return MLModel(model_spec, useCPUOnly=use_cpu_only) - - -def _graph_def_from_saved_model_or_keras_model(filename): - """ - Utility function that returns GraphDef object from the given SavedModel or HDF5 model. - :param filename: TensorFlow SavedModel directory or Keras HDF5 model (.h5) file. - :return: TensorFlow GraphDef object. 
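This helper and `_graph_def_from_concrete_function` below share one freezing step; a self-contained, hedged sketch of that step with a toy function (TensorFlow 2.x assumed, the function is illustrative):

```python
# Minimal sketch of freezing a concrete function into a GraphDef, mirroring
# convert_variables_to_constants_v2 as used by these helpers.
import tensorflow as tf
from tensorflow.python.framework import convert_to_constants as _convert_to_constants

@tf.function(input_signature=[tf.TensorSpec(shape=[1, 4], dtype=tf.float32)])
def toy(x):
    return tf.nn.relu(x)

concrete_func = toy.get_concrete_function()
frozen_func = _convert_to_constants.convert_variables_to_constants_v2(concrete_func)
graph_def = frozen_func.graph.as_graph_def(add_shapes=True)
```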
- """ - try: - import tensorflow as tf - from tensorflow.python.keras.saving import saving_utils as _saving_utils - from tensorflow.python.framework import convert_to_constants as _convert_to_constants - if filename.endswith('.h5'): - model = tf.keras.models.load_model(filename) - tf.keras.backend.set_learning_phase(False) - func = _saving_utils.trace_model_call(model) - concrete_func = func.get_concrete_function() - else: - model = tf.saved_model.load(filename) - signatures = model.signatures - if len(signatures) == 0: - raise ValueError('Unable to load a model with no signatures provided.') - if len(signatures) >= 2: - raise ValueError('Unable to load a model with multiple signatures') - concrete_func = list(signatures.values())[0] - frozen_func = _convert_to_constants.convert_variables_to_constants_v2(concrete_func) - graph_def = frozen_func.graph.as_graph_def(add_shapes=True) - except ImportError as e: - raise ImportError('Failed to import TensorFlow utilities. {}.'.format(e)) - except ValueError as e: - raise ValueError('Failed to load SavedModel or .h5 model. {}.'.format(e)) - except Exception as e: - raise RuntimeError('Failed to load SavedModel or .h5 model. {}.'.format(e)) - return graph_def - - -def _graph_def_from_concrete_function(concrete_functions): - """ - Utility function that returns GraphDef object from the given concrete functions. - :param concrete_functions: list of TensorFlow concrete functions. - :return: TensorFlow GraphDef object. - """ - if len(concrete_functions) != 1: - raise ValueError('This converter can only convert a single ConcreteFunction.') - try: - import tensorflow as tf - from tensorflow.python.framework import convert_to_constants as _convert_to_constants - from tensorflow.python.eager import function as _function - frozen_func = _convert_to_constants.convert_variables_to_constants_v2(concrete_functions[0]) - graph_def = frozen_func.graph.as_graph_def(add_shapes=True) - except ImportError as e: - raise ImportError('Failed to import TensorFlow utilities. {}.'.format(e)) - except Exception as e: - raise RuntimeError('Failed to load concrete functions(s). {}.'.format(e)) - return graph_def diff --git a/coremltools/converters/tensorflow/test/test_base.py b/coremltools/converters/tensorflow/test/test_base.py deleted file mode 100644 index 0ebb4f306..000000000 --- a/coremltools/converters/tensorflow/test/test_base.py +++ /dev/null @@ -1,413 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import os, sys -import tensorflow.compat.v1 as tf -import numpy as np -import pytest -import unittest -import shutil, tempfile -from tensorflow.python.tools.freeze_graph import freeze_graph -from tensorflow.tools.graph_transforms import TransformGraph - -import coremltools - -# local to pytest -from test_utils import generate_data, tf_transpose - -DEBUG = False - -def _parse_coreml_input_shapes(mlmodel): - return {x.name : list(x.type.multiArrayType.shape) for x in - mlmodel._spec.description.input} - -def _parse_coreml_name_to_tf(coreml_name): - if coreml_name.endswith('__invar__'): - tf_name = coreml_name.replace('__invar__', '') - elif coreml_name.endswith('__outvar__'): - tf_name = coreml_name.replace('__outvar__', '') - else: - tf_name = coreml_name - return tf_name - - -class TFNetworkTest(unittest.TestCase): - - @classmethod - def setUpClass(self): - """ - Set up the unit test by loading common utilities. 
- """ - - def _get_tf_tensor_name(self, graph, name): - """ - Convenience function to get the name of first output tensor of an op with name - """ - return graph.get_operation_by_name(name).outputs[0].name - - def _simple_freeze(self, input_graph, input_checkpoint, output_graph, output_node_names): - # output_node_names is a string of names separated by comma - freeze_graph( - input_graph=input_graph, - input_saver="", - input_binary=True, - input_checkpoint=input_checkpoint, - output_node_names=output_node_names, - restore_op_name="save/restore_all", - filename_tensor_name="save/Const:0", - output_graph=output_graph, - clear_devices=True, - initializer_nodes="") - - def _quantize_static_tf_model(self, logdir, model_path, output_names): - - with open(model_path, 'rb') as f: - serialized = f.read() - - gdef = tf.GraphDef() - gdef.ParseFromString(serialized) - - tf.reset_default_graph() - graph = tf.Graph() - with graph.as_default() as g: - transforms = [ - "add_default_attributes", "remove_nodes(op=Identity, op=CheckNumerics)", - "fold_constants(ignore_errors=true)", "fold_batch_norms", "fold_old_batch_norms", - "quantize_weights(minimum_size=1)", "quantize_nodes", "strip_unused_nodes", - "sort_by_execution_order" - ] - - transformed_graph_def = TransformGraph(gdef, [], output_names, transforms) - tf.import_graph_def(transformed_graph_def, name='') - - tf.train.write_graph(graph, logdir, "./tf_quantized_frozen.pb", as_text=False) - return os.path.join(logdir, 'tf_quantized_frozen.pb') - - def _test_tf_model( - self, - graph, - input_shapes, - output_node_names, - data_mode='random', - input_refs=None, - delta=1e-2, - use_cpu_only=False, - graph_optimizations="freeze", # one of ["freeze", "convert_variables_to_constants", None] - quantize_tf_model=False, - quantize_mlmodel=False, - quantize_config={}): - """ - Common entry to testing routine. - graph - defined TensorFlow graph. - input_shapes - dict str:shape for each input op (placeholder) - output_node_names - output_node_names, a list of strings - data_mode - auto-generated input vectors, can be 'random', 'zeros', 'ones', 'linear', etc. - input_refs - a dictionary of reference input in tensorFlow axis order, each entry is str:shape. - When using auto-generated input vectors, set input_refs to None. - delta - maximum difference of normalized TensorFlow and CoreML outputs - use_cpu_only - If True, instantiate and run CoreML model with CPU only - graph_optimizations == "freeze" - Force TensorFlow graph to be frozen before converting. - quantize_tf_model - If True, try to quantize TensorFlow model before converting - quantize_mlmodel - If True, quantize the mlmodel after converting. 
- quantize_config - Dictionary with test quantization parameters - """ - - # Some file processing - model_dir = tempfile.mkdtemp() - graph_def_file = os.path.join(model_dir, 'tf_graph.pb') - checkpoint_file = os.path.join(model_dir, 'tf_model.ckpt') - static_model_file = os.path.join(model_dir, 'tf_static.pb') - coreml_model_file = os.path.join(model_dir, 'coreml_model.mlmodel') - - # add a saver - tf.reset_default_graph() - if graph_optimizations == "freeze": - with graph.as_default() as g: - saver = tf.train.Saver() - - if input_refs is None: - feed_dict = { - self._get_tf_tensor_name(graph, name): generate_data(input_shapes[name], data_mode) - for name in input_shapes - } - else: - feed_dict = { - self._get_tf_tensor_name(graph, name): input_refs[name] - for name in list(input_refs.keys()) - } - - with tf.Session(graph=graph) as sess: - # initialize - initializer_op = tf.global_variables_initializer() - sess.run(initializer_op) - # run the result - fetches = [graph.get_operation_by_name(name).outputs[0] for name in output_node_names] - result = sess.run(fetches, feed_dict=feed_dict) - # save graph definition somewhere - tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False) - # save the weights if freezing is needed - if not graph_optimizations: - static_model_file = graph_def_file - elif graph_optimizations == "freeze": - saver.save(sess, checkpoint_file) - self._simple_freeze( - input_graph=graph_def_file, - input_checkpoint=checkpoint_file, - output_graph=static_model_file, - output_node_names=",".join(output_node_names)) - else: - output_graph_def = tf.graph_util.convert_variables_to_constants( - sess, graph.as_graph_def(), output_node_names) - with tf.gfile.GFile(static_model_file, "wb") as f: - f.write(output_graph_def.SerializeToString()) - - # if TF needs to be quantized, quantize the graph - if quantize_tf_model: - static_model_file = self._quantize_static_tf_model( - model_dir, static_model_file, output_node_names) - - # convert to CoreML - mlmodel = coremltools.converters.tensorflow.convert( - static_model_file, - inputs=input_shapes, - outputs=output_node_names, - use_cpu_only=use_cpu_only) - - # Quantize MLModel if needed - if quantize_mlmodel: - from coremltools.models.neural_network.quantization_utils import quantize_weights - nbits = quantize_config['nbits'] - mode = quantize_config['mode'] - mlmodel = quantize_weights(mlmodel, nbits, quantization_mode=mode) - - if DEBUG: - print('\n mlmodel description: \n') - from coremltools.models.neural_network.printer import print_network_spec - print_network_spec(mlmodel.get_spec(), style='coding') - mlmodel.save(coreml_model_file) - print('\n mlmodel saved at %s' % coreml_model_file) - - coreml_input_names = [str(x) for x in mlmodel.input_description] - coreml_input_shapes = _parse_coreml_input_shapes(mlmodel) - - # Transpose input data as CoreML requires - coreml_inputs = {} - for name in coreml_input_names: - tfop_name = _parse_coreml_name_to_tf(name) - if tfop_name in input_shapes: - coreml_inputs[name] = tf_transpose( - feed_dict[self._get_tf_tensor_name(graph, tfop_name)]) - else: - coreml_inputs[name] = np.zeros(coreml_input_shapes[name]) - - # Run predict in CoreML - coreml_output = mlmodel.predict(coreml_inputs, useCPUOnly=use_cpu_only) - - for idx, out_name in enumerate(output_node_names): - tf_out = result[idx] - if len(tf_out.shape) == 0: - tf_out = np.array([tf_out]) - - tp = tf_out.flatten() - if out_name in coreml_output: - coreml_out = coreml_output[out_name] - elif out_name+'__outvar__' in 
coreml_output: - coreml_out = coreml_output[out_name+'__outvar__'] - else: - self.assertTrue(False, 'CoreML output not found') - - cp = coreml_out.flatten() - - self.assertTrue(tf_out.shape == coreml_out.shape) - for i in range(len(tp)): - max_den = max(1.0, tp[i], cp[i]) - self.assertAlmostEqual(tp[i] / max_den, cp[i] / max_den, delta=delta) - - # Cleanup files - models on disk no longer useful - if os.path.exists(model_dir): - shutil.rmtree(model_dir) - - def _test_tf_model_constant( - self, - graph, - input_shapes, - output_node_names, - data_mode='random_zero_mean', - delta=1e-2, - use_cpu_only=False, - validate_bool_only=False): - """ - Common entry to testing routine for graphs that have no variables. - - Parameters - ---------- - graph: tf.Graph() - TensorFlow graph. - input_shapes: dict [str : shape] - Shapes for each input (placeholder). - output_node_names: list of str - Output tensor names. - data_mode: str - Data mode for the placeholder data generation. - input_refs: a dictionary of reference input in tensorFlow axis order. - Each entry is str:shape. When using auto-generated input vectors, - set input_refs to None. - delta: float - Delta for error checking, default 1e-2. - use_cpu_only: bool - If true, force use CPU only, default False. - validate_bool_only: bool - If true, only validate it's zero or non-zero, otherwise, validate - float values, default False. - """ - - model_dir = tempfile.mkdtemp() - frozen_model_file = os.path.join(model_dir, 'tf_frozen.pb') - coreml_model_file = os.path.join(model_dir, 'coreml_model.mlmodel') - - feed_input_shapes = { k : tuple([i if i > 0 else 10 for i in ashape]) for (k,ashape) in input_shapes.items()} - feed_dict = { - self._get_tf_tensor_name(graph, name): generate_data(feed_input_shapes[name], data_mode) - for name in feed_input_shapes - } - - with tf.Session(graph=graph) as sess: - # initialize - sess.run(tf.global_variables_initializer()) - # run the result - fetches = [] - for name in output_node_names: - fetches += graph.get_operation_by_name(name).outputs - - result = sess.run(fetches, feed_dict=feed_dict) - - output_graph_def = tf.graph_util.convert_variables_to_constants( - sess, # The session is used to retrieve the weights - tf.get_default_graph().as_graph_def( - ), # The graph_def is used to retrieve the nodes - output_node_names # The output node names are used to select the useful nodes - ) - with tf.gfile.GFile(frozen_model_file, 'wb') as f: - f.write(output_graph_def.SerializeToString()) - - # convert to CoreML - mlmodel = coremltools.converters.tensorflow.convert( - frozen_model_file, - inputs=input_shapes, - outputs=output_node_names, - use_cpu_only=use_cpu_only) - - if DEBUG: - print('\n mlmodel description: \n') - from coremltools.models.neural_network.printer import print_network_spec - print_network_spec(mlmodel.get_spec(), style='coding') - mlmodel.save(coreml_model_file) - print('\n mlmodel saved at %s' % coreml_model_file) - - # Transpose input data as CoreML requires - coreml_inputs = { - name: tf_transpose(feed_dict[self._get_tf_tensor_name(graph, name)]) - for name in feed_input_shapes - } - - # Run predict in CoreML - coreml_output = mlmodel.predict(coreml_inputs, useCPUOnly=use_cpu_only) - - idx = 0 - for node_name in output_node_names: - num_outputs = len(graph.get_operation_by_name(node_name).outputs) - if graph.get_operation_by_name(node_name).type == 'Merge': - num_outputs = 1 - for out_id in range(num_outputs): - tf_out = result[idx] - if len(tf_out.shape) == 0: - tf_out = np.array([tf_out]) - tp = 
tf_out.flatten() - out_name = node_name if num_outputs == 1 else node_name + '_' + str(out_id) - coreml_out = coreml_output[out_name] - cp = coreml_out.flatten() - - self.assertTrue(tf_out.shape == coreml_out.shape, msg=(tf_out.shape, 'vs.', coreml_out.shape)) - - if validate_bool_only: - cp = np.logical_and(cp, cp) - for i in range(len(tp)): - max_den = max(1.0, tp[i], cp[i]) - self.assertAlmostEqual(tp[i] / max_den, cp[i] / max_den, delta=delta) - - idx += 1 - - # Cleanup files - models on disk no longer useful - if os.path.exists(model_dir): - shutil.rmtree(model_dir) - return mlmodel - -class TFNetworkBatchTest(TFNetworkTest): - - def _test_tf_model( - self, - graph, - input_tensor_shapes, - output_node_names, - data_mode='random', - delta=1e-2, - use_cpu_only=False, - graph_optimizations="freeze", - quantize_tf_model=False, - batch_sizes=None): - """ Test function for TFConvNetTest. - graph: TensorFlow graph representing the model. - input_tensor_shapes: dict of input op and the shape of tensor it generates. - output_node_names: A list of names of output nodes. - graph_optimizations: one of ["freeze", "convert_variables_to_constants", None]. - graph optimizations performed on the TensorFlow graph before conversion. - quantize_tf_model: If true, will run TF-quantization utility on TF graph. - batch_sizes: If not None, and if all input shapes' first dimension is None, - test the TF graph with each batch size in batch_sizes - """ - - variable_batch_size = False - for name, shape in input_tensor_shapes.items(): - if len(shape) > 1 and shape[0] is None: - variable_batch_size = True - break - - if variable_batch_size: # batched case - if batch_sizes is None or len(batch_sizes) == 0: - batch_sizes = [1] - elif 1 not in batch_sizes: - batch_sizes = [1] + batch_sizes - - for bs in batch_sizes: - input_shapes = {} - for name, shape in input_tensor_shapes.items(): - if shape[0] is None: - input_shapes[name] = [bs] + list(shape[1:]) - else: - input_shapes[name] = shape - - super(TFNetworkBatchTest, self)._test_tf_model( - graph, - input_shapes, - output_node_names, - data_mode=data_mode, - input_refs=None, - delta=delta, - use_cpu_only=use_cpu_only, - graph_optimizations=graph_optimizations, - quantize_tf_model=quantize_tf_model) - - else: - super(TFNetworkBatchTest, self)._test_tf_model( - graph, - input_tensor_shapes, - output_node_names, - data_mode=data_mode, - input_refs=None, - delta=delta, - use_cpu_only=use_cpu_only, - graph_optimizations=graph_optimizations, - quantize_tf_model=quantize_tf_model) - diff --git a/coremltools/converters/tensorflow/test/test_convnets.py b/coremltools/converters/tensorflow/test/test_convnets.py deleted file mode 100644 index 0e66fdc37..000000000 --- a/coremltools/converters/tensorflow/test/test_convnets.py +++ /dev/null @@ -1,1612 +0,0 @@ -import unittest -import tensorflow.compat.v1 as tf -import numpy as np -from coremltools._deps import HAS_TF_1_14 -import math - -from test_base import TFNetworkTest, TFNetworkBatchTest -import itertools - - -# IMPORTANT NOTE TO ADD NEW TESTS: -# For each test function you should set up your own graph and session. -# Otherwise TF will carry all ops and tensors from previously run tests. 
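The note above is the central discipline of this (deleted) test file; a minimal, hedged illustration of the per-test isolation pattern, using a toy graph:

```python
# Each test builds its own Graph and Session so ops and tensors never leak
# between tests; tf is tensorflow.compat.v1, as imported at the top of the file.
import tensorflow.compat.v1 as tf

def isolated_run():
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, shape=[1, 2])
        y = 2.0 * x
    with tf.Session(graph=graph) as sess:
        return sess.run(y, feed_dict={x: [[1.0, 2.0]]})
```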
- -def conv_cell(inp, conv_weights, bias=None, activation=None, pooling=None, has_batchnorm=False, conv_config=None, data_format='NHWC'): - if conv_config is None: - conv_config = {'strides': [1, 1, 1, 1], 'padding': 'SAME'} - x = tf.nn.conv2d(inp, conv_weights, conv_config['strides'], conv_config['padding'], data_format=data_format) - return x - - -class TFConvNetTest(TFNetworkBatchTest): - @classmethod - def setUpClass(self): - """ - Set up the unit test by loading common utilities. - """ - pass - - def test_toy(self): - graph = tf.Graph() - with graph.as_default(): - matrix1 = tf.placeholder(tf.float32, shape=[1, 2]) - matrix2 = tf.Variable(tf.truncated_normal([2, 1])) - product = tf.matmul(matrix1, matrix2) - self._test_tf_model(graph, {matrix1.op.name: [1, 2]}, [product.op.name]) - - def test_linear(self): - graph = tf.Graph() - with graph.as_default(): - # placeholder constructor returns a tensor not an op - x = tf.placeholder(tf.float32, shape=[None, 20]) - # Make a redundant tensor. It should get trimmed - gt = tf.placeholder(tf.float32, shape=[None, 10]) - - W = tf.Variable(tf.ones([20, 10])) - b = tf.Variable(tf.ones([10])) - - y = tf.matmul(x, W) + b - output_name = [y.op.name] - self._test_tf_model(graph, {x.op.name: [None, 20]}, output_name, - batch_sizes=[1, 8]) - - def test_convnet(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - W1 = tf.Variable(tf.truncated_normal([3, 3, 3, 4], stddev=0.3)) - x = conv_cell(a, W1) - W2 = tf.Variable(tf.truncated_normal([3, 3, 4, 2], stddev=0.3)) - x = conv_cell(x, W2) - self._test_tf_model(graph, {a.op.name: [None, 8, 8, 3]}, [x.op.name], - batch_sizes=[1, 4]) - - def test_convnet_batchnorm(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - W1 = tf.Variable(tf.truncated_normal([3, 3, 3, 4], stddev=0.3)) - x = conv_cell(a, W1, has_batchnorm=True) - W2 = tf.Variable(tf.truncated_normal([3, 3, 4, 2], stddev=0.3)) - x = conv_cell(x, W2, has_batchnorm=True) - self._test_tf_model(graph, {a.op.name: [None, 8, 8, 3]}, [x.op.name], - batch_sizes=[1, 4]) - - def test_simple_convnet(self): - def weight_variable(shape): - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - - def bias_variable(shape): - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - - def conv2d(x, W): - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') - - def max_pool_2x2(x): - return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') - - def avg_pool_2x2(x): - return tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') - - graph = tf.Graph() - with graph.as_default(): - W_conv1 = weight_variable([5, 5, 1, 32]) - b_conv1 = bias_variable([32]) - - x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) - h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) - h_pool1 = max_pool_2x2(h_conv1) - - W_conv2 = weight_variable([5, 5, 32, 64]) - b_conv2 = bias_variable([64]) - - h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) - h_pool2 = avg_pool_2x2(h_conv2) - - output_name = [h_pool2.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 28, 28, 1]}, - output_name, batch_sizes=[1, 4]) - - def test_convnet_classifier(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - W_conv1 = tf.Variable(tf.truncated_normal([3, 3, 3, 2], stddev=0.3)) - h_conv1 = 
tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME') - h_conv1_flat = tf.reshape(h_conv1, [-1, 8 * 8 * 2]) - W_fc1 = tf.Variable(tf.truncated_normal([8 * 8 * 2, 4], stddev=0.3)) - h_fc1 = tf.matmul(h_conv1_flat, W_fc1) - output_name = [h_fc1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 8, 3]}, output_name, - batch_sizes=[1, 10]) - - @unittest.skip('Type 12 cannot be mapped') - def test_convnet_quantized(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - W_conv1 = tf.Variable(tf.truncated_normal([3, 3, 3, 2], stddev=0.3)) - h_conv1 = tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME') - h_conv1_flat = tf.reshape(h_conv1, [-1, 8 * 8 * 2]) - W_fc1 = tf.Variable(tf.truncated_normal([8 * 8 * 2, 4], stddev=0.3)) - h_fc1 = tf.matmul(h_conv1_flat, W_fc1) - - output_name = [h_fc1.op.name] - # quantized - self._test_tf_model( - graph, {x_image.op.name: [1, 8, 8, 3]}, - output_name, - delta=0.20, - quantize_tf_model=True) - - def test_pad_conv_fuse(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[None, 32, 18, 3]) - W = tf.Variable(tf.truncated_normal([9, 9, 3, 5], stddev=1)) - paddings = tf.constant([[0, 0], [5, 5], [1, 1], [0, 0]]) - x_pad = tf.pad(x, paddings, "CONSTANT") - output = tf.nn.conv2d(x_pad, W, strides=[1, 1, 1, 1], padding='VALID') - output_name = [output.op.name] - self._test_tf_model(graph, {x.op.name: [None, 32, 18, 3]}, output_name, - delta=.05, batch_sizes=[1, 4]) - - def test_dilated_conv(self): - Cin = 3 - Cout = 5 - # params in format (Hin, Win, K, dilation) - params = [(30, 18, 3, 3), (16, 12, 3, 4), (15, 21, 1, 3), (27, 18, 5, 3), (14, 20, 3, 2)] - for param in params: - Hin, Win, K, d = param - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[None, Hin, Win, Cin]) - W = tf.Variable(tf.truncated_normal([K, K, Cin, Cout], stddev=1)) - output = tf.nn.convolution( - x, W, strides=[1, 1], padding='VALID', dilation_rate=[d, d]) - output_name = [output.op.name] - self._test_tf_model(graph, {x.op.name: [None, Hin, Win, Cin]}, - output_name, delta=.01, batch_sizes=[1, 4]) - - def test_depthwise_conv2d_native(self): - options = dict( - depthwise_multiplier=[1, 2], - strides=[[1, 1, 1, 1], [1, 2, 2, 1]], - padding=['VALID', 'SAME'], - ) - product = itertools.product(*options.values()) - for prod in product: - params = dict(zip(options.keys(), prod)) - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - kernels = tf.Variable( - tf.truncated_normal([3, 3, 3, params['depthwise_multiplier']], - stddev=0.3)) - conv1 = tf.nn.depthwise_conv2d_native( - input=x_image, filter=kernels, strides=params['strides'], - padding=params['padding']) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - -class TFSingleLayerTest(TFNetworkBatchTest): - """ - Small models from tensorflow.layers - """ - def test_add_n(self): - - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[2,3]) - y = tf.placeholder(tf.float32, shape=[2,3]) - z = tf.placeholder(tf.float32, shape=[2,3]) - dummpy = tf.Variable([1,2]) - constant = tf.constant([[1,2,3],[4,5,6]], dtype=tf.float32) - output = [tf.add_n([x,x]), tf.add_n([x,y,2*z]), tf.add_n([x,constant])] - output_names = [n.op.name for n in output] - self._test_tf_model( - graph, {x.op.name: [2,3], 
y.op.name: [2,3], z.op.name: [2,3]}, - output_names) - - def test_dense(self): - # dense layer with some activation - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[None, 10]) - y = tf.layers.dense( - inputs=x, - units=16, - activation=tf.sigmoid, - bias_initializer=tf.random_uniform_initializer) - output_name = [y.op.name] - self._test_tf_model( - graph, {x.op.name: [None, 10]}, - output_name, - delta=1e-2, - quantize_tf_model=False, - use_cpu_only=True, - batch_sizes=[1, 10]) - - @unittest.skip('Type 12 cannot be mapped') - def test_dense_quantized(self): - # dense layer with some activation - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[None, 10]) - y = tf.layers.dense( - inputs=x, - units=16, - activation=tf.sigmoid, - bias_initializer=tf.random_uniform_initializer) - - output_name = [y.op.name] - self._test_tf_model( - graph, {x.op.name: [None, 10]}, output_name, delta=0.05, - quantize_tf_model=True, batch_sizes=[1, 4]) - - def test_dense_concat(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[None, 10]) - y = tf.layers.dense( - inputs=x, - units=16, - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - z1 = tf.layers.dense( - inputs=y, - units=20, - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - z2 = tf.layers.dense( - inputs=y, - units=20, - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - z3 = tf.layers.dense( - inputs=y, - units=20, - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - z = tf.concat([z1, z2, z3], axis=1) - - output_name = [z.op.name] - self._test_tf_model(graph, {x.op.name: [None, 10]}, output_name, - use_cpu_only=True, batch_sizes=[1, 4]) - - def test_conv2d_no_bias(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - W = tf.Variable(tf.random_normal((5, 5, 3, 4))) # [Kh, Kw, Cin, Cout] - conv1 = tf.nn.conv2d(input=x_image, filter=W, strides=[1, 1, 1, 1], padding='SAME') - output_name = [conv1.op.name] - self._test_tf_model( - graph, {x_image.op.name: [None, 8, 8, 3]}, - output_name, - delta=1e-2, - quantize_tf_model=False, - use_cpu_only=True, - batch_sizes=[1, 4]) - - def test_conv2d(self): - graph = tf.Graph() - batch_sizes = [1, 10] - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[5, 5], - padding='same', - activation=tf.nn.relu, - bias_initializer=tf.constant_initializer([1, 2, 3, 4])) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 8, 3]}, - output_name, delta=1e-2, use_cpu_only=True, batch_sizes=[1, 4]) - - @unittest.skip('Type 12 cannot be mapped') - def test_conv2d_quantized(self): - # conv layer with "fused activation" - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[5, 5], - padding='same', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - - output_name = [conv1.op.name] - self._test_tf_model( - graph, {x_image.op.name: [None, 8, 8, 3]}, - output_name, - delta=0.05, - quantize_tf_model=True, - batch_sizes=[1, 4]) - - def test_conv2d_valid(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - 
conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='valid', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 8, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_conv2d_stride2(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='valid', - strides=(2, 2), - bias_initializer=tf.random_uniform_initializer) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 8, 3]}, - output_name, batch_sizes=[1, 4]) - - @unittest.skip('SpaceToBatchND, BatchToSpaceND does not yet support some of the inputs') - def test_conv2d_dilated(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 32, 32, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='valid', - dilation_rate=(3, 4), # does not yet support non-equal dilation rate - bias_initializer=tf.random_uniform_initializer) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 32, 32, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_conv2d_transpose(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 2, 2, 8]) - conv1 = tf.layers.conv2d_transpose( - inputs=x_image, - filters=4, - kernel_size=[5, 5], - padding='same', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [1, 2, 2, 8]}, output_name) - - def test_conv2d_transpose_valid(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - conv1 = tf.layers.conv2d_transpose( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='valid', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - output_name = [conv1.op.name] - self._test_tf_model( - graph, {x_image.op.name: [1, 8, 8, 3]}, output_name) - - def test_conv2d_transpose_stride2(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - conv1 = tf.layers.conv2d_transpose( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='valid', - strides=(2, 2), - bias_initializer=tf.random_uniform_initializer) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [1, 8, 8, 3]}, output_name) - - def test_conv2d_ave_pooling(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='same', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - pool1 = tf.layers.average_pooling2d(inputs=conv1, pool_size=[2, 2], - strides=2) - output_name = [pool1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_conv2d_max_pooling(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='same', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - pool1 = 
tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], - strides=1, padding='same') - output_name = [pool1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_conv2d_bn(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=4, - kernel_size=[3, 3], - padding='same', - activation=tf.nn.relu, - bias_initializer=tf.random_uniform_initializer) - bn1 = tf.layers.batch_normalization(inputs=conv1, axis=-1) - output_name = [bn1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_conv2d_spatial_bn(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - bn1 = tf.layers.batch_normalization(inputs=x_image, axis=2) - output_name = [bn1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_separable_conv2d(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 8, 3]) - conv1 = tf.layers.separable_conv2d( - inputs=x_image, filters=4, kernel_size=[3, 3], padding='valid', depth_multiplier=2) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 8, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_conv1d(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 3]) - conv1 = tf.layers.conv1d( - inputs=x_image, filters=2, kernel_size=3, padding='valid', use_bias=True) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 3]}, output_name, - data_mode='linear', delta=.05, batch_sizes=[1, 4]) - - def test_conv1d_dense(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 3]) - conv1 = tf.layers.conv1d( - inputs=x_image, - filters=2, - kernel_size=3, - padding='same', - bias_initializer=tf.random_uniform_initializer) - conv1_flat = tf.reshape(conv1, [-1, 8 * 2]) - y = tf.layers.dense(inputs=conv1_flat, units=6, activation=tf.nn.relu) - output_name = [y.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 3]}, output_name, - batch_sizes=[1, 4]) - - def test_conv1d_ave_pooling(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 3]) - conv1 = tf.layers.conv1d(inputs=x_image, filters=2, kernel_size=5, - padding='same') - pool1 = tf.layers.average_pooling1d(inputs=conv1, pool_size=2, - strides=2) - output_name = [pool1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 8, 3]}, output_name, - batch_sizes=[1, 4]) - - def test_conv1d_max_pooling(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 8, 3]) - conv1 = tf.layers.conv1d(inputs=x_image, filters=2, kernel_size=3, - padding='same') - pool1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, - strides=1) - output_name = [pool1.op.name] - self._test_tf_model(graph, {x_image.op.name: [1, 8, 3]}, output_name, - batch_sizes=[1, 4]) - - def test_conv2d_resize_bilinear(self): - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - conv1 = tf.layers.conv2d( - inputs=x_image, - filters=3, - kernel_size=[3, 3], - padding='same', - activation=tf.nn.relu) - 
bl1 = tf.image.resize_bilinear(images=conv1, size=[32, 32]) - output_name = [bl1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_depthwise_conv2d(self): - options = dict( - depthwise_multiplier=[1, 2], - strides=[[1, 1, 1, 1], [1, 2, 2, 1]], - padding=['VALID', 'SAME'], - ) - product = itertools.product(*options.values()) - for prod in product: - params = dict(zip(options.keys(), prod)) - graph = tf.Graph() - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 16, 16, 3]) - kernels = tf.Variable( - tf.truncated_normal([3, 3, 3, params['depthwise_multiplier']], - stddev=0.3)) - conv1 = tf.nn.depthwise_conv2d( - input=x_image, - filter=kernels, - strides=params['strides'], - padding=params['padding']) - output_name = [conv1.op.name] - self._test_tf_model(graph, {x_image.op.name: [None, 16, 16, 3]}, - output_name, batch_sizes=[1, 4]) - - def test_concat_constants(self): - graph = tf.Graph() - x, y = np.meshgrid(np.linspace(0., 1., 256), np.linspace(0., 1., 256)) - x = np.reshape(x, [1, 256, 256, 1]) - y = np.reshape(y, [1, 256, 256, 1]) - with graph.as_default(): - x_image = tf.placeholder(tf.float32, shape=[None, 256, 256, 3]) - xx = tf.constant(x, dtype=tf.float32) - yy = tf.constant(y, dtype=tf.float32) - img_concatenated = tf.concat([x_image, xx, yy], -1) - output_name = [img_concatenated.op.name] - self._test_tf_model_constant(graph, {x_image.op.name: [1, 256, 256, 3]}, output_name) - - def test_split(self): - graph = tf.Graph() - with graph.as_default(): - x_input = tf.placeholder(tf.float32, shape=[None, 10, 10, 6]) - y1, y2 = tf.split(x_input, 2, axis=3) - z = tf.add(y1, y2) - self._test_tf_model_constant(graph, {x_input.op.name: [1, 10, 10, 6]}, [z.op.name]) - - def test_add(self): - shape_a = [[3, 4, 5], [1, 4 ,5], [1, 1, 4, 5]] - shape_b = [[3, 4 ,5], [4, 5], [4, 5]] - expand_dims = [None, [0], [0, 1], None] - - for i in range(len(shape_a)): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a[i]) - b = tf.placeholder(tf.float32, shape=shape_b[i]) - out = tf.add(a, b) - mlmodel = self._test_tf_model_constant(graph, {a.op.name: shape_a[i], b.op.name: shape_b[i]}, [out.op.name]) - nn_spec = mlmodel.get_spec().neuralNetwork - layers = nn_spec.layers - if expand_dims[i] is not None: - self.assertEqual(layers[0].expandDims.axes, expand_dims[i]) - self.assertEqual(layers[-1].WhichOneof('layer'), 'add') - - def test_add_stress(self): - B = 16 - C = 3 - H = 64 - W = 64 - # shapes = itertools.combinations([[B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]], 2) - - shapes_a = [[1, 1, 1], [1], [1, 1, 1, 1]] - shapes_b = [[B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]] - for shape_a in shapes_a: - for shape_b in shapes_b: - print(shape_a, shape_b) - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.add(a, b) - mlmodel = self._test_tf_model_constant(graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name]) - nn_spec = mlmodel.get_spec().neuralNetwork - layers = nn_spec.layers - self.assertEqual(layers[-1].WhichOneof('layer'), 'add') - - def test_add_elementwise_scalar(self): - graph = tf.Graph() - input_shape = [32, 3, 64, 64] - with graph.as_default(): - x = tf.constant(0.2342, shape=[]) - y = tf.placeholder(tf.float32, shape=input_shape) - output = tf.add(x, y) - output_name = [output.op.name] - self._test_tf_model_constant(graph, 
{y.op.name: input_shape}, output_name) - - def test_add_broadcastable(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[3]) - y = tf.placeholder(tf.float32, shape=[32, 18, 3]) - output = tf.add(x, y) - output_name = [output.op.name] - self._test_tf_model_constant(graph, {x.op.name: [3], y.op.name: [32, 18, 3]}, output_name) - - def test_sub(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.math.subtract(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_sub_v1(self): - graph = tf.Graph() - input_shape = [32, 3, 64, 64] - with graph.as_default(): - x = tf.constant(0.2342, shape=[]) - y = tf.placeholder(tf.float32, shape=input_shape) - output = tf.math.subtract(x, y) - output_name = [output.op.name] - self._test_tf_model_constant(graph, {y.op.name: input_shape}, output_name) - - def test_sub_v2(self): - graph = tf.Graph() - input_shape = [32, 3, 64, 64] - with graph.as_default(): - x = tf.constant(0.2342, shape=[]) - y = tf.placeholder(tf.float32, shape=input_shape) - output = tf.math.subtract(y, x) - output_name = [output.op.name] - self._test_tf_model_constant(graph, {y.op.name: input_shape}, output_name) - - def test_sub_v3(self): - graph = tf.Graph() - input_shape = [1] - with graph.as_default(): - x = tf.constant(0, shape=[]) - y = tf.placeholder(tf.int32, shape=input_shape) - output = tf.math.subtract(y, x) - output_name = [output.op.name] - self._test_tf_model_constant(graph, {y.op.name: input_shape}, output_name) - - def test_mul(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.math.multiply(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_floor_mod(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.floormod(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_floor_div(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.floor_div(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_real_div(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.divide(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_real_div_constant(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - b = tf.constant(5.0, shape=[]) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.divide(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_real_div_constant_v1(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - b = tf.constant(5.0, shape=[1]) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.divide(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_bias_add(self): - # shape = [3, 4, 5] - shape = [1, 2, 2, 4] - graph = 
tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=[4]) - out = tf.nn.bias_add(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: [4]}, [out.op.name]) - - def test_maximum(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.maximum(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_minimum(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.minimum(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_reduce_prod(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.reduce_prod(a, axis=0) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_reduce_mean(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.reduce_mean(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_reduce_sum(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.reduce_sum(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_reduce_max(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.reduce_max(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_reduce_min(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.reduce_min(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_logical_and(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.logical_and(tf.less(a, b), tf.less(a, b)) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_logical_or(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.logical_or(tf.less(a, b), tf.less(a, b)) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_logical_not(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.logical_not(tf.less(a, b)) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_cast(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.cast(a, tf.int32) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_sin(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.sin(a) - 
self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_cos(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.cos(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_tan(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.tan(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_sqrt(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 10, 10, 6]) - out = tf.sqrt(a) - self._test_tf_model_constant(graph, {a.op.name: [1, 10, 10, 6]}, [out.op.name], data_mode='random_large') - - def test_rsqrt(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.rsqrt(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], data_mode='random_large') - - def test_pow(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 5, 5, 6]) - out = tf.pow(a, 4) - self._test_tf_model_constant(graph, {a.op.name: [1, 5, 5, 6]}, [out.op.name]) - - def test_log(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 20]) - out = tf.log(a) - self._test_tf_model_constant(graph, {a.op.name: [1, 20]}, [out.op.name], data_mode='random') - - def test_exp(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 20]) - out = tf.exp(a) - self._test_tf_model_constant(graph, {a.op.name: [1, 20]}, [out.op.name]) - - def test_abs(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.abs(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_square(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=[None, 20]) - out = tf.square(a) - self._test_tf_model_constant(graph, {a.op.name: [1, 20]}, [out.op.name]) - - def test_squared_difference(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.squared_difference(a, b) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_sign(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.sign(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], data_mode='random_int') - - def test_ceil(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.ceil(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], data_mode='random_int') - - def test_floor(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.floor(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], data_mode='random_int') - - def test_round(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.round(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_negative(self): - shape = [3, 4, 5] - 
graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.negative(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_equal(self): - shape_a = [1, 4, 5] - shape_b = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.equal(a, b) - self._test_tf_model_constant( - graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_not_equal(self): - shape_a = [1, 4, 5] - shape_b = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.not_equal(a, b) - self._test_tf_model_constant( - graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_less(self): - shape_a = [1, 4, 5] - shape_b = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.less(a, b) - self._test_tf_model_constant( - graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_less_equal(self): - shape_a = [1, 4, 5] - shape_b = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.less_equal(a, b) - self._test_tf_model_constant( - graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_greater(self): - shape_a = [1, 4, 5] - shape_b = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.greater(a, b) - self._test_tf_model_constant( - graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_greater_equal(self): - shape_a = [1, 4, 5] - shape_b = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape_a) - b = tf.placeholder(tf.float32, shape=shape_b) - out = tf.greater_equal(a, b) - self._test_tf_model_constant( - graph, {a.op.name: shape_a, b.op.name: shape_b}, [out.op.name], - data_mode='random_zero_mean_with_zeros') - - def test_strided_slice(self): - shape = [3, 2, 3] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.strided_slice(a, [1, 0, 0], [2, -1, 3]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_expand_dims(self): - shape = [3, 2, 3] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.expand_dims(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_scalar_input_with_consecutive_expand_dims(self): - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape = ()) - b = tf.expand_dims(a, axis=-1) - out = tf.expand_dims(b, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: ()}, [out.op.name]) - - def test_tile(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.tile(a, [1, 2, 3]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - 
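A note for readers of this diff: every `_test_tf_model_constant` case above follows the same recipe, which the deleted harness implements: build a tiny TF 1.x graph, freeze it, convert the frozen graph with the public converter entry point that appears later in this diff, and compare predictions. Below is a minimal, self-contained sketch of that flow; the file paths and the `'input'` name are illustrative only, and `predict` assumes a macOS host with the Core ML runtime available.

```python
import numpy as np
import tensorflow as tf  # TF 1.x API, as used throughout these tests

from coremltools.converters.tensorflow import convert

# Build a single-op graph, mirroring e.g. test_tile above.
graph = tf.Graph()
with graph.as_default():
    a = tf.placeholder(tf.float32, shape=[3, 4, 5], name='input')
    out = tf.tile(a, [1, 2, 3])

x = np.random.rand(3, 4, 5).astype(np.float32)
with tf.Session(graph=graph) as sess:
    tf_result = sess.run(out, feed_dict={a: x})
    # No variables in this graph, so "freezing" just serializes the graph def.
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), [out.op.name])
tf.train.write_graph(frozen, '/tmp', 'tile_frozen.pb', as_text=False)

# Convert and compare, in the spirit of the harness's numerical check.
mlmodel = convert('/tmp/tile_frozen.pb',
                  inputs={'input': [3, 4, 5]},
                  outputs=[out.op.name])
coreml_result = mlmodel.predict({'input': x})[out.op.name]
np.testing.assert_almost_equal(coreml_result, tf_result, decimal=2)
```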
- def test_unary_activation_sigmoid(self): - shape = [1, 5, 5, 6] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.sigmoid(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_unary_activation_relu(self): - shape = [1, 5, 5, 6] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.nn.relu(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_unary_activation_relu6(self): - shape = [1, 5, 5, 6] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.nn.relu6(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], data_mode='random_large') - - def test_unary_activation_leaky_relu(self): - shape = [1, 5, 5, 6] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.nn.leaky_relu(a, 0.15) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_unary_activation_tanh(self): - shape = [1, 5, 5, 6] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.tanh(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_unary_activation_elu(self): - shape = [1, 5, 5, 6] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.nn.elu(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_stack(self): - shape = [1] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - out = tf.stack([a, b], axis=1) - self._test_tf_model_constant(graph, {a.op.name: shape, b.op.name: shape}, [out.op.name]) - - def test_gather_nd(self): - shape = [2, 3, 2] - indices = [[[0, 0], [0, 1]], [[1, 0], [1, 1]]] - graph = tf.Graph() - with graph.as_default(): - params = tf.placeholder(tf.float32, shape=shape) - out = tf.gather_nd(params=params, indices=indices) - self._test_tf_model_constant(graph, {params.op.name: shape}, [out.op.name]) - - def test_scatter_nd(self): - graph = tf.Graph() - with graph.as_default(): - indices = tf.constant([[0], [2]]) - updates = tf.placeholder(tf.float32, shape=[2, 4, 4]) - shape = tf.constant([4, 4, 4]) - out = tf.scatter_nd(indices, updates, shape) - self._test_tf_model_constant(graph, {updates.op.name: [2, 4, 4]}, [out.op.name]) - - def test_scatter_nd_with_dynamic_shape(self): - graph = tf.Graph() - with graph.as_default(): - indices = tf.constant([[0], [2]]) - updates = tf.placeholder(tf.float32, shape=[2, 4, 4]) - tensor = tf.placeholder(tf.float32, shape=[None, 4, 4]) - shape = tf.shape(tensor) - out = tf.scatter_nd(indices, updates, shape) - self._test_tf_model_constant(graph, {updates.op.name: [2, 4, 4], tensor.op.name: [-1,4,4]}, [out.op.name]) - - def test_constant_pad(self): - shape = [1, 2, 2, 5] - graph = tf.Graph() - with graph.as_default(): - paddings = tf.constant([[0, 0], [1, 1], [2, 2], [0, 0]]) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.pad(a, paddings=paddings, mode='constant') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_constant_pad_v2(self): - shape = [1, 2, 2, 5] - graph = tf.Graph() - with graph.as_default(): - paddings = tf.constant([[0, 0], [1, 1], [2, 2], [0, 0]]) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.pad(a, paddings=paddings, 
mode='constant', constant_values=1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_mirror_pad(self): - shape = [1, 2, 2, 5] - graph = tf.Graph() - with graph.as_default(): - paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.pad(a, paddings=paddings, mode='reflect') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_one_hot(self): - shape = [2, 2, 3] - graph = tf.Graph() - # indices as constants - with graph.as_default(): - indices = [[0, 2], [1, -1]] - one_hot = tf.one_hot(indices=indices, depth=3) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.add(one_hot, a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - # indices as inputs - # todo: add implementation - # with graph.as_default(): - # indices = tf.placeholder(tf.int32, shape=[1]) - # out = tf.one_hot(indices=indices, depth=3) - # self._test_tf_model_constant(graph, {indices.op.name: [1]}, [out.op.name], data_mode='random_zeros_ones') - - def test_size(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.int32, shape=[1]) - size = tf.size(a) - out = tf.add(size, b) - self._test_tf_model_constant( - graph, {a.op.name: shape, b.op.name: [1]}, - [out.op.name], data_mode='random_int') - - def test_all(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.keras.backend.all(a, axis=0) - self._test_tf_model_constant( - graph, {a.op.name: shape}, [out.op.name], - data_mode='random_zeros_ones', validate_bool_only=True) - - def test_any(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.keras.backend.any(a, axis=0) - self._test_tf_model_constant( - graph, {a.op.name: shape}, [out.op.name], - data_mode='random_zeros_ones', validate_bool_only=True) - - def test_topk(self): - shape = [12, 5, 9, 7] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - values, indices = tf.math.top_k(a, k=3) - self._test_tf_model_constant(graph, {a.op.name: shape}, [values.op.name]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [indices.op.name]) - - def test_argmax(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.argmax(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_argmin(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.argmin(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_fill(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.add(tf.fill(dims=shape, value=1.0), a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_clip(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.clip_by_value(a, clip_value_min=-0.2, clip_value_max=0.2) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_log_softmax(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, 
shape=shape) - out = tf.nn.log_softmax(a, axis=-1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_slice(self): - shape = [3, 4, 10] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.slice(a, begin=[0, 1, 0], size=[-1, 2, 3]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.strided_slice(a, begin=[0, 1, 0], end=[-1, 2, 5], strides=[1, 2, 1]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_resize_bilinear(self): - sizes = [[20, 30], [20, 30], [25, 45]] - align_corners = [True, False, False] - for sz, ac in zip(sizes, align_corners): - graph = tf.Graph() - with graph.as_default(): - x_input = tf.placeholder(tf.float32, shape=[None, 10, 10, 3]) - z = tf.image.resize_bilinear(x_input, size=sz, - align_corners=ac) - output_name = [z.op.name] - self._test_tf_model_constant(graph, {x_input.op.name: [1, 10, 10, 3]}, - output_name) - - def test_resize_nearest_neighbor(self): - sizes = [[20, 30]] - align_corners = [False] - for sz, ac in zip(sizes, align_corners): - graph = tf.Graph() - with graph.as_default(): - x_input = tf.placeholder(tf.float32, shape=[None, 10, 10, 3]) - z = tf.image.resize_nearest_neighbor(x_input, size=sz, - align_corners=ac) - output_name = [z.op.name] - self._test_tf_model_constant(graph, {x_input.op.name: [1, 10, 10, 3]}, - output_name) - - def test_strided_slice_ellipsis_mask(self): - shape = [3, 4, 10] - graph = tf.Graph() - - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.strided_slice(a, begin=[-1, 5], end=[1, -6], strides=[1, 1], end_mask=1, ellipsis_mask=2) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_slice_issue_304(self): - shape = [1, 80, 20, 3] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, :-1, :, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, :-1, :, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, :-1, :-1, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, :, :-1, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, 1:, :-1, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, :-1, 1:, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, :, 1:, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: 
shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, 1:, :, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - tf.reset_default_graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - aux = a[:, 1:, 1:, :] - out = tf.multiply(aux, 1, 'aux') - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_crop_resize(self): - graph = tf.Graph() - with graph.as_default(): - # placeholder constructor returns a tensor not an op - x = tf.placeholder(tf.float32, shape=[None, 20]) - # Make a redundant tensor. It should get trimmed - dummy = tf.placeholder(tf.float32, shape=[None, 10]) - - W = tf.Variable(tf.ones([20, 10])) - b = tf.Variable(tf.ones([10])) - - y = tf.matmul(x, W) + b - - self._test_tf_model(graph, {x.op.name: [None, 20]}, [y.op.name], - batch_sizes=[1, 8]) - - def test_where(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - c = tf.placeholder(tf.bool, shape=shape) - out = tf.where(c, a, b) - self._test_tf_model_constant(graph, { - a.op.name: shape, b.op.name: shape, c.op.name: shape}, [out.op.name]) - - @unittest.skipIf(not HAS_TF_1_14, 'Missing TF 1.14. Skipping test.') - def test_where_v2(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - b = tf.placeholder(tf.float32, shape=shape) - c = tf.placeholder(tf.bool, shape=shape) - out = tf.where_v2(c, a, b) - self._test_tf_model_constant(graph, { - a.op.name: shape, b.op.name: shape, c.op.name: shape}, [out.op.name]) - - def test_where_non_zero(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.where(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name], data_mode='random_zeros_ones') - - def test_transpose(self): - shape = [4, 3, 1] - graph = tf.Graph() - with graph.as_default(): - axes = np.random.permutation(len(shape)) - a = tf.placeholder(tf.float32, shape=shape) - out = tf.transpose(a, axes) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_space_to_depth(self): - shapes = [[1, 2, 2, 1], [1, 2, 2, 3], [1, 4, 4, 1], [4, 4, 6, 2]] - for shape in shapes: - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.space_to_depth(a, 2) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_depth_to_space(self): - shapes = [[1, 1, 1, 4], [1, 1, 1, 12], [1, 2, 2, 4], [4, 2, 3, 8]] - for shape in shapes: - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.depth_to_space(a, 2) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_space_to_batch_nd(self): - shapes = [[1, 2, 2, 1], [1, 2, 2, 3], [1, 4, 4, 1], [2, 2, 4, 1]] - for shape in shapes: - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.space_to_batch_nd(a, block_shape=[2, 2], paddings=[[0, 0], [0, 0]]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_space_to_batch_nd_with_paddings(self): - shapes = [[1, 2, 2, 1], [1, 2, 2, 3], [1, 4, 4, 1], [2, 2, 4, 1]] - for shape in shapes: - graph = tf.Graph() - with 
graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.space_to_batch_nd(a, block_shape=[2, 2], paddings=[[2, 2], [3, 3]]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_batch_to_space_nd(self): - shapes = [[4, 1, 1, 1], [4, 1, 1, 3], [4, 2, 2, 1], [8, 1, 3, 1]] - for shape in shapes: - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.batch_to_space_nd(a, block_shape=[2, 2], crops=[[0, 0], [0, 0]]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_batch_to_space_nd_with_cropping(self): - shapes = [[4, 3, 3, 1], [4, 3, 3, 3], [4, 2, 2, 1], [8, 5, 3, 1]] - for shape in shapes: - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.batch_to_space_nd(a, block_shape=[2, 2], crops=[[1, 2], [1, 1]]) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_selu(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.nn.selu(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_matrix_band_part(self): - shape = [3, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.linalg.band_part(a, 2, -1) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - @unittest.skip('numeric') - def test_lrn(self): - shape = [1, 4, 4, 5] - graph = tf.Graph() - with graph.as_default(): - a = tf.placeholder(tf.float32, shape=shape) - out = tf.nn.lrn(a) - self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name]) - - def test_cond(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.constant(-2., dtype=tf.float32) - y = tf.constant(0., dtype=tf.float32) - data = tf.placeholder(tf.float32, shape=[1]) - - def f1(): return tf.zeros(shape=[1]) - - def f2(): return tf.multiply(data, 3.) 
-
-            out = tf.cond(tf.less_equal(x, y), f1, f2)
-        self._test_tf_model_constant(graph, {data.op.name: [1]}, [out.op.name])
-
-    def test_cond_with_lambda(self):
-        graph = tf.Graph()
-        with graph.as_default():
-            a = tf.constant(-2., dtype=tf.float32)
-            b = tf.constant(23., dtype=tf.float32)
-            data = tf.placeholder(tf.float32, shape=[1])
-            c = tf.multiply(a, b)
-            out = tf.cond(a < b, lambda: tf.add(a, c), lambda: tf.square(data))
-        self._test_tf_model_constant(graph, {data.op.name: [1]}, [out.op.name])
-
-    def test_zeros_like_static(self):
-        shape = [3, 4, 5]
-        graph = tf.Graph()
-        with graph.as_default():
-            a = tf.placeholder(tf.float32, shape=shape)
-            out = tf.add(tf.zeros_like(a), a)
-        self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name])
-
-    def test_zeros_like_dynamic(self):
-        shape = [3,]
-        graph = tf.Graph()
-        with graph.as_default():
-            a = tf.placeholder(tf.int32, shape=shape)
-            c = tf.fill(dims=a, value=0.2)
-            out = tf.zeros_like(c)
-        self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name])
-
-    def test_gelu_approximate(self):
-        '''
-        Test that the gelu tanh-approximation pattern is fused into a single gelu layer.
-        '''
-
-        shape = [3]
-        graph = tf.Graph()
-        with graph.as_default():
-            a = tf.placeholder(tf.float32, shape=shape)
-            b = 0.5 * (1.0 + tf.tanh((math.sqrt(2 / math.pi) * (a + 0.044715 * tf.pow(a, 3)))))
-            out = b * a
-        mlmodel = self._test_tf_model_constant(graph, {a.op.name: shape}, [out.op.name])
-        spec = mlmodel.get_spec()
-        nn_spec = spec.neuralNetwork
-        number_gelu_layers = 0
-        for layer in nn_spec.layers:
-            if layer.WhichOneof('layer') == 'gelu':
-                number_gelu_layers += 1
-        self.assertEqual(number_gelu_layers, 1)
-
-
-if __name__ == '__main__':
-    # unittest.main()
-    suite = unittest.TestSuite()
-    suite.addTest(TFSingleLayerTest('test_scalar_input_with_consecutive_expand_dims'))
-    unittest.TextTestRunner().run(suite)
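For reference, the op pattern that `test_gelu_approximate` builds by hand is the standard tanh approximation of GELU; the converter is expected to recognize this subgraph and collapse it into a single `gelu` layer:

```latex
\mathrm{GELU}(x) \;\approx\; \frac{x}{2}\left(1 + \tanh\!\left(\sqrt{\frac{2}{\pi}}\left(x + 0.044715\,x^{3}\right)\right)\right)
```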
diff --git a/coremltools/converters/tensorflow/test/test_custom_layer.py b/coremltools/converters/tensorflow/test/test_custom_layer.py
deleted file mode 100644
index b17550663..000000000
--- a/coremltools/converters/tensorflow/test/test_custom_layer.py
+++ /dev/null
@@ -1,162 +0,0 @@
-import pytest
-import unittest
-import tempfile
-import tensorflow as tf
-import numpy as np
-import os
-
-from coremltools.converters.tensorflow import convert
-from tensorflow.python.tools.freeze_graph import freeze_graph
-from coremltools.proto import NeuralNetwork_pb2
-from coremltools.converters.nnssa.coreml import shapes as custom_shape_update
-
-class CustomLayerTest(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.test_temp_dir = tempfile.mkdtemp()
-
-    def _simple_freeze(self, input_graph, input_checkpoint, output_graph,
-                       output_node_names):
-        # output_node_names is a string of names separated by comma
-        freeze_graph(input_graph=input_graph,
-                     input_saver="",
-                     input_binary=False,
-                     input_checkpoint=input_checkpoint,
-                     output_node_names=output_node_names,
-                     restore_op_name="save/restore_all",
-                     filename_tensor_name="save/Const:0",
-                     output_graph=output_graph,
-                     clear_devices=True,
-                     initializer_nodes="")
-
-    def _test_tf_graph(self, graph,
-                       output_feature_names,
-                       input_name_shape_dict,
-                       useCPUOnly=True,
-                       add_custom_layers=False,
-                       custom_conversion_functions={},
-                       custom_shape_functions={}):
-        # Create temporary model
-        model_dir = self.test_temp_dir
-        graph_def_file = os.path.join(model_dir, 'temp.pbtxt')
-        checkpoint_file = os.path.join(model_dir, 'temp.ckpt')
-        frozen_model_file = os.path.join(model_dir, 'temp.pb')
-
-        tf.reset_default_graph()
-
-        with tf.Session(graph=graph) as sess:
-            # initialize
-            sess.run(tf.global_variables_initializer())
-            # prepare the tensorflow inputs
-            feed_dict = {}
-            for in_tensor_name in input_name_shape_dict:
-                in_tensor_shape = input_name_shape_dict[in_tensor_name]
-                feed_dict[in_tensor_name + ':0'] = np.random.rand(*in_tensor_shape)
-            # run the result
-            fetches = [graph.get_operation_by_name(name).outputs[0] for name in \
-                       output_feature_names]
-            tf_result = sess.run(fetches, feed_dict=feed_dict)
-            # save graph definition somewhere
-            tf.train.write_graph(sess.graph, model_dir, graph_def_file)
-            # save the weights
-            saver = tf.train.Saver()
-            saver.save(sess, checkpoint_file)
-
-        # freeze the graph
-        self._simple_freeze(
-            input_graph=graph_def_file,
-            input_checkpoint=checkpoint_file,
-            output_graph=frozen_model_file,
-            output_node_names=",".join(output_feature_names))
-
-
-        coreml_model = convert(frozen_model_file,
-                               outputs=output_feature_names,
-                               inputs=input_name_shape_dict,
-                               add_custom_layers=add_custom_layers,
-                               custom_conversion_functions=custom_conversion_functions,
-                               custom_shape_functions=custom_shape_functions)
-        return coreml_model
-
-# Custom Layer Tests
-class TestCustomLayer(CustomLayerTest):
-    # Test custom layer with conversion function
-    def test_custom_topk(self):
-        # Custom shape function
-        def _shape_topk(layer_spec, input_shapes):
-            params = layer_spec.topK
-            value_shape = index_shape = input_shapes[0][:-1] + [params.K]
-            output_shapes = [value_shape, index_shape]
-            return output_shapes
-
-        # Custom conversion function
-        def _convert_topk(ssa_converter, node):
-            coreml_nn_builder = ssa_converter._get_builder()
-            constant_inputs = node.attr
-
-            params = NeuralNetwork_pb2.CustomLayerParams()
-            params.className = 'Top_K'
-            params.description = "Custom layer that corresponds to the top_k TF op"
-            params.parameters["sorted"].boolValue = node.attr.get('sorted')
-            # get the value of k
-            k = constant_inputs.get(node.inputs[1], 3)
-            params.parameters["k"].intValue = k
-            layer = coreml_nn_builder.add_custom(name=node.name,
-                                                 input_names=[node.inputs[0]],
-                                                 output_names=['output'],
-                                                 custom_proto_spec=params)
-            custom_shape_update.propagate_single_layer(layer, ssa_converter.tensor_shapes, custom_shape_function=_shape_topk)
-
-        graph = tf.Graph()
-        with graph.as_default() as g:
-            x = tf.placeholder(tf.float32, shape=[None, 8], name='input')
-            y = tf.layers.dense(inputs=x, units=12, activation=tf.nn.relu)
-            y = tf.nn.softmax(y, axis=1)
-            y = tf.nn.top_k(y, k=3, sorted=False, name='output')
-
-        output_name = ['output']
-        inputs = {'input':[1, 8]}
-
-        coreml_model = self._test_tf_graph(graph,
-                                           output_name,
-                                           inputs,
-                                           add_custom_layers=True,
-                                           custom_conversion_functions={'TopKV2': _convert_topk},
-                                           custom_shape_functions={'TopKV2':_shape_topk})
-
-        spec = coreml_model.get_spec()
-        layers = spec.neuralNetwork.layers
-        self.assertIsNotNone(layers[3].custom)
-        self.assertEqual('Top_K', layers[3].custom.className)
-        self.assertEqual(3, layers[3].custom.parameters['k'].intValue)
-        self.assertEqual(False, layers[3].custom.parameters['sorted'].boolValue)
-
-    # Test custom layer when no custom conversion function is provided
-    def test_custom_acos(self):
-        # Custom Shape function
-        def _shape_acos(layer_spec, input_shapes):
-            return input_shapes[:]
-
-        graph = tf.Graph()
-        with graph.as_default() as g:
-            x = tf.placeholder(tf.float32, shape=[None, 8], name='input')
-            y = tf.layers.dense(inputs=x, units=12, activation=tf.nn.relu)
-            y = tf.math.acos(y,
name='output') - - output_name = ['output'] - inputs = {'input':[1, 8]} - - coreml_model = self._test_tf_graph(graph, - output_name, - inputs, - add_custom_layers=True, - custom_shape_functions={'Acos':_shape_acos}) - - spec = coreml_model.get_spec() - layers = spec.neuralNetwork.layers - self.assertIsNotNone(layers[2].custom) - self.assertEqual('Acos', layers[2].custom.className) - -if __name__ == '__main__': - unittest.main() diff --git a/coremltools/converters/tensorflow/test/test_dyn_rnn.py b/coremltools/converters/tensorflow/test/test_dyn_rnn.py deleted file mode 100644 index 44caecb6c..000000000 --- a/coremltools/converters/tensorflow/test/test_dyn_rnn.py +++ /dev/null @@ -1,142 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import tensorflow as tf -import numpy as np -import unittest - -from test_base import TFNetworkTest - - -class TFDynRNNTest(TFNetworkTest): - - # Allows you to override common test entry for this class - # Backend - set use_cpu_only to be True when working on Intel GPU macs - def _test_tf_model( - self, - graph, - input_shapes, - output_node_names, - data_mode='random', - input_refs=None, - delta=1e-2, - use_cpu_only=True, - graph_optimizations="freeze", # one of ["freeze", "convert_variables_to_constants", None] - quantize_tf_model=False, - quantize_mlmodel=False, - quantize_config={}): - super(TFDynRNNTest, self)._test_tf_model( - graph, - input_shapes, - output_node_names, - data_mode=data_mode, - input_refs=input_refs, - delta=delta, - use_cpu_only=use_cpu_only, - graph_optimizations=graph_optimizations, - quantize_tf_model=quantize_tf_model, - quantize_mlmodel=quantize_mlmodel, - quantize_config=quantize_config) - - def test_simple_lstm(self, **kwargs): - # (batch_size, seq_len, input_dim) - batch_size, sequence_length, hidden_size = 1, 5, 10 - input_shape = [batch_size, sequence_length, hidden_size] - - graph = tf.Graph() - with graph.as_default() as g: - lstm_initializer = tf.constant_initializer(0.8) - - data = tf.placeholder(tf.float32, input_shape, name='input') - cell = tf.nn.rnn_cell.LSTMCell( - hidden_size, state_is_tuple=True, initializer=lstm_initializer) - - init_state = cell.zero_state(batch_size, dtype=tf.float32) - val, state = tf.nn.dynamic_rnn(cell, data, initial_state=init_state, dtype=tf.float32) - - self._test_tf_model(graph, {'input': input_shape}, - [val.op.name, state[0].op.name, state[1].op.name], - quantize_mlmodel=kwargs.get('quantize_mlmodel',False), - quantize_config=kwargs.get('quantize_config', {})) - - def test_simple_lstm_quantized_8bit_linear(self): - np.random.seed(2019) - quantize_config = {'nbits': 8, 'mode': 'linear'} - self.test_simple_lstm(quantize_mlmodel=True, - quantize_config=quantize_config) - - def test_lstm_block_fused_cell(self): - sequence_length, batch_size, input_dim, hidden_size = 5, 1, 24, 32 - input_shape = [sequence_length, batch_size, input_dim] - forget_bias = np.random.rand() - graph = tf.Graph() - with graph.as_default(): - data = tf.placeholder(tf.float32, input_shape, name='input') - lstm_block_cell = tf.contrib.rnn.LSTMBlockFusedCell( - num_units=hidden_size, forget_bias=forget_bias) - _, state = lstm_block_cell(data, dtype=tf.float32) - self._test_tf_model(graph, {'input': input_shape}, [state[0].op.name, state[1].op.name]) - - def test_simple_rnn(self): - batch_size, sequence_length, hidden_size = 1, 5, 10 - input_shape = [batch_size, sequence_length, hidden_size] # (batch_size, 
seq_len, input_dim) - graph = tf.Graph() - with graph.as_default() as g: - rnn_initializer = tf.constant_initializer(0.8) - - data = tf.placeholder(tf.float32, input_shape, name='input') - cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) - - init_state = cell.zero_state(batch_size, dtype=tf.float32) - val, state = tf.nn.dynamic_rnn(cell, data, initial_state=init_state, dtype=tf.float32) - - self._test_tf_model(graph, {'input': input_shape}, [val.op.name, state.op.name]) - - def test_simple_bilstm(self): - batch_size, sequence_length, hidden_size = 1, 5, 10 - input_shape = [batch_size, sequence_length, hidden_size] # (batch_size, seq_len, input_dim) - graph = tf.Graph() - with graph.as_default() as g: - data = tf.placeholder(tf.float32, input_shape, name='input') - fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True) - bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True) - - init_state = fw_cell.zero_state(batch_size, dtype=tf.float32) - val, states = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, data, dtype=tf.float32) - - output_node_names = [ - x.op.name - for x in [val[0], val[1], states[0][0], states[0][1], states[1][0], states[1][1]] - ] - self._test_tf_model(graph, {'input': input_shape}, output_node_names) - - @unittest.skip - def test_batched_bilstm(self): - batch_size, max_sequence_length, hidden_size = 4, 5, 10 - input_shape = [ - batch_size, max_sequence_length, hidden_size - ] # (batch_size, seq_len, input_dim) - graph = tf.Graph() - with graph.as_default() as g: - data = tf.placeholder(tf.float32, input_shape, name='input') - fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True) - bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True) - - init_state = fw_cell.zero_state(batch_size, dtype=tf.float32) - val, states = tf.nn.bidirectional_dynamic_rnn( - fw_cell, bw_cell, data, sequence_length=[2, 3, 4, 5], dtype=tf.float32) - - output_node_names = [ - x.op.name - for x in [val[0], val[1], states[0][0], states[0][1], states[1][0], states[1][1]] - ] - self._test_tf_model(graph, {'input': input_shape}, output_node_names) - - -if __name__ == '__main__': - unittest.main() - # suite = unittest.TestSuite() - # suite.addTest(TFDynRNNTest('test_lstm_block_fused_cell')) - # unittest.TextTestRunner().run(suite) diff --git a/coremltools/converters/tensorflow/test/test_tf_2x.py b/coremltools/converters/tensorflow/test/test_tf_2x.py deleted file mode 100644 index b4781e28c..000000000 --- a/coremltools/converters/tensorflow/test/test_tf_2x.py +++ /dev/null @@ -1,1255 +0,0 @@ -import unittest -import tempfile -import numpy as np -import tensorflow as tf -import coremltools -import os -import shutil -from test_utils import generate_data -from coremltools._deps import HAS_TF_2 -import math -import pytest -from itertools import * - - -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestSingleOp(unittest.TestCase): - # In this class we test tensorflow 2.x op without using Keras API - - def _test_coreml(self, model, input_dic=None, output_names=None): - - # Get concrete function - concrete_func = model.__call__.get_concrete_function() - - # Get function input - if input_dic == None: - input_dic = [] - for input in concrete_func.inputs: - name = input.name.split(':')[0] - shape = input.shape - if shape == None or any([x is None for x in input.shape.as_list()]): - raise ValueError("Please specify 'input_dic' for dynamic shape input.") - shape = input.shape.as_list() - input_dic.append((name, shape)) - else: - if not 
isinstance(input_dic, list):
-                raise TypeError("'input_dic' should be [(str, tensor)] type.")
-
-        inputs = [(name, np.random.uniform(-1, 1, shape).astype(np.float32), shape)
-                  for name, shape in input_dic]
-
-        # Get output names
-        if output_names is None:
-            output_names = [output.name.split(':')[0] for output in concrete_func.outputs]
-        else:
-            if not isinstance(output_names, list):
-                raise TypeError("'output_names' should be [str] type.")
-
-        # TensorFlow predict
-        tf_inputs = [tf.convert_to_tensor(value) for name, value, shape in inputs]
-        tf_outputs = model(*tf_inputs)
-
-        # Core ML model predict
-        # Somehow the converter cannot accept input shape [] right now
-        # TODO: Need to fix it
-        coreml_inputs = {name: shape if not shape == [] else [1, ] for name, value, shape in inputs}
-        coreml_predict_inputs = {name: value if not shape == [] else np.array([value]) for name, value, shape in inputs}
-
-        coreml_model = coremltools.converters.tensorflow.convert(
-            [concrete_func],
-            inputs=coreml_inputs,
-            outputs=output_names
-        )
-        coreml_outputs = coreml_model.predict(coreml_predict_inputs)
-
-        # Compare TensorFlow and Core ML outputs
-        if not isinstance(tf_outputs, tuple):
-            tf_outputs = tuple([tf_outputs])
-        self.assertEqual(len(tf_outputs), len(coreml_outputs))
-        self.assertEqual(len(tf_outputs), len(output_names))
-        for i, output_name in enumerate(output_names):
-            np.testing.assert_almost_equal(tf_outputs[i].numpy(), coreml_outputs[output_name], decimal=2)
-
-    def test_single_output_example(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[3, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[3, 3], dtype=tf.float32)])
-            def __call__(self, x, y):
-                return x + y
-        self._test_coreml(model())
-
-    def test_multiple_outputs_example(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[3, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[3, 3], dtype=tf.float32)])
-            def __call__(self, x, y):
-                return x + y, x - y, x * y
-        self._test_coreml(model())
-
-    def test_relu(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[10, 10], dtype=tf.float32)])
-            def __call__(self, x):
-                return tf.nn.relu(x)
-        self._test_coreml(model())
-
-    def test_control_flow(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.float32)])
-            def __call__(self, x):
-                if x <= 0.:
-                    return 0.
-                else:
-                    return x * 3.
-        self._test_coreml(model())
-
-    def test_add_n(self):
-
-        class model(tf.Module):
-
-            # TODO: Add single input and constant testcases
-            # Blocked by a bug in coremltools
-
-            def test_single_variable(self, x):
-                return tf.add_n([x, x])
-
-            def test_variable(self, x, y, z):
-                return tf.add_n([x, y, z, 2 * z])
-
-            def test_variable_and_constant(self, x):
-                return tf.add_n([x, tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32)])
-
-            @tf.function(input_signature=[tf.TensorSpec(shape=[2, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[2, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[2, 3], dtype=tf.float32)])
-            def __call__(self, x, y, z):
-                return (self.test_single_variable(x),
-                        self.test_variable(x, y, z),
-                        self.test_variable_and_constant(x))
-
-        self._test_coreml(model())
-
-
-@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.')
-class TestStack(TestSingleOp):
-
-    def test_stack_simple(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[2, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[2, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[2, 3], dtype=tf.float32)])
-            def __call__(self, x, y, z):
-                return (tf.stack([x, y, z], axis=0),
-                        tf.stack([x, y, z], axis=1),
-                        tf.stack([x, y, z], axis=-1))
-
-        self._test_coreml(model())
-
-    def test_stack_simple_with_relu(self):
-
-        # Sometimes even a single layer compiles on its own but fails once it
-        # is connected to another layer...
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[2, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[2, 3], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[2, 3], dtype=tf.float32)])
-            def __call__(self, x, y, z):
-                return tf.nn.relu(tf.stack([x, y, z], axis=0))
-
-        self._test_coreml(model())
-
-    @pytest.mark.xfail(reason="There are still issues with scalar inputs in coremltools...")
-    def test_stack_simple_scalar(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[], dtype=tf.float32)])
-            def __call__(self, x, y, z):
-                return (tf.stack([x, y, z], axis=0),
-                        tf.stack([x, y, z], axis=-1))
-
-        self._test_coreml(model())
-
-    def test_stack_dynamic(self):
-
-        class model(tf.Module):
-            @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[None, None], dtype=tf.float32),
-                                          tf.TensorSpec(shape=[None, None], dtype=tf.float32)])
-            def __call__(self, x, y, z):
-                return (tf.stack([x, y, z], axis=0),
-                        tf.stack([x, y, z], axis=1),
-                        tf.stack([x, y, z], axis=-1))
-
-        self._test_coreml(model(), input_dic=[('x', [2, 3]), ('y', [2, 3]), ('z', [2, 3])])
-
-    def test_stack_dynamic_with_relu(self):
-
-        # Sometimes even a single layer compiles on its own but fails once it
-        # is connected to another layer...
- - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[None,None], dtype=tf.float32), - tf.TensorSpec(shape=[None,None], dtype=tf.float32), - tf.TensorSpec(shape=[None,None], dtype=tf.float32)]) - def __call__(self, x, y, z): - return (tf.nn.relu(tf.stack([x,y,z], axis=0))) - - self._test_coreml(model(),input_dic=[('x', [2,3]),('y', [2,3]), ('z', [2,3])]) - -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestEinsum(TestSingleOp): - - def test_einsum_transpose(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[7,3,5,2], dtype=tf.float32)]) - - def __call__(self, x): - return (tf.einsum('ijkt->jtki', x), - tf.einsum('ijkt->ijkt', x)) - self._test_coreml(model()) - - def test_einsum_inner_product(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,2,2,3], dtype=tf.float32), - tf.TensorSpec(shape=[4,2,2,3], dtype=tf.float32)]) - - def __call__(self, x, y): - return (tf.einsum('ijkt,ijkt->', x, y), - tf.einsum('ijkt,ikjt->', x, y)) - - self._test_coreml(model()) - - def test_einsum_matrix_multiplication_rank2(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3], dtype=tf.float32), - tf.TensorSpec(shape=[3,5], dtype=tf.float32)]) - def __call__(self, x, y): - return (tf.einsum('ij,jk->ki', x, y), - tf.einsum('ij,jk->ik',x ,y)) - self._test_coreml(model()) - - def test_einsum_matrix_multiplication_rank2_transpose_case(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3], dtype=tf.float32), - tf.TensorSpec(shape=[5,3], dtype=tf.float32)]) - def __call__(self, x, y): - return (tf.einsum('ij,kj->ki', x, y), - tf.einsum('ij,kj->ik',x ,y)) - self._test_coreml(model()) - - def test_einsum_matrix_multiplication_rank3(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3,7], dtype=tf.float32), - tf.TensorSpec(shape=[3,8,4], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('ijk,jti->'+''.join(suffix), x, y) for suffix in list(permutations('kit'))]) - - self._test_coreml(model()) - - def test_einsum_matrix_multiplication_rank4(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3,7,9], dtype=tf.float32), - tf.TensorSpec(shape=[10,4,9,3], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('ijkt,zitj->'+''.join(suffix), x, y) for suffix in list(permutations('itzk'))]) - self._test_coreml(model()) - - - def test_einsum_matrix_multiplication_rank5(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3,7,9,6], dtype=tf.float32), - tf.TensorSpec(shape=[10,6,4,9,3], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('ijktp,zpitj->'+''.join(suffix), x, y) for suffix in list(permutations('iptzk'))]) - self._test_coreml(model()) - - def test_einsum_high_rank_matrix_multiplication_testcase_1(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3,7], dtype=tf.float32), - tf.TensorSpec(shape=[9,2,3], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('bia,cdi->'+''.join(suffix), x, y) for suffix in list(permutations('abcd'))]) - - self._test_coreml(model()) - - def test_einsum_high_rank_matrix_multiplication_testcase_2(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,7,3], dtype=tf.float32), - tf.TensorSpec(shape=[9,2,3], dtype=tf.float32)]) - 
def __call__(self, x, y): - return tuple([tf.einsum('bai,cdi->'+''.join(suffix), x, y) for suffix in list(permutations('abcd'))]) - - self._test_coreml(model()) - - def test_einsum_high_rank_matrix_multiplication_testcase_3(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3,7,5], dtype=tf.float32), - tf.TensorSpec(shape=[9,2,3,], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('bitk,cdi->'+''.join(suffix), x, y) for suffix in list(permutations('btkcd'))]) - - self._test_coreml(model()) - - def test_einsum_high_rank_matrix_multiplication_testcase_4(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,3,7,5], dtype=tf.float32), - tf.TensorSpec(shape=[2,3], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('bitk,di->'+''.join(suffix), x, y) for suffix in list(permutations('btkd'))]) - - self._test_coreml(model()) - - def test_einsum_tensor_contraction_testcase_1(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[4,2,3], dtype=tf.float32), - tf.TensorSpec(shape=[6,2,3], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('abc,dbc->'+''.join(suffix), x, y) for suffix in list(permutations('ad'))]) - - self._test_coreml(model()) - - def test_einsum_tensor_contraction_testcase_2(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[32,4,3,2], dtype=tf.float32), - tf.TensorSpec(shape=[6,2,3,32], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('iabc,dcbi->'+''.join(suffix), x, y) for suffix in list(permutations('ad'))]) - - self._test_coreml(model()) - - def test_einsum_tensor_contraction_with_batch_testcase_1(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[32,4,3,2], dtype=tf.float32), - tf.TensorSpec(shape=[6,2,3,32], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('iabc,dcbi->'+''.join(suffix), x, y) for suffix in list(permutations('aid'))]) - - self._test_coreml(model()) - - def test_einsum_tensor_contraction_with_batch_testcase_2(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[64,32,4,3,2], dtype=tf.float32), - tf.TensorSpec(shape=[6,2,64,3,32], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('jiabc,dcjbi->'+''.join(suffix), x, y) for suffix in list(permutations('aidj'))]) - - self._test_coreml(model()) - - def test_einsum_with_dynamic_shape_matrix_multiplication(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[None,None,None,None,None], dtype=tf.float32), - tf.TensorSpec(shape=[None,None,None,None,None], dtype=tf.float32)]) - def __call__(self, x, y): - return tuple([tf.einsum('ijktp,zpitj->'+''.join(suffix), x, y) for suffix in list(permutations('iptzk'))]) - self._test_coreml(model(), input_dic=[('x', [4,3,7,9,6]), ('y', [10,6,4,9,3])]) - - def test_einsum_with_dynamic_shape_inner_product(self): - - class model(tf.Module): - @tf.function(input_signature=[tf.TensorSpec(shape=[None,None,None,None], dtype=tf.float32), - tf.TensorSpec(shape=[None,None,None,None], dtype=tf.float32)]) - - def __call__(self, x, y): - return (tf.einsum('ijkt,ijkt->', x, y), - tf.einsum('ijkt,ikjt->', x, y)) - - self._test_coreml(model(), input_dic=[('x', [4,2,2,3]), ('y', [4,2,2,3])]) - - def test_einsum_with_dynamic_shape_transpose(self): - - class model(tf.Module): - 
@tf.function(input_signature=[tf.TensorSpec(shape=[None,None,None,None], dtype=tf.float32)]) - - def __call__(self, x): - return (tf.einsum('ijkt->jtki', x), - tf.einsum('ijkt->ijkt', x)) - self._test_coreml(model(), input_dic=[('x', [7,3,5,2])]) - -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestTensorflow2Model(unittest.TestCase): - - def setUp(self): - self.saved_model_dir = tempfile.mkdtemp() - - def test_two_layers_control_dependency(self): - - class model(tf.Module): - - def __init__(self, name=None): - super(model, self).__init__(name=name) - self.w = tf.constant(tf.random.normal(shape=[1, 10]), name='bias', dtype=tf.float32) - - @tf.function(input_signature=[tf.TensorSpec(shape=[1, 10], dtype=tf.float32), - tf.TensorSpec(shape=[1, 10], dtype=tf.float32), - tf.TensorSpec(shape=[1, 10], dtype=tf.float32)]) - def __call__(self, x, y, z): - with tf.control_dependencies([x]): - with tf.control_dependencies([y]): - return self.w + z - model = model() - tf.saved_model.save(model, self.saved_model_dir) - mlmodel = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - input={'x':[1,10], 'y':[1,10], 'z':[1,10]}, - outputs=['Identity'] - ) - - x, y, z = np.random.rand(1,10), np.random.rand(1, 10), np.random.rand(1, 10) - tf_output = model(x, y, z).numpy() - ml_output = mlmodel.predict({'x':x, 'y':y, 'z':z})['Identity'] - - np.testing.assert_almost_equal(tf_output, ml_output, decimal=3) - - - def test_two_control_inputs(self): - - class model(tf.Module): - - def __init__(self, name=None): - super(model, self).__init__(name=name) - self.w = tf.constant(tf.random.normal(shape=[1, 10]), name='bias', dtype=tf.float32) - - @tf.function(input_signature=[tf.TensorSpec(shape=[1, 10], dtype=tf.float32), - tf.TensorSpec(shape=[1, 10], dtype=tf.float32), - tf.TensorSpec(shape=[1, 10], dtype=tf.float32)]) - def __call__(self, x, y, z): - with tf.control_dependencies([x, y]): - return self.w + z - model = model() - tf.saved_model.save(model, self.saved_model_dir) - mlmodel = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - input={'x':[1,10], 'y':[1,10], 'z':[1,10]}, - outputs=['Identity'] - ) - - x, y, z = np.random.rand(1,10), np.random.rand(1, 10), np.random.rand(1, 10) - tf_output = model(x, y, z).numpy() - ml_output = mlmodel.predict({'x':x, 'y':y, 'z':z})['Identity'] - - np.testing.assert_almost_equal(tf_output, ml_output, decimal=3) - - - def test_control_inputs_with_node_with_no_outputs(self): - - class model(tf.Module): - - def __init__(self, name=None): - super(model, self).__init__(name=name) - self.w = tf.constant(tf.random.normal(shape=[1, 10]), name='bias', dtype=tf.float32) - - @tf.function(input_signature=[tf.TensorSpec(shape=[1, 10], dtype=tf.float32), - tf.TensorSpec(shape=[1, 10], dtype=tf.float32)]) - def __call__(self, x, y): - with tf.control_dependencies([x]): - return self.w + y - model = model() - tf.saved_model.save(model, self.saved_model_dir) - mlmodel = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - input={'x':[1,10], 'y':[1,10]}, - outputs=['Identity'] - ) - - x, y = np.random.rand(1,10), np.random.rand(1, 10) - tf_output = model(x, y).numpy() - ml_output = mlmodel.predict({'x':x, 'y':y})['Identity'] - - np.testing.assert_almost_equal(tf_output, ml_output, decimal=3) - - def test_save_and_load_low_level_model(self): - class model(tf.Module): - def __init__(self, in_features, output_features, name=None): - super(model, self).__init__(name=name) - self.in_features = in_features - self.w = 
tf.Variable(tf.random.normal([in_features, output_features]), name='w') - - @tf.function(input_signature=[tf.TensorSpec(shape=[None, 20], dtype=tf.float32)]) - def __call__(self, x): - return tf.matmul(x, self.w) - in_features = 20 - output_features = 30 - model = model(in_features, output_features) - tf.saved_model.save(model, self.saved_model_dir) - - mlmodel = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - inputs={'x':[1,20]}, - outputs=['Identity'] - ) - - input = np.random.rand(1,20) - tf_output = model(input).numpy() - ml_output = mlmodel.predict({'x':input})['Identity'] - - np.testing.assert_almost_equal(tf_output, ml_output, decimal=3) - - def test_save_and_load_low_level_model_with_multiple_signatures(self): - class model(tf.Module): - def __init__(self, in_features, output_features, name=None): - super(model, self).__init__(name=name) - self.in_features = in_features - self.w = tf.Variable(tf.random.normal([in_features, output_features]), name='w') - - @tf.function(input_signature=[tf.TensorSpec(shape=[None, 20], dtype=tf.float32)]) - def __call__(self, x): - return tf.matmul(x, self.w) - - @tf.function(input_signature=[tf.TensorSpec(shape=[None, 20], dtype=tf.float32)]) - def predict(self, x): - return 2*tf.matmul(x, self.w) - - in_features = 20 - output_features = 30 - model = model(in_features, output_features) - signatures = {'a':model.__call__, 'b':model.predict} - tf.saved_model.save(model, self.saved_model_dir, signatures) - with pytest.raises(ValueError): - mlmodel = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - inputs={'x':[1,20]}, - outputs=['Identity'] - ) - - def test_save_and_load_low_level_model_with_no_signatures(self): - class model(tf.Module): - def __init__(self, in_features, output_features, name=None): - super(model, self).__init__(name=name) - self.in_features = in_features - self.w = tf.Variable(tf.random.normal([in_features, output_features]), name='w') - - @tf.function() - def __call__(self, x): - return tf.matmul(x, self.w) - - in_features = 20 - output_features = 30 - model = model(in_features, output_features) - tf.saved_model.save(model, self.saved_model_dir) - with pytest.raises(ValueError): - mlmodel = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - inputs={'x':[1,20]}, - outputs=['Identity'] - ) - -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestKerasFashionMnist(unittest.TestCase): - - def setUp(self): - self.input_shape = (1, 28, 28) - self.saved_model_dir = tempfile.mkdtemp() - _, self.model_path = tempfile.mkstemp(suffix='.h5', prefix=self.saved_model_dir) - - def tearDown(self): - if os.path.exists(self.saved_model_dir): - shutil.rmtree(self.saved_model_dir) - - @staticmethod - def _build_model_sequential(): - keras_model = tf.keras.Sequential([ - tf.keras.layers.Flatten(input_shape=(28, 28)), - tf.keras.layers.Dense(128, activation='relu'), - tf.keras.layers.Dense(10, activation='softmax') - ]) - return keras_model - - @staticmethod - def _build_model_functional(): - inputs = tf.keras.Input(shape=(28, 28), name='data') - x = tf.keras.layers.Flatten(input_shape=(28, 28))(inputs) - x = tf.keras.layers.Dense(128, activation='relu')(x) - outputs = tf.keras.layers.Dense(10, activation='softmax')(x) - keras_model = tf.keras.Model(inputs=inputs, outputs=outputs) - return keras_model - - def _test_conversion_prediction(self, keras_model, model_path, inputs, outputs): - # convert and validate - model = coremltools.converters.tensorflow.convert( - model_path, - 
inputs=inputs, - outputs=outputs - ) - self.assertTrue(isinstance(model, coremltools.models.MLModel)) - - # verify numeric correctness of predictions - inputs = generate_data(shape=self.input_shape) - keras_prediction = keras_model.predict(inputs) - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - prediction = model.predict({keras_model.inputs[0].name.split(':')[0]: inputs})[output_name] - np.testing.assert_array_equal(keras_prediction.shape, prediction.shape) - np.testing.assert_almost_equal(keras_prediction.flatten(), prediction.flatten(), decimal=4) - - def test_sequential_builder_keras_model_format(self): - keras_model = self._build_model_sequential() - # save model as Keras hdf5 .h5 model file - keras_model.save(self.model_path) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - - self._test_conversion_prediction( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: self.input_shape}, - outputs=[output_name] - ) - - def test_sequential_builder_saved_model_format(self): - keras_model = self._build_model_sequential() - # save model as SavedModel directory - keras_model.save(self.saved_model_dir, save_format='tf') - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_conversion_prediction( - keras_model=keras_model, - model_path=self.saved_model_dir, - inputs={input_name: self.input_shape}, - outputs=[output_name] - ) - - def test_functional_builder(self): - keras_model = self._build_model_functional() - # save model as Keras hdf5 .h5 model file - keras_model.save(self.model_path) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_conversion_prediction( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: self.input_shape}, - outputs=[output_name] - ) - - -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestModelFormats(unittest.TestCase): - - def setUp(self): - self.saved_model_dir = tempfile.mkdtemp() - _, self.model_path = tempfile.mkstemp(suffix='.h5', prefix=self.saved_model_dir) - - def tearDown(self): - if os.path.exists(self.saved_model_dir): - shutil.rmtree(self.saved_model_dir) - - @staticmethod - def _test_prediction(keras_model, core_ml_model, inputs, decimal=4): - keras_model.predict(inputs) - keras_prediction = keras_model.predict(inputs) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - prediction = core_ml_model.predict({input_name: inputs})[output_name] - np.testing.assert_array_equal(keras_prediction.shape, prediction.shape) - np.testing.assert_almost_equal(keras_prediction.flatten(), prediction.flatten(), decimal=decimal) - - def test_concrete_function(self): - # construct a toy model - root = tf.train.Checkpoint() - root.v1 = tf.Variable(3.) - root.v2 = tf.Variable(2.) 
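[Editor's note] The deleted test around this point builds a toy `tf.train.Checkpoint`, attaches a traced function, saves it as a SavedModel, and converts the resulting concrete function. For context, a condensed, runnable sketch of the same round trip, assuming the removed `coremltools.converters.tensorflow.convert` API and a TF 2.x runtime (this sketch is an editor addition, not part of the original file):

```python
import numpy as np
import tensorflow as tf
import coremltools

# Toy model: a checkpoint object with two variables and a traced function.
root = tf.train.Checkpoint()
root.v1 = tf.Variable(3.)
root.v2 = tf.Variable(2.)
root.f = tf.function(lambda x: root.v1 * root.v2 * x)

# Trace at a fixed shape to obtain a concrete function for conversion.
concrete_func = root.f.get_concrete_function(tf.constant(1., shape=[1, 1]))

# 'Identity' is the output node name TF 2.x assigns to the function result.
mlmodel = coremltools.converters.tensorflow.convert(
    [concrete_func], inputs={'x': (1, 1)}, outputs=['Identity'])

# Parity check between the TF function and the converted Core ML model.
x = np.random.rand(1, 1).astype(np.float32)
np.testing.assert_almost_equal(
    root.f(tf.constant(x)).numpy(),
    mlmodel.predict({'x': x})['Identity'],
    decimal=3)
```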
- root.f = tf.function(lambda x: root.v1 * root.v2 * x) - - # save the model - input_data = tf.constant(1., shape=[1, 1]) - to_save = root.f.get_concrete_function(input_data) - tf.saved_model.save(root, self.saved_model_dir, to_save) - - tf_model = tf.saved_model.load(self.saved_model_dir) - concrete_func = tf_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - model = coremltools.converters.tensorflow.convert( - [concrete_func], - inputs={'x': (1, 1)}, - outputs=['Identity'] - ) - - self.assertTrue(isinstance(model, coremltools.models.MLModel)) - - def test_control_flow(self): - @tf.function(input_signature=[tf.TensorSpec([], tf.float32)]) - def control_flow(x): - if x <= 0: - return 0. - else: - return x * 3. - - to_save = tf.Module() - to_save.control_flow = control_flow - - saved_model_dir = tempfile.mkdtemp() - tf.saved_model.save(to_save, saved_model_dir) - tf_model = tf.saved_model.load(saved_model_dir) - concrete_func = tf_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - model = coremltools.converters.tensorflow.convert( - [concrete_func], - inputs={'x': (1,)}, - outputs=['Identity'] - ) - - self.assertTrue(isinstance(model, coremltools.models.MLModel)) - input_data = generate_data(shape=[20]) - for data in input_data: - tf_prediction = to_save.control_flow(data).numpy().flatten() - cm_prediction = model.predict({'x': np.array([data])})['Identity'].flatten() - np.testing.assert_array_almost_equal(tf_prediction, cm_prediction, decimal=2) - - def test_subclassed_keras_model(self): - class MyModel(tf.keras.Model): - def __init__(self): - super(MyModel, self).__init__() - self.dense1 = tf.keras.layers.Dense(4) - self.dense2 = tf.keras.layers.Dense(5) - - @tf.function - def call(self, input_data): - return self.dense2(self.dense1(input_data)) - - keras_model = MyModel() - inputs = generate_data(shape=(4, 4)) - - # subclassed model can only be saved as SavedModel format - keras_model._set_inputs(inputs) - keras_model.save(self.saved_model_dir, save_format='tf') - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - # convert and validate - model = coremltools.converters.tensorflow.convert( - self.saved_model_dir, - inputs={input_name: (4, 4)}, - outputs=[output_name]) - self.assertTrue(isinstance(model, coremltools.models.MLModel)) - self._test_prediction(keras_model=keras_model, core_ml_model=model, inputs=inputs) - - -@unittest.skipIf(False, 'skipping slow full model conversion tests.') -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestKerasApplications(unittest.TestCase): - - def setUp(self): - self.saved_model_dir = tempfile.mkdtemp() - _, self.model_path = tempfile.mkstemp(suffix='.h5', prefix=self.saved_model_dir) - - def tearDown(self): - if os.path.exists(self.saved_model_dir): - shutil.rmtree(self.saved_model_dir) - - def _predict_keras_intermediate_layer(self, data, layer_name): - """ - Helper function to print intermediate layer for debugging. 
- """ - partial_keras_model = tf.keras.models.Model( - inputs=self.keras_model.input, - outputs=self.keras_model.get_layer(layer_name).output) - return partial_keras_model.predict(data) - - def _test_model(self, keras_model, model_path, inputs, outputs, decimal=4, verbose=False): - keras_model.save(model_path) - - # convert and validate - model = coremltools.converters.tensorflow.convert( - model_path, - inputs=inputs, - outputs=outputs - ) - self.assertTrue(isinstance(model, coremltools.models.MLModel)) - - if verbose: - print('TensorFlow Keras model saved at {}'.format(model_path)) - tmp_model_path = self.model_path.rsplit('.')[0] + '.mlmodel' - model.save(tmp_model_path) - print('Core ML model saved at {}'.format(tmp_model_path)) - - # verify numeric correctness of predictions - # assume one input one output for now - name, shape = list(inputs.items())[0] - data = generate_data(shape=shape) - - # self._predict_keras_intermediate_layer(data, 'conv1') - keras_prediction = keras_model.predict(data) - prediction = model.predict({name: data})[outputs[0]] - - if verbose: - print('Shape Keras:', keras_prediction.shape, ' vs. Core ML:', prediction.shape) - print('Input :', data.flatten()[:16]) - print('Keras :', keras_prediction.flatten()[:16]) - print('Core ML:', prediction.flatten()[:16]) - - np.testing.assert_array_equal( - keras_prediction.shape, prediction.shape) - np.testing.assert_almost_equal( - keras_prediction.flatten(), prediction.flatten(), decimal=decimal) - - @pytest.mark.slow - def test_vgg16_keras_model(self): - # load the tf.keras model - keras_model = tf.keras.applications.VGG16( - weights=None, input_shape=(32, 32, 3)) - # save model as Keras hdf5 .h5 model file - keras_model.save(self.model_path) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - @pytest.mark.slow - def test_vgg19_saved_model(self): - # load the tf.keras model - keras_model = tf.keras.applications.VGG19( - weights=None, input_shape=(32, 32, 3)) - # save model as SavedModel directory - keras_model.save(self.saved_model_dir, save_format='tf') - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.saved_model_dir, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - @pytest.mark.slow - def test_densenet121(self): - keras_model = tf.keras.applications.DenseNet121( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - @pytest.mark.slow - def test_inception_resnet_v2(self): - keras_model = tf.keras.applications.InceptionResNetV2( - weights=None, input_shape=(75, 75, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 75, 75, 3)}, - outputs=[output_name]) - - @pytest.mark.slow - def test_inception_v3(self): - keras_model = tf.keras.applications.InceptionV3( - weights=None, input_shape=(75, 75, 3)) - 
input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 75, 75, 3)}, - outputs=[output_name]) - - def test_mobilenet(self): - keras_model = tf.keras.applications.MobileNet( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - def test_mobilenet_v2(self): - keras_model = tf.keras.applications.MobileNetV2( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - @unittest.skip('shape mismatch') - @pytest.mark.slow - def test_nasnet_mobile(self): - keras_model = tf.keras.applications.NASNetMobile( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name], decimal=3) - - @unittest.skip('shape mismatch') - @pytest.mark.slow - def test_nasnet_large(self): - keras_model = tf.keras.applications.NASNetLarge( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name], decimal=3) - - @pytest.mark.slow - def test_resnet50(self): - keras_model = tf.keras.applications.ResNet50( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - @pytest.mark.slow - def test_resnet50_v2(self): - keras_model = tf.keras.applications.ResNet50V2( - weights=None, input_shape=(32, 32, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - outputs=[output_name]) - - @pytest.mark.slow - def test_xception(self): - keras_model = tf.keras.applications.Xception( - weights=None, input_shape=(71, 71, 3)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model( - keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 71, 71, 3)}, - outputs=[output_name]) - - -@unittest.skipUnless(HAS_TF_2, 'missing TensorFlow 2+.') -class TestCornerCases(unittest.TestCase): - - def setUp(self): - self.saved_model_dir = tempfile.mkdtemp() - _, self.model_path = tempfile.mkstemp(suffix='.h5', prefix=self.saved_model_dir) - - def tearDown(self): - if os.path.exists(self.saved_model_dir): - 
shutil.rmtree(self.saved_model_dir) - - def _test_model( - self, - keras_model, - model_path, - inputs, - outputs=None, - decimal=4, - use_cpu_only=False, - verbose=False - ): - keras_model.save(model_path) - - # convert and validate - model = coremltools.converters.tensorflow.convert( - model_path, - inputs=inputs, - outputs=outputs - ) - self.assertTrue(isinstance(model, coremltools.models.MLModel)) - - if verbose: - print('TensorFlow Keras model saved at {}'.format(model_path)) - tmp_model_path = self.model_path.rsplit('.')[0] + '.mlmodel' - model.save(tmp_model_path) - print('Core ML model saved at {}'.format(tmp_model_path)) - - # verify numeric correctness of predictions - # assume one input one output for now - name, shape = list(inputs.items())[0] - data = generate_data(shape=shape) - - keras_prediction = keras_model.predict(data) - - # If outputs are not supplied, get the output name - # from the keras model. - if not outputs: - output_name = keras_model.outputs[0].name - outputs = [output_name.split('/')[1].split(':')[0]] - - prediction = model.predict({name: data}, use_cpu_only=use_cpu_only)[outputs[0]] - - if verbose: - print('Shape Keras:', keras_prediction.shape, ' vs. Core ML:', prediction.shape) - print('Input :', data.flatten()[:16]) - print('Keras :', keras_prediction.flatten()[:16]) - print('Core ML:', prediction.flatten()[:16]) - - np.testing.assert_array_equal( - keras_prediction.shape, prediction.shape) - np.testing.assert_almost_equal( - keras_prediction.flatten(), prediction.flatten(), decimal=decimal) - - return model - - def test_output_identity_node_removal(self): - inpt = tf.keras.layers.Input(shape=[32, 32, 3], batch_size=1) - out = tf.keras.layers.SeparableConv2D( - filters=5, - kernel_size=(3, 3), - )(inpt) - out = tf.keras.layers.Conv2D( - filters=5, - kernel_size=1, - )(out) - keras_model = tf.keras.Model(inpt, out) - input_name = keras_model.inputs[0].name.split(':')[0] - self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}, - decimal=2) - - def test_batch_norm_node_fusion(self): - x = tf.keras.layers.Input(shape=[32, 32, 3], batch_size=1) - conv = tf.keras.layers.Conv2D(filters=3, kernel_size=1)(x) - bn = tf.keras.layers.BatchNormalization(axis=-1)(conv) - out = tf.keras.layers.Activation('relu')(bn) - keras_model = tf.keras.Model(x, out) - input_name = keras_model.inputs[0].name.split(':')[0] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 32, 32, 3)}) - num_batch_norm = 0 - for layer in model.get_spec().neuralNetwork.layers: - if layer.WhichOneof('layer') == 'batchnorm': - num_batch_norm += 1 - self.assertEqual(num_batch_norm, 1) - - def test_conv_bias_fusion(self): - x = tf.keras.layers.Input(shape=[32, 32, 3], batch_size=1) - conv = tf.keras.layers.Conv2D(filters=3, kernel_size=1)(x) - conv = tf.keras.layers.DepthwiseConv2D(kernel_size=1)(conv) - keras_model = tf.keras.Model(x, conv) - input_name = keras_model.inputs[0].name.split(':')[0] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - decimal=3, - inputs={input_name: (1, 32, 32, 3)}) - add_broadcastables = 0 - load_constants = 0 - for layer in model.get_spec().neuralNetwork.layers: - if layer.WhichOneof('layer') == 'addBroadcastable': - add_broadcastables += 1 - if layer.WhichOneof('layer') == 'loadConstantND': - load_constants += 1 - - self.assertEqual(add_broadcastables, 0) - self.assertEqual(load_constants, 0) - - def 
test_conv2d_with_activation(self): - inputs = tf.keras.layers.Input(shape=[256, 256, 3], batch_size=1) - out = tf.keras.layers.Conv2D( - filters=5, - kernel_size=1, - padding='same', - activation='softmax')(inputs) - keras_model = tf.keras.Model(inputs, out) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 256, 256, 3)}, - outputs=[output_name]) - - def test_extra_transposes_1(self): - # this model generates an extra transpose layer - keras_model = tf.keras.Sequential() - keras_model.add(tf.keras.layers.Reshape((75, 6), input_shape=(6 * 75,))) - keras_model.add(tf.keras.layers.Dense(100, activation='relu')) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 6 * 75)}, - outputs=[output_name], verbose=True) - num_reshapes = 0 - num_transposes = 0 - for layer in model.get_spec().neuralNetwork.layers: - if layer.WhichOneof('layer') == 'reshapeStatic': - num_reshapes += 1 - if layer.WhichOneof('layer') == 'transpose': - num_transposes += 1 - self.assertEqual(num_reshapes, 2) - self.assertEqual(num_transposes, 0) - - def test_extra_transposes_2(self): - keras_model = tf.keras.Sequential() - keras_model.add(tf.keras.layers.Reshape((75, 6, 1), input_shape=(6 * 75,))) - keras_model.add(tf.keras.layers.Permute((2, 3, 1))) - keras_model.add(tf.keras.layers.Permute((2, 3, 1))) - # inserting several unnecessary extra transpose layers - keras_model.add(tf.keras.layers.Permute((1, 2, 3))) - keras_model.add(tf.keras.layers.Permute((1, 2, 3))) - keras_model.add(tf.keras.layers.Permute((1, 2, 3))) - keras_model.add(tf.keras.layers.Activation(tf.nn.relu)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 6 * 75)}, - outputs=[output_name]) - num_transposes = 0 - for layer in model.get_spec().neuralNetwork.layers: - if layer.WhichOneof('layer') == 'transpose': - num_transposes += 1 - self.assertEqual(num_transposes, 2) - - def test_extra_reshapes(self): - keras_model = tf.keras.Sequential() - # inserting several unnecessary extra reshape layers - keras_model.add(tf.keras.layers.Reshape((1, 75, 6, 1), input_shape=(6 * 75,))) - keras_model.add(tf.keras.layers.Reshape((75, 6, 1))) - keras_model.add(tf.keras.layers.Reshape((75, 1, 6, 1))) - keras_model.add(tf.keras.layers.Reshape((75, 6, 1))) - keras_model.add(tf.keras.layers.Reshape((75, 1, 6, 1))) - keras_model.add(tf.keras.layers.Activation(tf.nn.relu)) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 6 * 75)}, - outputs=[output_name]) - - num_reshapes = 0 - for layer in model.get_spec().neuralNetwork.layers: - if layer.WhichOneof('layer') == 'reshapeStatic': - num_reshapes += 1 - self.assertEqual(num_reshapes, 1) - - def test_gelu_tanh_approx_fusion(self): - - @tf.function(input_signature=[tf.TensorSpec(shape=(6,), dtype=tf.float32)]) - def gelu_tanh(x): - y = 0.5 * (1.0 + tf.tanh((math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 
3))))) - return x * y - - conc_func = gelu_tanh.get_concrete_function() - mlmodel = coremltools.converters.tensorflow.convert( - [conc_func], - inputs={conc_func.inputs[0].name[:-2]: conc_func.inputs[0].shape}, - outputs=[conc_func.outputs[0].name[:-2]] - ) - - spec = mlmodel.get_spec() - nn_spec = spec.neuralNetwork - number_gelu_layers = 0 - for layer in nn_spec.layers: - if layer.WhichOneof('layer') == 'gelu': - number_gelu_layers += 1 - self.assertEqual(number_gelu_layers, 1) - - def disable_test_layer_norm_fusion(self): - keras_model = tf.keras.Sequential() - keras_model.add(tf.keras.layers.LayerNormalization(axis=-1, input_shape=(3, 4, 5))) - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (3, 4, 5)}, - outputs=[output_name]) - - def test_wrong_out_name_error(self): - - @tf.function(input_signature=[tf.TensorSpec(shape=(1,), dtype=tf.float32)]) - def sin(x): - y = tf.sin(x) - return y - - conc_func = sin.get_concrete_function() - with self.assertRaises(Exception) as cm: - coremltools.converters.tensorflow.convert( - [conc_func], - inputs={conc_func.inputs[0].name[:-2]: conc_func.inputs[0].shape}, - outputs=['output_not_present']) - - the_exception = str(cm.exception) - self.assertTrue("is not an output node in the source graph" in the_exception) - - def test_softplus(self): - keras_model = tf.keras.Sequential([ - tf.keras.layers.Flatten(input_shape=(28, 28)), - tf.keras.layers.Dense(128, activation='softplus') - ]) - - input_name = keras_model.inputs[0].name.split(':')[0] - output_name = keras_model.outputs[0].name.split(':')[0].split('/')[-1] - model = self._test_model(keras_model=keras_model, - model_path=self.model_path, - inputs={input_name: (1, 28, 28)}, - outputs=[output_name], decimal=3) - - def test_redundant_transpose(self): - H = 224 - W = 224 - C = 3 - inputs = tf.keras.layers.Input(shape=(H, W, C), batch_size=1) - out = tf.keras.layers.Conv2D( - filters=4, - kernel_size=3, - )(inputs) - model = tf.keras.Model(inputs, out) - input_name = model.inputs[0].name.split(":")[0] - input_shape = (1, H, W, C) - output_name = model.outputs[0].name.split(':')[0].split('/')[-1] - - model.save(self.model_path, include_optimizer=False, save_format="h5") - - mlmodel = coremltools.converters.tensorflow.convert( - self.model_path, - inputs={input_name: input_shape}, - image_input_names=input_name, - outputs=[output_name], - ) - - spec = mlmodel.get_spec() - output_types = [layer.WhichOneof('layer') for layer in spec.neuralNetwork.layers] - expected_types = ['convolution', 'transpose'] - np.testing.assert_array_equal(output_types, expected_types) - - -if __name__ == '__main__': - np.random.seed(1984) - RUN_ALL_TESTS = True - if RUN_ALL_TESTS: - unittest.main() - else: - suite = unittest.TestSuite() - suite.addTest(TestCornerCases('test_wrong_out_name_error')) - unittest.TextTestRunner().run(suite) diff --git a/coremltools/converters/tensorflow/test/test_tf_graphs.py b/coremltools/converters/tensorflow/test/test_tf_graphs.py deleted file mode 100644 index 02ed47887..000000000 --- a/coremltools/converters/tensorflow/test/test_tf_graphs.py +++ /dev/null @@ -1,292 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import os, sys -import tensorflow as tf -import numpy as np -import unittest - -from 
test_base import TFNetworkTest - - -class TFSimpleNetworkTest(TFNetworkTest): - - # Allows you to override common test entry for this class - # Backend - set use_cpu_only to be True when working on Intel GPU macs - def _test_tf_model( - self, - graph, - input_shapes, - output_node_names, - data_mode='random', - input_refs=None, - delta=1e-2, - use_cpu_only=True, - graph_optimizations="freeze", # one of ["freeze", "convert_variables_to_constants", None] - quantize_tf_model=False): - - super(TFSimpleNetworkTest, self)._test_tf_model( - graph, - input_shapes, - output_node_names, - data_mode=data_mode, - input_refs=input_refs, - delta=delta, - use_cpu_only=use_cpu_only, - graph_optimizations=graph_optimizations, - quantize_tf_model=quantize_tf_model) - - def test_simple_matmul(self): - graph = tf.Graph() - with graph.as_default(): - matrix1 = tf.placeholder(tf.float32, shape=[1, 2], name='input') - matrix2 = tf.Variable(tf.truncated_normal([2, 3])) - product = tf.matmul(matrix1, matrix2, name='product') - self._test_tf_model(graph, {'input': [1, 2]}, ['product']) - - def test_matmul_transposed_weight(self): - graph = tf.Graph() - with graph.as_default(): - matrix1 = tf.placeholder(tf.float32, shape=[1, 2], name='input') - matrix2 = tf.Variable(tf.truncated_normal([3, 2])) - product = tf.matmul(matrix1, matrix2, transpose_b=True, name='product') - bias = tf.Variable(tf.truncated_normal([3])) - y = tf.nn.bias_add(product, bias, name='y') - - self._test_tf_model(graph, {'input': [1, 2]}, ['y']) - - def test_variable_assign(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[1, ], name='input') - y = tf.Variable(0.0, dtype=tf.float32, name='y') - - # We set our assign op - assign_op = tf.assign(y, y + 10) - - with tf.control_dependencies([assign_op]): - out = tf.multiply(x, y, name='output') - - self._test_tf_model(graph, {'input': [1, ]}, ['output', 'y'], - graph_optimizations=None) - - def test_control_dependency_with_no_op(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[1, ], name='input') - y = tf.Variable(0.0, dtype=tf.float32, name='y') - - assign_op = tf.assign(y, y + 10) - - with tf.control_dependencies([assign_op]): - c = tf.no_op() - - with tf.control_dependencies([c]): - d = tf.no_op() - - with tf.control_dependencies([c, d]): - e = tf.no_op() - - with tf.control_dependencies([e]): - out = tf.multiply(x, y, name='output') - - self._test_tf_model(graph, {'input': [1, ]}, ['output', 'y'], - graph_optimizations=None) - - def test_matmul_biasadd_sub(self): - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=[None, 2], name='input') - weight = tf.Variable(tf.truncated_normal([2, 3])) - y = tf.matmul(x, weight) - bias = tf.Variable(tf.truncated_normal([3])) - z0 = tf.nn.bias_add(y, bias) - c = tf.Variable(tf.truncated_normal([3])) - z = tf.subtract(z0, c, name='output') - self._test_tf_model(graph, {'input': [1, 2]}, ['output']) - - def test_matmul_transpose(self): - graph = tf.Graph() - with graph.as_default(): - matrix1 = tf.placeholder(tf.float32, shape=[1, 5], name='input') - matrix2 = tf.Variable(tf.truncated_normal([5, 3])) - product = tf.matmul(matrix1, matrix2, name='product') - tp = tf.transpose(product, [0, 1], name='tp') - self._test_tf_model(graph, {'input': [1, 5]}, ['tp']) - - def test_matmul_unstack(self): - graph = tf.Graph() - with graph.as_default(): - matrix1 = tf.placeholder(tf.float32, shape=[2, 5], name='input') - matrix2 = 
tf.Variable(tf.truncated_normal([5, 3])) - product = tf.matmul(matrix1, matrix2, name='product') - y1, y2 = tf.unstack(product) - y1 = tf.identity(y1, name='output_1') - y2 = tf.identity(y2, name='output_2') - self._test_tf_model(graph, {'input': [2, 5]}, ['output_1', 'output_2']) - - def test_dense_activations(self): - # TODO - Add other activations - for act_type in ['sigmoid', 'tanh']: - graph = tf.Graph() - with graph.as_default(): - matrix1 = tf.placeholder(tf.float32, shape=[1, 8], name='input') - matrix2 = tf.Variable(tf.truncated_normal([8, 2])) - product = tf.matmul(matrix1, matrix2, name='product') - if act_type == 'sigmoid': - act = tf.sigmoid(product, name='act') - elif act_type == 'tanh': - act = tf.tanh(product, name='act') - self._test_tf_model(graph, {'input': [1, 8]}, ['act']) - - def test_extract_shape(self): - dims = [2, 3, 4] - for rank in range(1, len(dims) + 1): - shape = [None] + dims[:rank] - batched_shape = [1] + dims[:rank] - graph = tf.Graph() - with graph.as_default(): - x = tf.placeholder(tf.float32, shape=batched_shape, name='input') - m = tf.Variable(tf.truncated_normal(tf.shape(x))) - y = tf.identity(x + m, name='output') - self._test_tf_model(graph, {'input': batched_shape}, ['output']) - - @unittest.skip - def test_shape_slice(self): - seqlen = 2 - graph = tf.Graph() - with graph.as_default(): - data = tf.placeholder( - tf.float32, [1, None, 1], name='input') # (batch_size, seq_len, input_dim) - m = tf.Variable(tf.truncated_normal([1, 1, 1])) - data_t = tf.transpose(data + m, [1, 0, 2], name='tp') - data_shape = tf.shape(data_t) - output = tf.identity(data_shape[0], name='output') # What is the slice here? - self._test_tf_model(graph, {'input': [1, seqlen, 1]}, ['output']) - - @unittest.skip - # "Backend exception: \"Invalid blob shape\": scatter_kernel_cpu: Invalid shape of input blob" - def test_array_scatter(self): - batch_size = 2 - graph = tf.Graph() - with graph.as_default(): - data = tf.placeholder( - tf.float32, shape=[batch_size, 3], name='input') # (batch_size, input_dim) - m = tf.Variable(tf.truncated_normal([batch_size, 3])) - arr = tf.TensorArray(size=2, element_shape=[batch_size, 3], dtype=tf.float32) - arr = arr.write(0, data) - arr = arr.write(1, m) - output = arr.gather([0, 1], name='output') - self._test_tf_model(graph, {'input': [batch_size, 3]}, ['output']) - - def test_range(self): - graph = tf.Graph() - with graph.as_default(): - data = tf.placeholder(tf.int32, shape=(), name='input') # input is a scalar - m = tf.Variable(1) - output = tf.range(0, data + m, 1, name='output') - self._test_tf_model(graph, {'input': []}, ['output'], input_refs={'input': 1}) - - def test_simple_loop(self): - graph = tf.Graph() - with graph.as_default(): - data = tf.placeholder(tf.float32, shape=[None, 2], name='data') - i = tf.constant(0) - # When providing placeholder directly into while loop structures, - # placeholder must be the first one. 
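[Editor's note] To make the ordering constraint in the comment above concrete, here is a runnable sketch of the loop the deleted test builds, plus a numeric check of what it computes (an editor addition using the TF 1.x graph API; the expected value `x + 10 * w` is an inference from the loop body, not something the original file asserts):

```python
import numpy as np
import tensorflow as tf  # TF 1.x graph-mode API, matching this file

graph = tf.Graph()
with graph.as_default():
    data = tf.placeholder(tf.float32, shape=[None, 2], name='data')
    i = tf.constant(0)
    w = tf.Variable(2.0, dtype=tf.float32, name='weight')
    # The placeholder-backed tensor leads the loop-variable list, as the
    # comment above requires.
    r = tf.while_loop(lambda x, i, v: tf.less(i, 10),
                      lambda x, i, v: (tf.add(x, v), i + 1, v),
                      [data, i, w], name='output')

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    x0 = np.array([[0.1, 0.2]], dtype=np.float32)
    out = sess.run(r[0], feed_dict={data: x0})
    # The body adds w to x ten times, so 'output/Exit' should equal x + 10*w.
    np.testing.assert_allclose(out, x0 + 10 * 2.0, rtol=1e-5)
```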
-            c = lambda x, i, v: tf.less(i, 10)
-            b = lambda x, i, v: (tf.add(x, v), i + 1, v)  # Dummy
-            w = tf.Variable(2.0, dtype=tf.float32, name='weight')
-            r = tf.while_loop(c, b, [data, i, w], name='output')
-
-        self._test_tf_model(graph, {"data": [1, 2]}, ["output/Exit"])
-
-
-    def test_simple_branch(self):
-        graph = tf.Graph()
-        with graph.as_default():
-            data = tf.placeholder(tf.float32, shape=[None, 2], name='data')
-            switch = tf.placeholder(tf.float32, shape=(), name='switch')
-            m = tf.Variable(1.0)
-            result = tf.cond(switch > 0,
-                             lambda: tf.add(data, m),
-                             lambda: tf.subtract(data, m),
-                             name='output')
-
-        self._test_tf_model(graph=graph,
-                            input_shapes={"data": [1, 2], "switch": []},
-                            output_node_names=[result.op.name],
-                            input_refs={'data': np.array([0.1, 0.2]).reshape((1, 2)),
-                                        'switch': 1.0})
-
-        self._test_tf_model(graph=graph,
-                            input_shapes={"data": [1, 2], "switch": []},
-                            output_node_names=[result.op.name],
-                            input_refs={'data': np.array([0.1, 0.2]).reshape((1, 2)),
-                                        'switch': -1.0})
-
-    def test_onehot_matmul_encoding(self):
-        seq_len = 6
-        embedding_dim = 10  # depth
-        out_channels = 4
-        graph = tf.Graph()
-        with graph.as_default():
-            indices = tf.placeholder(tf.int32, shape=[None, seq_len], name='indices')
-            onehot = tf.one_hot(indices, depth=embedding_dim)  # (batch_size, seq_len, embedding_dim)
-            weight = tf.Variable(tf.truncated_normal([1, embedding_dim, out_channels]))
-            y = tf.matmul(onehot, weight, name='output')
-
-        self._test_tf_model(graph, {"indices": [1, seq_len]}, ["output"], data_mode='linear')
-
-    def test_two_input_batch_matmul(self):
-        test_cases = [
-            {'r_x': 6, 'c_x': 10, 'r_y': 10, 'c_y': 4, 'transpose_x': False, 'transpose_y': False},
-            {'r_x': 6, 'c_x': 10, 'r_y': 4, 'c_y': 10, 'transpose_x': False, 'transpose_y': True}
-        ]
-        # r_o, c_o = 6, 4
-        for tc in test_cases:
-            graph = tf.Graph()
-            with graph.as_default():
-                r_x, c_x, r_y, c_y, tp_x, tp_y = tc['r_x'], tc['c_x'], tc['r_y'], tc['c_y'], tc['transpose_x'], tc['transpose_y']
-                data_shape = [1, r_x, c_x]
-                weight_shape = [1, r_y, c_y]
-                input_data = tf.placeholder(tf.float32, shape=data_shape, name='input_data')
-                input_weight = tf.placeholder(tf.float32, shape=weight_shape, name='input_weight')
-                y = tf.matmul(input_data, input_weight, name='output', transpose_a=tp_x, transpose_b=tp_y)
-            self._test_tf_model(graph, {"input_data": data_shape, "input_weight": weight_shape}, ["output"],
-                                graph_optimizations=None)
-
-    def test_layer_norm(self):
-        shapes = [(3, 4), (3, 4, 5), (3, 4, 5, 6)]
-        for shape in shapes:
-            graph = tf.Graph()
-            with graph.as_default():
-                x = tf.placeholder(tf.float32, shape=shape, name='input')
-                y = tf.contrib.layers.layer_norm(x, begin_norm_axis=-1,
-                                                 begin_params_axis=-1)
-                z = tf.identity(y, name='output')
-            self._test_tf_model(graph, {'input': shape}, ['output'])
-
-    def test_gelu_tanh_approx(self):
-        def gelu(x):
-            cdf = 0.5 * (1.0 + tf.tanh(
-                (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
-            return x * cdf
-
-        shapes = [(3, 4), (3, 4, 5), (3, 4, 5, 6)]
-        for shape in shapes:
-            graph = tf.Graph()
-            with graph.as_default():
-                x = tf.placeholder(tf.float32, shape=shape, name='input')
-                y = gelu(x)
-                z = tf.identity(y, name='output')
-            self._test_tf_model_constant(graph, {'input': shape}, ['output'])
-
-
-if __name__ == '__main__':
-    unittest.main()
-    # suite = unittest.TestSuite()
-    # suite.addTest(TFSimpleNetworkTest("test_simple_branch"))
-    # unittest.TextTestRunner().run(suite)
diff --git a/coremltools/converters/tensorflow/test/test_tf_keras_layers.py
b/coremltools/converters/tensorflow/test/test_tf_keras_layers.py deleted file mode 100644 index 5ae756120..000000000 --- a/coremltools/converters/tensorflow/test/test_tf_keras_layers.py +++ /dev/null @@ -1,417 +0,0 @@ -import unittest -import tempfile -import numpy as np -import coremltools -import os -import shutil - -import tensorflow as tf -from tensorflow.keras import backend as _keras -from tensorflow.keras import layers -from coremltools._deps import HAS_TF_2 -from test_utils import generate_data, tf_transpose - - -class TensorFlowKerasTests(unittest.TestCase): - @classmethod - def setUpClass(cls): - tf.keras.backend.set_learning_phase(False) - - def setUp(self): - self.saved_model_dir = tempfile.mkdtemp() - _, self.model_file = tempfile.mkstemp(suffix='.h5', prefix=self.saved_model_dir) - - def tearDown(self): - if os.path.exists(self.saved_model_dir): - shutil.rmtree(self.saved_model_dir) - - def _get_tf_tensor_name(self, graph, name): - return graph.get_operation_by_name(name).outputs[0].name - - def _test_model(self, model, data_mode='random_zero_mean', decimal=4, use_cpu_only=False, has_variables=True, verbose=False): - if not HAS_TF_2: - self._test_keras_model_tf1(model, data_mode, decimal, use_cpu_only, has_variables, verbose) - else: - self._test_keras_model_tf2(model, data_mode, decimal, use_cpu_only, has_variables, verbose) - - def _test_keras_model_tf1(self, model, data_mode, decimal, use_cpu_only, has_variables, verbose): - - graph_def_file = os.path.join(self.saved_model_dir, 'graph.pb') - frozen_model_file = os.path.join(self.saved_model_dir, 'frozen.pb') - core_ml_model_file = os.path.join(self.saved_model_dir, 'model.mlmodel') - - input_shapes = {inp.op.name: inp.shape.as_list() for inp in model.inputs} - for name, shape in input_shapes.items(): - input_shapes[name] = [dim if dim is not None else 1 for dim in shape] - - output_node_names = [output.op.name for output in model.outputs] - - tf_graph = _keras.get_session().graph - tf.reset_default_graph() - if has_variables: - with tf_graph.as_default(): - saver = tf.train.Saver() - - # note: if Keras backend has_variable is False, we're not making variables constant - with tf.Session(graph=tf_graph) as sess: - sess.run(tf.global_variables_initializer()) - feed_dict = {} - for name, shape in input_shapes.items(): - tensor_name = tf_graph.get_operation_by_name(name).outputs[0].name - feed_dict[tensor_name] = generate_data(shape, data_mode) - # run the result - fetches = [ - tf_graph.get_operation_by_name(name).outputs[0] for name in output_node_names - ] - result = sess.run(fetches, feed_dict=feed_dict) - # save graph definition somewhere - tf.train.write_graph(sess.graph, self.saved_model_dir, graph_def_file, as_text=False) - - # freeze_graph() has been raising error with tf.keras models since no - # later than TensorFlow 1.6, so we're not using freeze_graph() here. 
- # See: https://github.com/tensorflow/models/issues/5387 - output_graph_def = tf.graph_util.convert_variables_to_constants( - sess, # The session is used to retrieve the weights - tf_graph.as_graph_def(), # The graph_def is used to retrieve the nodes - output_node_names # The output node names are used to select the useful nodes - ) - with tf.gfile.GFile(frozen_model_file, 'wb') as f: - f.write(output_graph_def.SerializeToString()) - - _keras.clear_session() - - # convert to Core ML model format - core_ml_model = coremltools.converters.tensorflow.convert( - frozen_model_file, - inputs=input_shapes, - outputs=output_node_names, - use_cpu_only=use_cpu_only) - - if verbose: - print('\nFrozen model saved at {}'.format(frozen_model_file)) - print('\nCore ML model description:') - from coremltools.models.neural_network.printer import print_network_spec - print_network_spec(core_ml_model.get_spec(), style='coding') - core_ml_model.save(core_ml_model_file) - print('\nCore ML model saved at {}'.format(core_ml_model_file)) - - # transpose input data as Core ML requires - core_ml_inputs = { - name: tf_transpose(feed_dict[self._get_tf_tensor_name(tf_graph, name)]) - for name in input_shapes - } - - # run prediction in Core ML - core_ml_output = core_ml_model.predict(core_ml_inputs, useCPUOnly=use_cpu_only) - - for idx, out_name in enumerate(output_node_names): - tf_out = result[idx] - if len(tf_out.shape) == 0: - tf_out = np.array([tf_out]) - tp = tf_out.flatten() - coreml_out = core_ml_output[out_name] - cp = coreml_out.flatten() - self.assertTrue(tf_out.shape == coreml_out.shape) - for i in range(len(tp)): - max_den = max(1.0, tp[i], cp[i]) - self.assertAlmostEqual(tp[i] / max_den, cp[i] / max_den, delta=10 ** -decimal) - - def _test_keras_model_tf2(self, model, data_mode, decimal, use_cpu_only, has_variables, verbose): - - core_ml_model_file = self.model_file.rsplit('.')[0] + '.mlmodel' - - input_dict = {inp.op.name: inp.shape.as_list() for inp in model.inputs} - for name, shape in input_dict.items(): - input_dict[name] = [dim if dim is not None else 1 for dim in shape] - output_list = ['Identity'] - model.save(self.model_file) - - # convert Keras model into Core ML model format - core_ml_model = coremltools.converters.tensorflow.convert( - filename=self.model_file, - inputs=input_dict, - outputs=output_list, - use_cpu_only=use_cpu_only) - - if verbose: - print('\nKeras model saved at {}'.format(self.model_file)) - print('\nCore ML model description:') - from coremltools.models.neural_network.printer import print_network_spec - print_network_spec(core_ml_model.get_spec(), style='coding') - core_ml_model.save(core_ml_model_file) - print('\nCore ML model saved at {}'.format(core_ml_model_file)) - - core_ml_inputs = { - name: generate_data(shape, data_mode) for name, shape in input_dict.items() - } - - # run prediction and compare results - keras_output = model.predict(list(core_ml_inputs.values())[0]) - core_ml_output = core_ml_model.predict( - core_ml_inputs, useCPUOnly=use_cpu_only)[output_list[0]] - - if verbose: - print('\nPredictions', keras_output.shape, ' vs.', core_ml_output.shape) - print(keras_output.flatten()[:6]) - print(core_ml_output.flatten()[:6]) - - np.testing.assert_array_equal( - keras_output.shape, core_ml_output.shape) - np.testing.assert_almost_equal( - keras_output.flatten(), core_ml_output.flatten(), decimal=decimal) - - -class SimpleLayerTests(TensorFlowKerasTests): - - def test_dense_softmax(self): - model = tf.keras.Sequential() - model.add(layers.Dense(16, 
input_shape=(16,), activation=tf.nn.softmax)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_dense_elu(self): - model = tf.keras.Sequential() - model.add(layers.Dense(16, input_shape=(16,), activation=tf.nn.elu)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, decimal=2) - - def test_dense_tanh(self): - model = tf.keras.Sequential() - model.add(layers.Dense(16, input_shape=(16,), activation=tf.nn.tanh)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_housenet_random(self): - num_hidden = 2 - num_features = 3 - model = tf.keras.Sequential() - model.add(layers.Dense(num_hidden, input_dim=num_features)) - model.add(layers.Activation(tf.nn.relu)) - model.add(layers.Dense(1, input_dim=num_features)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_conv2d_random(self): - input_dim = 10 - input_shape = (input_dim, input_dim, 1) - num_kernels, kernel_height, kernel_width = 3, 5, 5 - model = tf.keras.Sequential() - model.add(layers.Conv2D( - input_shape=input_shape, - filters=num_kernels, kernel_size=(kernel_height, kernel_width))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_conv2d_dilated_random(self): - input_dim = 10 - input_shape = (input_dim, input_dim, 1) - num_kernels, kernel_height, kernel_width = 3, 5, 5 - model = tf.keras.Sequential() - model.add(layers.Conv2D( - input_shape=input_shape, dilation_rate=(2, 2), - filters=num_kernels, kernel_size=(kernel_height, kernel_width))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_conv1d_same_random(self): - input_dim = 2 - input_length = 10 - filter_length = 3 - nb_filters = 4 - model = tf.keras.Sequential() - model.add(layers.Conv1D( - nb_filters, kernel_size=filter_length, padding='same', - input_shape=(input_length, input_dim))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_conv1d_valid_random(self): - input_dim = 2 - input_length = 10 - filter_length = 3 - nb_filters = 4 - model = tf.keras.Sequential() - model.add(layers.Conv1D( - nb_filters, kernel_size=filter_length, padding='valid', - input_shape=(input_length, input_dim))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - @unittest.skip('non-equal block shape is not yet supported') - def test_tiny_conv1d_dilated_random(self): - input_shape = (20, 1) - num_kernels = 2 - filter_length = 3 - model = tf.keras.Sequential() - model.add(layers.Conv1D( - num_kernels, kernel_size=filter_length, padding='valid', - input_shape=input_shape, dilation_rate=3)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_flatten(self): - model = tf.keras.Sequential() - model.add(layers.Flatten(input_shape=(2, 2, 2))) - self._test_model(model, data_mode='linear', has_variables=False) - - def test_conv_dense(self): - input_shape = (48, 48, 3) - model = tf.keras.Sequential() - model.add(layers.Conv2D(32, (3, 3), activation=tf.nn.relu, input_shape=input_shape)) - model.add(layers.Flatten()) - model.add(layers.Dense(10, activation=tf.nn.softmax)) - self._test_model(model) - - def test_conv_batchnorm_random(self): - 
input_dim = 10 - input_shape = (input_dim, input_dim, 3) - num_kernels = 3 - kernel_height = 5 - kernel_width = 5 - model = tf.keras.Sequential() - model.add(layers.Conv2D( - input_shape=input_shape, - filters=num_kernels, - kernel_size=(kernel_height, kernel_width))) - model.add(layers.BatchNormalization(epsilon=1e-5)) - model.add(layers.Dense(10, activation=tf.nn.softmax)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, decimal=2, has_variables=True) - - @unittest.skip('list index out of range') - def test_tiny_deconv_random(self): - input_dim = 13 - input_shape = (input_dim, input_dim, 5) - num_kernels = 16 - kernel_height = 3 - kernel_width = 3 - model = tf.keras.Sequential() - model.add(layers.Conv2DTranspose( - filters=num_kernels, - kernel_size=(kernel_height, kernel_width), - input_shape=input_shape, padding='valid', strides=(2, 2))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - @unittest.skip('Deconvolution layer has weight matrix of size 432 to encode a 3 x 4 x 3 x 3 convolution.') - def test_tiny_deconv_random_same_padding(self): - input_dim = 14 - input_shape = (input_dim, input_dim, 3) - num_kernels = 16 - kernel_height = 3 - kernel_width = 3 - model = tf.keras.Sequential() - model.add(layers.Conv2DTranspose( - filters=num_kernels, - kernel_size=(kernel_height, kernel_width), - input_shape=input_shape, padding='same', strides=(2, 2))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_depthwise_conv_same_pad_depth_multiplier(self): - input_dim = 16 - input_shape = (input_dim, input_dim, 3) - depth_multiplier = 4 - kernel_height = 3 - kernel_width = 3 - model = tf.keras.Sequential() - model.add(layers.DepthwiseConv2D( - depth_multiplier=depth_multiplier, - kernel_size=(kernel_height, kernel_width), - input_shape=input_shape, padding='same', strides=(1, 1))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_depthwise_conv_valid_pad_depth_multiplier(self): - input_dim = 16 - input_shape = (input_dim, input_dim, 3) - depth_multiplier = 2 - kernel_height = 3 - kernel_width = 3 - model = tf.keras.Sequential() - model.add(layers.DepthwiseConv2D( - depth_multiplier=depth_multiplier, - kernel_size=(kernel_height, kernel_width), - input_shape=input_shape, padding='valid', strides=(1, 1))) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model) - - def test_tiny_separable_conv_valid_depth_multiplier(self): - input_dim = 16 - input_shape = (input_dim, input_dim, 3) - depth_multiplier = 5 - kernel_height = 3 - kernel_width = 3 - num_kernels = 40 - model = tf.keras.Sequential() - model.add(layers.SeparableConv2D( - filters=num_kernels, kernel_size=(kernel_height, kernel_width), - padding='valid', strides=(1, 1), depth_multiplier=depth_multiplier, - input_shape=input_shape)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, decimal=2) - - def test_tiny_separable_conv_same_fancy_depth_multiplier(self): - input_dim = 16 - input_shape = (input_dim, input_dim, 3) - depth_multiplier = 2 - kernel_height = 3 - kernel_width = 3 - num_kernels = 40 - model = tf.keras.Sequential() - model.add(layers.SeparableConv2D( - filters=num_kernels, kernel_size=(kernel_height, kernel_width), - padding='same', strides=(2, 2), activation='relu', 
depth_multiplier=depth_multiplier, - input_shape=input_shape)) - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, decimal=2) - - def test_max_pooling_no_overlap(self): - # no_overlap: pool_size = strides - model = tf.keras.Sequential() - model.add(layers.MaxPooling2D( - input_shape=(16, 16, 3), pool_size=(2, 2), - strides=None, padding='valid')) - self._test_model(model, has_variables=False) - - def test_max_pooling_overlap_multiple(self): - # input shape is multiple of pool_size, strides != pool_size - model = tf.keras.Sequential() - model.add(layers.MaxPooling2D( - input_shape=(18, 18, 3), pool_size=(3, 3), - strides=(2, 2), padding='valid')) - self._test_model(model, has_variables=False) - - def test_max_pooling_overlap_odd(self): - model = tf.keras.Sequential() - model.add(layers.MaxPooling2D( - input_shape=(16, 16, 3), pool_size=(3, 3), - strides=(2, 2), padding='valid')) - self._test_model(model, has_variables=False) - - def test_max_pooling_overlap_same(self): - model = tf.keras.Sequential() - model.add(layers.MaxPooling2D( - input_shape=(16, 16, 3), pool_size=(3, 3), - strides=(2, 2), padding='same')) - self._test_model(model, has_variables=False) - - def test_global_max_pooling_2d(self): - model = tf.keras.Sequential() - model.add(layers.GlobalMaxPooling2D(input_shape=(16, 16, 3))) - self._test_model(model, has_variables=False) - - def test_global_avg_pooling_2d(self): - model = tf.keras.Sequential() - model.add(layers.GlobalAveragePooling2D(input_shape=(16, 16, 3))) - self._test_model(model, has_variables=False) - - def test_max_pooling_1d(self): - model = tf.keras.Sequential() - model.add(layers.MaxPooling1D(input_shape=(16, 3), pool_size=2)) - self._test_model(model, has_variables=False) - - -if __name__ == '__main__': - np.random.seed(1984) - unittest.main() diff --git a/coremltools/converters/tensorflow/test/test_utils.py b/coremltools/converters/tensorflow/test/test_utils.py deleted file mode 100644 index 0a11728ee..000000000 --- a/coremltools/converters/tensorflow/test/test_utils.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import print_function as _ -from __future__ import division as _ -from __future__ import absolute_import as _ - -import numpy as np - - -def generate_data(shape, mode='random_zero_mean'): - """ - Generate some random data according to a shape. - """ - if shape is None or len(shape) == 0: - return 0.5 - if mode == 'zeros': - x = np.zeros(shape) - elif mode == 'ones': - x = np.ones(shape) - elif mode == 'linear': - x = np.array(range(np.product(shape))).reshape(shape) * 1.0 - elif mode == 'random': - x = np.random.rand(*shape) - elif mode == 'random_large': - x = np.random.rand(*shape) * 100.0 - elif mode == 'random_int': - x = np.random.randint(-50, 50, shape) * 1.0 - elif mode == 'random_zero_mean': - x = np.random.rand(*shape) - 0.5 - elif mode == 'random_zeros_ones': - x = np.random.randint(0, 2, shape) * 1.0 - elif mode == 'random_zero_mean_with_zeros': - x = [np.random.choice([np.random.rand(), 0.0]) for _ in range(np.product(shape))] - x = np.array(x).reshape(shape) - else: - raise ValueError("invalid data mode: '{}'.".format(mode)) - return x - - -def tf_transpose(x, channel_last_to_first=False): - """ Transpose TensorFlow expected input which has channel_last or - channel_first option. - channel_last_to_first: set this flag to True if TensorFlow is expecting - a channel_last 4D Tensor x, while CoreML expects a channel_first 4D Tensor. 
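- For example, with channel_last_to_first=True a 4D TensorFlow tensor of shape (N, H, W, C) is transposed to Core ML's channel-first layout (N, C, H, W); tensors of rank 1 through 3 are returned unchanged.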
- """ - if not hasattr(x, 'shape'): - return np.array([x], dtype=np.float) - elif len(x.shape) == 0: - return np.array(x, dtype=np.float) - elif len(x.shape) == 1: - return x - elif len(x.shape) == 2: - return x - elif len(x.shape) == 3: - return x - elif len(x.shape) == 4: - if channel_last_to_first: - return np.transpose(x, (0, 3, 1, 2)) - else: - return x - else: - raise ValueError('tf_transpose does not support shape = {}'.format(x.shape)) diff --git a/coremltools/converters/xgboost/_tree.py b/coremltools/converters/xgboost/_tree.py index a9a9480aa..f57a18b26 100644 --- a/coremltools/converters/xgboost/_tree.py +++ b/coremltools/converters/xgboost/_tree.py @@ -5,16 +5,19 @@ from ._tree_ensemble import convert_tree_ensemble as _convert_tree_ensemble from ...models import MLModel as _MLModel +from coremltools import __version__ as ct_version +from coremltools.models import _METADATA_VERSION, _METADATA_SOURCE + def convert( - model, - feature_names = None, - target = 'target', - force_32bit_float = True, - mode="regressor", - class_labels=None, - n_classes=None, - ): + model, + feature_names=None, + target="target", + force_32bit_float=True, + mode="regressor", + class_labels=None, + n_classes=None, +): """ Convert a trained XGBoost model to Core ML format. @@ -67,12 +70,23 @@ def convert( # Saving the Core ML model to a file. >>> coremltools.save('my_model.mlmodel') """ - return _MLModel(_convert_tree_ensemble( - model, - feature_names, - target, - force_32bit_float = force_32bit_float, - mode=mode, - class_labels=class_labels, - n_classes=n_classes, - )) + model = _MLModel( + _convert_tree_ensemble( + model, + feature_names, + target, + force_32bit_float=force_32bit_float, + mode=mode, + class_labels=class_labels, + n_classes=n_classes, + ) + ) + + from xgboost import __version__ as xgboost_version + + model.user_defined_metadata[_METADATA_VERSION] = ct_version + model.user_defined_metadata[_METADATA_SOURCE] = "xgboost=={0}".format( + xgboost_version + ) + + return model diff --git a/coremltools/converters/xgboost/_tree_ensemble.py b/coremltools/converters/xgboost/_tree_ensemble.py index ab101fc67..b9470ec13 100644 --- a/coremltools/converters/xgboost/_tree_ensemble.py +++ b/coremltools/converters/xgboost/_tree_ensemble.py @@ -3,31 +3,43 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from ...models.tree_ensemble import TreeEnsembleRegressor as _TreeEnsembleRegressor, TreeEnsembleClassifier - -from ..._deps import HAS_XGBOOST as _HAS_XGBOOST +from ...models.tree_ensemble import ( + TreeEnsembleRegressor as _TreeEnsembleRegressor, + TreeEnsembleClassifier, +) +from ..._deps import _HAS_XGBOOST import numpy as _np +from six import string_types as _string_types if _HAS_XGBOOST: import xgboost as _xgboost -def recurse_json(mlkit_tree, xgb_tree_json, tree_id, node_id, feature_map, - force_32bit_float, mode="regressor", tree_index=0, n_classes=2): + +def recurse_json( + mlkit_tree, + xgb_tree_json, + tree_id, + node_id, + feature_map, + force_32bit_float, + mode="regressor", + tree_index=0, + n_classes=2, +): """Traverse through the tree and append to the tree spec. 
""" relative_hit_rate = None try: - relative_hit_rate = xgb_tree_json['cover'] + relative_hit_rate = xgb_tree_json["cover"] except KeyError: pass - # Fill node attributes - if 'leaf' not in xgb_tree_json: - branch_mode = 'BranchOnValueLessThan' - split_name = xgb_tree_json['split'] + if "leaf" not in xgb_tree_json: + branch_mode = "BranchOnValueLessThan" + split_name = xgb_tree_json["split"] feature_index = split_name if not feature_map else feature_map[split_name] # xgboost internally uses float32, but the parsing from json pulls it out @@ -35,28 +47,34 @@ def recurse_json(mlkit_tree, xgb_tree_json, tree_id, node_id, feature_map, # tree ensemble compiler, we need to explicitly cast it to a float 32 # value, then back to the 64 bit float that protobuf expects. This is # controlled with the force_32bit_float flag. - feature_value = xgb_tree_json['split_condition'] + feature_value = xgb_tree_json["split_condition"] if force_32bit_float: feature_value = float(_np.float32(feature_value)) - - true_child_id = xgb_tree_json['yes'] - false_child_id = xgb_tree_json['no'] + true_child_id = xgb_tree_json["yes"] + false_child_id = xgb_tree_json["no"] # Get the missing value behavior correct missing_value_tracks_true_child = False try: - if xgb_tree_json['missing'] == true_child_id: + if xgb_tree_json["missing"] == true_child_id: missing_value_tracks_true_child = True except KeyError: pass - mlkit_tree.add_branch_node(tree_id, node_id, feature_index, - feature_value, branch_mode, true_child_id, false_child_id, - relative_hit_rate = relative_hit_rate, - missing_value_tracks_true_child = missing_value_tracks_true_child) + mlkit_tree.add_branch_node( + tree_id, + node_id, + feature_index, + feature_value, + branch_mode, + true_child_id, + false_child_id, + relative_hit_rate=relative_hit_rate, + missing_value_tracks_true_child=missing_value_tracks_true_child, + ) else: value = xgb_tree_json["leaf"] @@ -66,23 +84,35 @@ def recurse_json(mlkit_tree, xgb_tree_json, tree_id, node_id, feature_map, if mode == "classifier" and n_classes > 2: value = {tree_index: value} - mlkit_tree.add_leaf_node(tree_id, node_id, value, - relative_hit_rate = relative_hit_rate) + mlkit_tree.add_leaf_node( + tree_id, node_id, value, relative_hit_rate=relative_hit_rate + ) # Now recurse if "children" in xgb_tree_json: for child in xgb_tree_json["children"]: - recurse_json(mlkit_tree, child, tree_id, child['nodeid'], feature_map, force_32bit_float, mode=mode, tree_index=tree_index, n_classes=n_classes) + recurse_json( + mlkit_tree, + child, + tree_id, + child["nodeid"], + feature_map, + force_32bit_float, + mode=mode, + tree_index=tree_index, + n_classes=n_classes, + ) + def convert_tree_ensemble( - model, - feature_names, - target, - force_32bit_float, - mode="regressor", - class_labels=None, - n_classes=None, - ): + model, + feature_names, + target, + force_32bit_float, + mode="regressor", + class_labels=None, + n_classes=None, +): """Convert a generic tree model to the protobuf spec. This currently supports: @@ -120,15 +150,18 @@ def convert_tree_ensemble( model_spec: An object of type Model_pb. Protobuf representation of the model """ - if not(_HAS_XGBOOST): - raise RuntimeError('xgboost not found. xgboost conversion API is disabled.') + if not (_HAS_XGBOOST): + raise RuntimeError("xgboost not found. 
xgboost conversion API is disabled.") accepted_modes = ["regressor", "classifier"] if mode not in accepted_modes: raise ValueError("mode should be in %s" % accepted_modes) import json import os + feature_map = None - if isinstance(model, (_xgboost.core.Booster, _xgboost.XGBRegressor, _xgboost.XGBClassifier)): + if isinstance( + model, (_xgboost.core.Booster, _xgboost.XGBRegressor, _xgboost.XGBClassifier) + ): # Testing a few corner cases that we don't support if isinstance(model, _xgboost.XGBRegressor): @@ -139,7 +172,9 @@ def convert_tree_ensemble( except: objective = None if objective in ["reg:gamma", "reg:tweedie"]: - raise ValueError("Regression objective '%s' not supported for export." % objective) + raise ValueError( + "Regression objective '%s' not supported for export." % objective + ) if isinstance(model, _xgboost.XGBClassifier): if mode == "regressor": @@ -147,45 +182,58 @@ def convert_tree_ensemble( n_classes = model.n_classes_ if class_labels is not None: if len(class_labels) != n_classes: - raise ValueError("Number of classes in model (%d) does not match " - "length of supplied class list (%d)." - % (n_classes, len(class_labels))) + raise ValueError( + "Number of classes in model (%d) does not match " + "length of supplied class list (%d)." + % (n_classes, len(class_labels)) + ) else: class_labels = list(range(n_classes)) # Now use the booster API. if isinstance(model, (_xgboost.XGBRegressor, _xgboost.XGBClassifier)): # Name change in 0.7 - if hasattr(model, 'get_booster'): + if hasattr(model, "get_booster"): model = model.get_booster() else: model = model.booster() # Xgboost sometimes has feature names in there. Sometimes does not. if (feature_names is None) and (model.feature_names is None): - raise ValueError("Feature names not present in the model. Must be provided during conversion.") + raise ValueError( + "The XGBoost model does not have feature names. They must be provided in convert method." + ) feature_names = model.feature_names if feature_names is None: feature_names = model.feature_names - xgb_model_str = model.get_dump(with_stats=True, dump_format = 'json') + xgb_model_str = model.get_dump(with_stats=True, dump_format="json") if model.feature_names: - feature_map = {f:i for i,f in enumerate(model.feature_names)} + feature_map = {f: i for i, f in enumerate(model.feature_names)} # Path on the file system where the XGboost model exists. - elif isinstance(model, str): + elif isinstance(model, _string_types): if not os.path.exists(model): raise TypeError("Invalid path %s." % model) with open(model) as f: xgb_model_str = json.load(f) - feature_map = {f:i for i,f in enumerate(feature_names)} + + if feature_names is None: + raise ValueError( + "feature names must be provided in convert method if the model is a path on file system." + ) + else: + feature_map = {f: i for i, f in enumerate(feature_names)} + else: raise TypeError("Unexpected type. 
Expecting XGBoost model.") if mode == "classifier": if n_classes is None and class_labels is None: - raise ValueError("You must provide class_labels or n_classes when not providing the XGBClassifier") + raise ValueError( + "You must provide class_labels or n_classes when not providing the XGBClassifier" + ) elif n_classes is None: n_classes = len(class_labels) elif class_labels is None: @@ -200,6 +248,8 @@ def convert_tree_ensemble( mlkit_tree.set_default_prediction_value(base_prediction) if n_classes == 2: mlkit_tree.set_post_evaluation_transform("Regression_Logistic") + else: + mlkit_tree.set_post_evaluation_transform("Classification_SoftMax") else: mlkit_tree = _TreeEnsembleRegressor(feature_names, target) mlkit_tree.set_default_prediction_value(0.5) @@ -209,7 +259,24 @@ def convert_tree_ensemble( tree_index = xgb_tree_id % n_classes else: tree_index = 0 - xgb_tree_json = json.loads(xgb_tree_str) - recurse_json(mlkit_tree, xgb_tree_json, xgb_tree_id, node_id = 0, - feature_map = feature_map, force_32bit_float = force_32bit_float, mode=mode, tree_index=tree_index, n_classes=n_classes) + + try: + # this means that the xgb_tree_str is a json dump and needs to be loaded + xgb_tree_json = json.loads(xgb_tree_str) + except: + # this means that the xgb_tree_str is loaded from a path in file system already and does not need to be reloaded + xgb_tree_json = xgb_tree_str + + recurse_json( + mlkit_tree, + xgb_tree_json, + xgb_tree_id, + node_id=0, + feature_map=feature_map, + force_32bit_float=force_32bit_float, + mode=mode, + tree_index=tree_index, + n_classes=n_classes, + ) + return mlkit_tree.spec diff --git a/coremltools/graph_visualization/app.js b/coremltools/graph_visualization/app.js deleted file mode 100644 index 645033bee..000000000 --- a/coremltools/graph_visualization/app.js +++ /dev/null @@ -1,593 +0,0 @@ -"use strict"; - -document.addEventListener('DOMContentLoaded', function() { - var options = { - name: 'dagre', - nodeSep: 3, - edgeSep: 5, - minLen: function( edge ){ return 1; }, - edgeWeight: function( edge ){ return 2; }, - fit: true, - spacingFactor: 1.1, - nodeDimensionsIncludeLabels: true - }; - - var nodeInfo = getModelData(); - nodeInfo.then(function(modelData) { - setTitle(modelData['title']); - - var nodesArray = modelData['cy_data']; - var cy = window.cy = cytoscape({ - container: document.getElementById('cy'), - elements: nodesArray, - layout: options, - style: [ - { - selector: "node", - style: { - shape: 'roundrectangle', - label: 'data(name)', - 'font-size' : 30, - 'border-width': 3, - 'border-color': 'black', - width: 'label', - 'color': '#000000', - 'text-valign': 'center', - 'background-image': 'icons/node.png', - padding: 10, - } - }, - { - selector: "node.parent", - style: { - 'compound-sizing-wrt-labels': 'include', - 'background-image' : 'icons/parent.png', - 'text-rotation' : '90deg', - 'text-margin-x' : 10 - } - }, - { - selector: "node.parent > node", - style: { - opacity : 0 - } - }, - { - selector: "node.arrayFeatureExtractor", - style: { - 'background-image': 'icons/arrayFeatureExtractor.png' - } - }, - { - selector: "node.categoricalMapping", - style: { - 'background-image': 'icons/categoricalMapping.png' - } - }, - { - selector: "node.dictVectorizer", - style: { - 'background-image': 'icons/dictVectorizer.png' - } - }, - { - selector: "node.custom", - style: { - 'background-image': 'icons/custom.png' - } - }, - { - selector: "node.featureVectorizer", - style: { - 'background-image': 'icons/featureVectorizer.png' - } - }, - { - selector: 
"node.glmClassifier", - style: { - 'background-image': 'icons/glmClassifier.png' - } - }, - { - selector: "node.glmRegressor", - style: { - 'background-image': 'icons/glmRegressor.png' - } - }, - { - selector: "node.identity", - style: { - 'background-image': 'icons/identity.png' - } - }, - { - selector: "node.imputer", - style: { - 'background-image': 'icons/imputer.png' - } - }, - { - selector: "node.neuralNetwork", - style: { - 'background-image': 'icons/neuralNetwork.png' - } - }, - { - selector: "node.neuralNetworkClassifier", - style: { - 'background-image': 'icons/neuralNetworkClassifier.png' - } - }, - { - selector: "node.neuralNetworkRegressor", - style: { - 'background-image': 'icons/neuralNetworkRegressor.png' - } - }, - { - selector: "node.normalizer", - style: { - 'background-image': 'icons/normalizer.png' - } - }, - { - selector: "node.oneHotEncoder", - style: { - 'background-image': 'icons/oneHotEncoder.png' - } - }, - { - selector: "node.scaler", - style: { - 'background-image': 'icons/scaler.png' - } - }, - { - selector: "node.supportVectorClassifier", - style: { - 'background-image': 'icons/supportVectorClassifier.png' - } - }, - { - selector: "node.supportVectorRegressor", - style: { - 'background-image': 'icons/supportVectorRegressor.png' - } - }, - { - selector: "node.treeEnsembleClassifier", - style: { - 'background-image': 'icons/treeEnsembleClassifier.png' - } - }, - { - selector: "node.treeEnsembleRegressor", - style: { - 'background-image': 'icons/treeEnsembleRegressor.png' - } - }, - { - selector: "node.convolution", - style: { - 'color': 'white', - 'background-image': 'icons/convolution.png' - } - }, - { - selector: "node.deconvolution", - style: { - 'color': 'white', - 'background-image': 'icons/convolution.png' - } - }, - { - selector: "node.pooling", - style: { - 'color': 'white', - 'background-image': 'icons/pooling.png' - } - }, - { - selector: "node.activation", - style: { - 'background-image': 'icons/activation.png' - } - }, - { - selector: "node.add", - style: { - 'background-image': 'icons/add.png' - } - }, - { - selector: "node.average", - style: { - 'background-image': 'icons/average.png' - } - }, - { - selector: "node.batchnorm", - style: { - 'background-image': 'icons/batchnorm.png' - } - }, - { - selector: "node.biDirectionalLSTM", - style: { - 'background-image': 'icons/biDirectionalLSTM.png' - } - }, - { - selector: "node.bias", - style: { - 'background-image': 'icons/bias.png' - } - }, - { - selector: "node.concat", - style: { - 'background-image': 'icons/concat.png' - } - }, - { - selector: "node.crop", - style: { - 'background-image': 'icons/crop.png' - } - }, - { - selector: "node.dot", - style: { - 'background-image': 'icons/dot.png' - } - }, - { - selector: "node.embedding", - style: { - 'background-image': 'icons/embedding.png' - } - }, - { - selector: "node.flatten", - style: { - 'background-image': 'icons/flatten.png' - } - }, - { - selector: "node.gru", - style: { - 'background-image': 'icons/gru.png' - } - }, - { - selector: "node.innerProduct", - style: { - 'background-image': 'icons/innerProduct.png' - } - }, - { - selector: "node.input", - style: { - 'background-image': 'icons/input.png' - } - }, - { - selector: "node.output", - style: { - 'background-image': 'icons/output.png' - } - }, - { - selector: "node.l2normalize", - style: { - 'background-image': 'icons/l2normalize.png' - } - }, - { - selector: "node.loadConstant", - style: { - 'background-image': 'icons/loadConstant.png' - } - }, - { - selector: "node.lrn", - style: { - 
'background-image': 'icons/lrn.png' - } - }, - { - selector: "node.max", - style: { - 'background-image': 'icons/max.png' - } - }, - { - selector: "node.min", - style: { - 'background-image': 'icons/min.png' - } - }, - { - selector: "node.multiply", - style: { - 'background-image': 'icons/multiply.png' - } - }, - { - selector: "node.mvn", - style: { - 'background-image': 'icons/mvn.png' - } - }, - { - selector: "node.padding", - style: { - 'background-image': 'icons/padding.png' - } - }, - { - selector: "node.permute", - style: { - 'background-image': 'icons/permute.png' - } - }, - { - selector: "node.pooling", - style: { - 'background-image': 'icons/pooling.png' - } - }, - { - selector: "node.reduce", - style: { - 'background-image': 'icons/reduce.png' - } - }, - { - selector: "node.reorganizeData", - style: { - 'background-image': 'icons/reorganizeData.png' - } - }, - { - selector: "node.reshape", - style: { - 'background-image': 'icons/reshape.png' - } - }, - { - selector: "node.scale", - style: { - 'background-image': 'icons/scale.png' - } - }, - { - selector: "node.sequenceRepeat", - style: { - 'background-image': 'icons/sequenceRepeat.png' - } - }, - { - selector: "node.simpleRecurrent", - style: { - 'background-image': 'icons/simpleRecurrent.png' - } - }, - { - selector: "node.slice", - style: { - 'background-image': 'icons/slice.png' - } - }, - { - selector: "node.softmax", - style: { - 'background-image': 'icons/softmax.png' - } - }, - { - selector: "node.split", - style: { - 'background-image': 'icons/split.png' - } - }, - { - selector: "node.unary", - style: { - 'background-image': 'icons/unary.png' - } - }, - { - selector: "node.uniDirectionalLSTM", - style: { - 'background-image': 'icons/uniDirectionalLSTM.png' - } - }, - { - selector: "node.upsample", - style: { - 'background-image': 'icons/upsample.png' - } - }, - { - selector: "edge", - style: { - 'curve-style': 'bezier', - 'control-point-weights': 1, - 'line-color': '#111111', - 'color' : '#000000', - 'border-width': 5, - 'font-size': 20, - 'target-arrow-shape': 'triangle', - 'target-arrow-color': '#111111', - label: 'data(label)', - 'text-background-opacity': 0, - 'text-background-color': '#ffffff', - 'text-background-shape': 'rectangle', - 'text-border-style': 'solid', - 'text-border-opacity': 0, - 'text-border-width': '1px', - 'text-border-color': 'darkgray', - 'text-opacity': 0 - } - } - ] - }); - cy.fit(); - var childNodeCollection = cy.elements("node.parent > node"); - var childEdges = childNodeCollection.connectedEdges(); - childEdges.style({'opacity': 0}); - cy.$('node').on('mouseover', function(e){ - var ele = e.target; - var keys = Object.keys(ele.data('info')); - var div = document.getElementById('node-info'); - var content = '
<div>'; - content += '<b>Parameters</b>'; - content += '<hr>'; - content += '<br>'; - for (var i = keys.length - 1; i >= 0; i--) { - if (keys[i] != 'desc') { - var val = ele.data('info')[keys[i]].replace(/["]+/g, ''); - content += keys[i] + ' : ' + val.charAt(0).replace(/(?:\r\n|\r|\n)/g, '<br>') + val.slice(1) + '<br>'; - } - } - content += '<hr>'; - if (ele.data('info')["desc"] != undefined) { - content += '<br><b>Description</b><hr>'; - content += ele.data('info')["desc"].replace(/(?:\r\n|\r|\n)/g, '<br>') + '<br>
'; - } - div.innerHTML = content; - }); - - cy.on('tap', 'edge', function (evt) { - var edge = evt.target; - var edgeLabel = edge.data().source; - edge.style({'label': edgeLabel}); - edge.animate({ - style: { - 'text-opacity': 1, - 'text-margin-x': 15, - 'text-border-opacity': 1, - 'text-background-opacity': 1 - } - }); - }); - - cy.on('click', 'node.parent', function(evt){ - var node = evt.target; - node.children().style({'opacity': 1}); - node.style({'color' : '#d5e1df'}); - var selectedChildNodeCollection = node.children(); - var parentEdges = node.connectedEdges(); - var selectedChildEdges = selectedChildNodeCollection.connectedEdges(); - selectedChildEdges.style({'opacity' : 1}); - - var selectedChildEdgesTarget = []; - - for(var idx = 0; idx < selectedChildEdges.length; idx++) { - if (selectedChildEdges[idx].data('target') != node.data().id) { - selectedChildEdgesTarget.push(selectedChildEdges[idx].data('target')); - } - } - parentEdges.style({'opacity' : 0}); - - for(var idx = 0; idx < parentEdges.length; idx++) { - if (parentEdges[idx].data('target') != node.data().id) { - - if (selectedChildEdgesTarget.includes(parentEdges[idx].data('target')) == false) { - parentEdges[idx].style({'opacity' : "1"}); - } - } - } - - cy.animate({ - fit : { - eles : selectedChildNodeCollection, - padding : 20 - } - }, { - duration: 500 - }); - - }); - - $('#label-switch').on('click', function(e) { - - var edges = cy.$('edge'); - for(var idx = 0 ; idx < edges.length; idx++) { - edges[idx].style({label: edges[idx].data().shape}); - } - if (edges.style().textOpacity == 0) { - edges.animate({ - style: { - 'text-opacity': 1, - 'text-background-opacity': 1, - 'text-border-opacity': 1 - } - }); - } - else { - edges.animate({ - style: { - 'text-opacity': 0, - 'text-background-opacity': 0, - 'text-border-opacity': 0 - } - }); - } - }); - - $('#reset-state').on('click', function (e) { - var childNodes = cy.$("node.parent > node"); - childNodes.style({ - "opacity": 0 - }); - childNodes.connectedEdges().style({ - 'opacity': 0 - }); - var parentNodes = cy.$("node.parent"); - parentNodes.style({ - 'color': 'black' - }); - parentNodes.connectedEdges().style({ - 'opacity': 1 - }); - var edges = cy.edges(); - for(var idx = 0 ; idx < edges.length; idx++) { - edges[idx].style({label: null}); - } - edges.animate({ - style: { - 'text-opacity': 0, - 'text-background-opacity': 0, - 'text-border-opacity': 0 - } - }); - cy.fit(); - }); - - }); - -}); - - -function setTitle(name) { - var title = document.getElementById('title'); - title.innerText = name; -} - - -function getModelData() { - var graphPromise = $.ajax({ - url: 'model.json', - type: 'GET', - dataType: 'json', - contentType: "application/json; charset=utf-8", - }) - .then(function(msg) { - return msg; - } - ); - return Promise.resolve(graphPromise); - -} diff --git a/coremltools/graph_visualization/assets/cytoscape-dagre.js b/coremltools/graph_visualization/assets/cytoscape-dagre.js deleted file mode 100644 index b3d143b12..000000000 --- a/coremltools/graph_visualization/assets/cytoscape-dagre.js +++ /dev/null @@ -1,219 +0,0 @@ -/*! 
-Copyright (c) The Cytoscape Consortium - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the “Software”), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -;(function(){ 'use strict'; - - // registers the extension on a cytoscape lib ref - var register = function( cytoscape, dagre ){ - if( !cytoscape || !dagre ){ return; } // can't register if cytoscape unspecified - - var isFunction = function(o){ return typeof o === 'function'; }; - - // default layout options - var defaults = { - // dagre algo options, uses default value on undefined - nodeSep: undefined, // the separation between adjacent nodes in the same rank - edgeSep: undefined, // the separation between adjacent edges in the same rank - rankSep: undefined, // the separation between adjacent nodes in the same rank - rankDir: undefined, // 'TB' for top to bottom flow, 'LR' for left to right - minLen: function( edge ){ return 1; }, // number of ranks to keep between the source and target of the edge - edgeWeight: function( edge ){ return 1; }, // higher weight edges are generally made shorter and straighter than lower weight edges - - // general layout options - fit: true, // whether to fit to viewport - padding: 30, // fit padding - spacingFactor: undefined, // Applies a multiplicative factor (>0) to expand or compress the overall area that the nodes take up - nodeDimensionsIncludeLabels: undefined, // whether labels should be included in determining the space used by a node (default true) - animate: false, // whether to transition the node positions - animateFilter: function( node, i ){ return true; }, // whether to animate specific nodes when animation is on; non-animated nodes immediately go to their final positions - animationDuration: 500, // duration of animation in ms if enabled - animationEasing: undefined, // easing of animation if enabled - boundingBox: undefined, // constrain layout bounds; { x1, y1, x2, y2 } or { x1, y1, w, h } - transform: function( node, pos ){ return pos; }, // a function that applies a transform to the final node position - ready: function(){}, // on layoutready - stop: function(){} // on layoutstop - }; - - // constructor - // options : object containing layout options - function DagreLayout( options ){ - var opts = this.options = {}; - for( var i in defaults ){ opts[i] = defaults[i]; } - for( var i in options ){ opts[i] = options[i]; } - } - - // runs the layout - DagreLayout.prototype.run = function(){ - var options = this.options; - var layout = this; - - var cy = options.cy; // cy is automatically populated for us in the constructor - var eles = options.eles; - - var getVal = 
function( ele, val ){ - return isFunction(val) ? val.apply( ele, [ ele ] ) : val; - }; - - var bb = options.boundingBox || { x1: 0, y1: 0, w: cy.width(), h: cy.height() }; - if( bb.x2 === undefined ){ bb.x2 = bb.x1 + bb.w; } - if( bb.w === undefined ){ bb.w = bb.x2 - bb.x1; } - if( bb.y2 === undefined ){ bb.y2 = bb.y1 + bb.h; } - if( bb.h === undefined ){ bb.h = bb.y2 - bb.y1; } - - var g = new dagre.graphlib.Graph({ - multigraph: true, - compound: true - }); - - var gObj = {}; - var setGObj = function( name, val ){ - if( val != null ){ - gObj[ name ] = val; - } - }; - - setGObj( 'nodesep', options.nodeSep ); - setGObj( 'edgesep', options.edgeSep ); - setGObj( 'ranksep', options.rankSep ); - setGObj( 'rankdir', options.rankDir ); - - g.setGraph( gObj ); - - g.setDefaultEdgeLabel(function() { return {}; }); - g.setDefaultNodeLabel(function() { return {}; }); - - // add nodes to dagre - var nodes = eles.nodes(); - for( var i = 0; i < nodes.length; i++ ){ - var node = nodes[i]; - var nbb = node.layoutDimensions( options ); - - g.setNode( node.id(), { - width: nbb.w, - height: nbb.h, - name: node.id() - } ); - - // console.log( g.node(node.id()) ); - } - - // set compound parents - for( var i = 0; i < nodes.length; i++ ){ - var node = nodes[i]; - - if( node.isChild() ){ - g.setParent( node.id(), node.parent().id() ); - } - } - - // add edges to dagre - var edges = eles.edges().stdFilter(function( edge ){ - return !edge.source().isParent() && !edge.target().isParent(); // dagre can't handle edges on compound nodes - }); - for( var i = 0; i < edges.length; i++ ){ - var edge = edges[i]; - - g.setEdge( edge.source().id(), edge.target().id(), { - minlen: getVal( edge, options.minLen ), - weight: getVal( edge, options.edgeWeight ), - name: edge.id() - }, edge.id() ); - - // console.log( g.edge(edge.source().id(), edge.target().id(), edge.id()) ); - } - - dagre.layout( g ); - - var gNodeIds = g.nodes(); - for( var i = 0; i < gNodeIds.length; i++ ){ - var id = gNodeIds[i]; - var n = g.node( id ); - - cy.getElementById(id).scratch().dagre = n; - } - - var dagreBB; - - if( options.boundingBox ){ - dagreBB = { x1: Infinity, x2: -Infinity, y1: Infinity, y2: -Infinity }; - nodes.forEach(function( node ){ - var dModel = node.scratch().dagre; - - dagreBB.x1 = Math.min( dagreBB.x1, dModel.x ); - dagreBB.x2 = Math.max( dagreBB.x2, dModel.x ); - - dagreBB.y1 = Math.min( dagreBB.y1, dModel.y ); - dagreBB.y2 = Math.max( dagreBB.y2, dModel.y ); - }); - - dagreBB.w = dagreBB.x2 - dagreBB.x1; - dagreBB.h = dagreBB.y2 - dagreBB.y1; - } else { - dagreBB = bb; - } - - var constrainPos = function( p ){ - if( options.boundingBox ){ - var xPct = dagreBB.w === 0 ? 0 : (p.x - dagreBB.x1) / dagreBB.w; - var yPct = dagreBB.h === 0 ? 0 : (p.y - dagreBB.y1) / dagreBB.h; - - return { - x: bb.x1 + xPct * bb.w, - y: bb.y1 + yPct * bb.h - }; - } else { - return p; - } - }; - - nodes.layoutPositions(layout, options, function( ele ){ - ele = typeof ele === "object" ? 
ele : this; - var dModel = ele.scratch().dagre; - - return constrainPos({ - x: dModel.x, - y: dModel.y - }); - }); - - return this; // chaining - }; - - cytoscape('layout', 'dagre', DagreLayout); - - }; - - if( typeof module !== 'undefined' && module.exports ){ // expose as a commonjs module - module.exports = function( cytoscape, dagre ){ - register( cytoscape, dagre || require('dagre') ); - }; - } else if( typeof define !== 'undefined' && define.amd ){ // expose as an amd/requirejs module - define('cytoscape-dagre', function(){ - return register; - }); - } - - if( typeof cytoscape !== 'undefined' && typeof dagre !== 'undefined' ){ // expose to global cytoscape (i.e. window.cytoscape) - register( cytoscape, dagre ); - } - -})(); diff --git a/coremltools/graph_visualization/assets/cytoscape.min.js b/coremltools/graph_visualization/assets/cytoscape.min.js deleted file mode 100644 index 29a9b3a90..000000000 --- a/coremltools/graph_visualization/assets/cytoscape.min.js +++ /dev/null @@ -1,63 +0,0 @@ -!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var t;t="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this,t.cytoscape=e()}}(function(){return function e(t,r,n){function i(o,s){if(!r[o]){if(!t[o]){var l="function"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var u=new Error("Cannot find module '"+o+"'");throw u.code="MODULE_NOT_FOUND",u}var c=r[o]={exports:{}};t[o][0].call(c.exports,function(e){var r=t[o][1][e];return i(r?r:e)},c,c.exports,e,t,r,n)}return r[o].exports}for(var a="function"==typeof require&&require,o=0;o0;){var x=i(f,m),w=a.getElementById(f[x]),E=w.id();if(b++,E==h){var C=r(d,h,g,[]);return{found:!0,distance:y[E],path:t.spawn(C),steps:b}}p.push(E),f.splice(x,1);for(var P=w._private.edges,D=0;Dc&&(c=t)},get:function(e){return u[e]}},h=0;h0?D.edgesTo(P)[0]:P.edgesTo(D)[0];var T=r(C);P=P.id(),x[P]>x[p]+T&&(x[P]=x[p]+T,w.nodes.indexOf(P)<0?w.push(P):w.updateItem(P),b[P]=0,m[P]=[]),x[P]==x[p]+T&&(b[P]=b[P]+b[p],m[P].push(p))}else for(var E=0;E0;)for(var P=y.pop(),E=0;E0;){var b=g.pop(),x=p(b),w=b.id();if(c[w]=x,x===Math.Infinite)break;for(var E=b.neighborhood().intersect(h),v=0;v0)for(r.unshift(t);u[i.id()];){var a=u[i.id()];r.unshift(a.edge),r.unshift(a.node),i=a.node}return o.collection(r)}}}};t.exports=a},{"../../heap":89,"../../is":91}],10:[function(e,t,r){"use strict";var n=e("../../is"),i={floydWarshall:function(e){e=e||{};var t=this.cy();if(null!=e.weight&&n.fn(e.weight))var r=e.weight;else var r=function(e){return 1};if(null!=e.directed)var i=e.directed;else var i=!1;for(var a=this.edges().stdFilter(function(e){return!e.isLoop()}),o=this.nodes(),s=o.length,l={},u=0;um&&(c[v][y]=m,p[v][y]=y,f[v][y]=a[u])}if(!i)for(var u=0;um&&(c[v][y]=m,p[v][y]=y,f[v][y]=a[u])}for(var b=0;b0&&this.spawn(r).updateStyle().trigger("class"),t},addClass:function(e){return this.toggleClass(e,!0)},hasClass:function(e){var t=this[0];return!(null==t||!t._private.classes[e])},toggleClass:function(e,t){for(var r=e.match(/\S+/g)||[],n=this,i=[],a=0,o=n.length;a0&&this.spawn(i).updateStyle().trigger("class"),n},removeClass:function(e){return this.toggleClass(e,!1)},flashClass:function(e,t){var r=this;if(null==t)t=250;else if(0===t)return r;return r.addClass(e),setTimeout(function(){r.removeClass(e)},t),r}};t.exports=i},{"../util":108}],17:[function(e,t,r){"use strict";var n=(e("../is"),e("../selector")),i={allAre:function(e){var 
t=new n(e);return this.every(function(e){return t.matches(e)})},is:function(e){var t=new n(e);return this.some(function(e){return t.matches(e)})},some:function(e,t){for(var r=0;rt}),minIndegree:i("indegree",function(e,t){return et}),minOutdegree:i("outdegree",function(e,t){return et})}),a.extend(o,{totalDegree:function(e){for(var t=0,r=this.nodes(),n=0;n0,d=c;c&&(u=u[0]);var h=d?u.position():{x:0,y:0};return i={x:l.x-h.x,y:l.y-h.y},void 0===e?i:i[e]}for(var p=0;p0,d=c;c&&(u=u[0]);var h=d?u.position():{x:0,y:0};void 0!==t?r.position(e,t+h[e]):void 0!==i&&r.position({x:i.x+h.x,y:i.y+h.y})}}else if(!a)return;return this},renderedBoundingBox:function(e){var t=this.boundingBox(e),r=this.cy(),n=r.zoom(),i=r.pan(),a=t.x1*n+i.x,o=t.x2*n+i.x,s=t.y1*n+i.y,l=t.y2*n+i.y;return{x1:a,x2:o,y1:s,y2:l,w:o-a,h:l-s}},dirtyCompoundBoundsCache:function(){var e=this.cy();if(!e.styleEnabled()||!e.hasCompoundNodes())return this;for(var t=this,r=[],n=0;n0;){var i=r.shift();i._private.compoundBoundsClean=!1,i.isParent()&&i.trigger("bounds"),i.isChild()&&r.push(i.parent())}return this},updateCompoundBounds:function(){function e(e){function t(e,t,r){var n=0,i=0,a=t+r;return e>0&&a>0&&(n=t/a*e,i=r/a*e),{biasDiff:n,biasComplementDiff:i}}function n(e,t,r,n){if("%"!==r.units)return"px"===r.units?r.pfValue:0;switch(n){case"width":return e>0?r.pfValue*e:0;case"height":return t>0?r.pfValue*t:0;case"average":return e>0&&t>0?r.pfValue*(e+t)/2:0;case"min":return e>0&&t>0?e>t?r.pfValue*t:r.pfValue*e:0;case"max":return e>0&&t>0?e>t?r.pfValue*e:r.pfValue*t:0;default:return 0}}if(e.isParent()){var a=e._private,o=e.children(),s="include"===e.pstyle("compound-sizing-wrt-labels").value,l={width:{val:e.pstyle("min-width").pfValue,left:e.pstyle("min-width-bias-left"),right:e.pstyle("min-width-bias-right")},height:{val:e.pstyle("min-height").pfValue,top:e.pstyle("min-height-bias-top"),bottom:e.pstyle("min-height-bias-bottom")}},u=o.boundingBox({includeLabels:s,includeOverlays:!1,useCache:!1}),c=a.position,d=l.width.left.value;"px"===l.width.left.units&&l.width.val>0&&(d=100*d/l.width.val);var h=l.width.right.value;"px"===l.width.right.units&&l.width.val>0&&(h=100*h/l.width.val);var p=l.height.top.value;"px"===l.height.top.units&&l.height.val>0&&(p=100*p/l.height.val);var f=l.height.bottom.value;"px"===l.height.bottom.units&&l.height.val>0&&(f=100*f/l.height.val);var g=t(l.width.val-u.w,d,h),v=g.biasDiff,y=g.biasComplementDiff,m=t(l.height.val-u.h,p,f),b=m.biasDiff,x=m.biasComplementDiff;a.autoPadding=n(u.w,u.h,i.pstyle("padding"),i.pstyle("padding-relative-to").value),a.autoWidth=Math.max(u.w,l.width.val),c.x=(-v+u.x1+u.x2+y)/2,a.autoHeight=Math.max(u.h,l.height.val),c.y=(-b+u.y1+u.y2+x)/2,r.push(e)}}var t=this.cy();if(!t.styleEnabled()||!t.hasCompoundNodes())return this;for(var r=[],n=0;ne.x2?n:e.x2,e.y1=re.y2?i:e.y2)},h=function(e,t){return d(e,t.x1,t.y1,t.x2,t.y2)},p=function(e,t,r){return l.getPrefixedProperty(e,t,r)},f=function(e,t,r,n){var i,a,o=t._private,s=o.rstyle,l=s.arrowWidth/2,u=t.pstyle(r+"-arrow-shape").value;"none"!==u&&("source"===r?(i=s.srcX,a=s.srcY):"target"===r?(i=s.tgtX,a=s.tgtY):(i=s.midX,a=s.midY),d(e,i-l,a-l,i+l,a+l))},g=function(e,t,r,n){var i;i=r?r+"-":"";var a=t._private,o=a.rstyle,s=t.pstyle(i+"label").strValue;if(s){var 
l,u,c,h,f=t.pstyle("text-halign"),g=t.pstyle("text-valign"),v=p(o,"labelWidth",r),y=p(o,"labelHeight",r),m=p(o,"labelX",r),b=p(o,"labelY",r),x=t.pstyle(i+"text-margin-x").pfValue,w=t.pstyle(i+"text-margin-y").pfValue,E=t.isEdge(),C=t.pstyle(i+"text-rotation"),P=t.pstyle("text-outline-width").pfValue,D=t.pstyle("text-border-width").pfValue,T=D/2,k=t.pstyle("text-background-padding").pfValue,S=y+2*k,_=v+2*k,M=_/2,I=S/2;if(E)l=m-M,u=m+M,c=b-I,h=b+I;else{switch(f.value){case"left":l=m-_,u=m;break;case"center":l=m-M,u=m+M;break;case"right":l=m,u=m+_}switch(g.value){case"top":c=b-S,h=b;break;case"center":c=b-I,h=b+I;break;case"bottom":c=b,h=b+S}}var N=E&&"autorotate"===C.strValue,B=null!=C.pfValue&&0!==C.pfValue;if(N||B){var z=N?p(a.rstyle,"labelAngle",r):C.pfValue,L=Math.cos(z),O=Math.sin(z),A=function(e,t){return e-=m,t-=b,{x:e*L-t*O+m,y:e*O+t*L+b}},R=A(l,c),V=A(l,h),q=A(u,c),F=A(u,h);l=Math.min(R.x,V.x,q.x,F.x),u=Math.max(R.x,V.x,q.x,F.x),c=Math.min(R.y,V.y,q.y,F.y),h=Math.max(R.y,V.y,q.y,F.y)}l+=x-Math.max(P,T),u+=x+Math.max(P,T),c+=w-Math.max(P,T),h+=w+Math.max(P,T),d(e,l,c,u,h)}return e},v=function(e,t){var r,n,i,a,o,s,l=e._private.cy,h=l._private,p=h.styleEnabled,v={x1:1/0,y1:1/0,x2:-(1/0),y2:-(1/0)},y=e._private,m=p?e.pstyle("display").value:"element",b=e.isNode(),x=e.isEdge(),w="none"!==m;if(w){var E=0,C=0;p&&t.includeOverlays&&(E=e.pstyle("overlay-opacity").value,0!==E&&(C=e.pstyle("overlay-padding").value));var P=0,D=0;if(p&&(P=e.pstyle("width").pfValue,D=P/2),b&&t.includeNodes){var T=e.position();o=T.x,s=T.y;var P=e.outerWidth(),k=P/2,S=e.outerHeight(),_=S/2;r=o-k-C,n=o+k+C,i=s-_-C,a=s+_+C,d(v,r,i,n,a)}else if(x&&t.includeEdges){var M=y.rstyle||{};if(p&&(r=Math.min(M.srcX,M.midX,M.tgtX),n=Math.max(M.srcX,M.midX,M.tgtX),i=Math.min(M.srcY,M.midY,M.tgtY),a=Math.max(M.srcY,M.midY,M.tgtY),r-=D,n+=D,i-=D,a+=D,d(v,r,i,n,a)),p&&"haystack"===e.pstyle("curve-style").strValue){var I=M.haystackPts;if(r=I[0].x,i=I[0].y,n=I[1].x,a=I[1].y,r>n){var N=r;r=n,n=N}if(i>a){var N=i;i=a,a=N}d(v,r-D,i-D,n+D,a+D)}else{for(var B=M.bezierPts||M.linePts||[],z=0;zn){var N=r;r=n,n=N}if(i>a){var N=i;i=a,a=N}r-=D,n+=D,i-=D,a+=D,d(v,r,i,n,a)}}}p&&(r=v.x1,n=v.x2, -i=v.y1,a=v.y2,d(v,r-C,i-C,n+C,a+C)),p&&t.includeEdges&&x&&(f(v,e,"mid-source",t),f(v,e,"mid-target",t),f(v,e,"source",t),f(v,e,"target",t)),p&&t.includeLabels&&(g(v,e,null,t),x&&(g(v,e,"source",t),g(v,e,"target",t)))}return v.x1=c(v.x1),v.y1=c(v.y1),v.x2=c(v.x2),v.y2=c(v.y2),v.w=c(v.x2-v.x1),v.h=c(v.y2-v.y1),v.w>0&&v.h>0&&w&&u.expandBoundingBox(v,1),v},y=function(e){return e?"t":"f"},m=function(e){var t="";return t+=y(e.incudeNodes),t+=y(e.includeEdges),t+=y(e.includeLabels),t+=y(e.includeOverlays)},b=function(e,t){var r,n=e._private,i=e.cy().headless(),a=t===x?w:m(t);return t.useCache&&!i&&n.bbCache&&n.bbCache[a]?r=n.bbCache[a]:(r=v(e,t),i||(n.bbCache=n.bbCache||{},n.bbCache[a]=r)),r},x={includeNodes:!0,includeEdges:!0,includeLabels:!0,includeOverlays:!0,useCache:!0},w=m(x);a.recalculateRenderedStyle=function(e){var t=this.cy(),r=t.renderer(),n=t.styleEnabled();return r&&n&&r.recalculateRenderedStyle(this,e),this},a.boundingBox=function(e){if(1===this.length&&this[0]._private.bbCache&&(void 0===e||void 0===e.useCache||e.useCache===!0))return e=void 0===e?x:n(e),b(this[0],e);var t={x1:1/0,y1:1/0,x2:-(1/0),y2:-(1/0)};e=e||l.staticEmptyObject();var r=n(e),i=this,a=i.cy(),o=a.styleEnabled();o&&this.recalculateRenderedStyle(r.useCache),this.updateCompoundBounds();for(var s={},u=0;u1&&!o){var s=this.length-1,l=this[s],u=l._private.data.id;this[s]=void 
[minified third-party JavaScript bundle (cytoscape.js, including its embedded PSF license notice) — truncated during extraction; no recoverable content beyond the file's identity]
:[211,211,211],lightgreen:[144,238,144],lightgrey:[211,211,211],lightpink:[255,182,193],lightsalmon:[255,160,122],lightseagreen:[32,178,170],lightskyblue:[135,206,250],lightslategray:[119,136,153],lightslategrey:[119,136,153],lightsteelblue:[176,196,222],lightyellow:[255,255,224],lime:[0,255,0],limegreen:[50,205,50],linen:[250,240,230],magenta:[255,0,255],maroon:[128,0,0],mediumaquamarine:[102,205,170],mediumblue:[0,0,205],mediumorchid:[186,85,211],mediumpurple:[147,112,219],mediumseagreen:[60,179,113],mediumslateblue:[123,104,238],mediumspringgreen:[0,250,154],mediumturquoise:[72,209,204],mediumvioletred:[199,21,133],midnightblue:[25,25,112],mintcream:[245,255,250],mistyrose:[255,228,225],moccasin:[255,228,181],navajowhite:[255,222,173],navy:[0,0,128],oldlace:[253,245,230],olive:[128,128,0],olivedrab:[107,142,35],orange:[255,165,0],orangered:[255,69,0],orchid:[218,112,214],palegoldenrod:[238,232,170],palegreen:[152,251,152],paleturquoise:[175,238,238],palevioletred:[219,112,147],papayawhip:[255,239,213],peachpuff:[255,218,185],peru:[205,133,63],pink:[255,192,203],plum:[221,160,221],powderblue:[176,224,230],purple:[128,0,128],red:[255,0,0],rosybrown:[188,143,143],royalblue:[65,105,225],saddlebrown:[139,69,19],salmon:[250,128,114],sandybrown:[244,164,96],seagreen:[46,139,87],seashell:[255,245,238],sienna:[160,82,45],silver:[192,192,192],skyblue:[135,206,235],slateblue:[106,90,205],slategray:[112,128,144],slategrey:[112,128,144],snow:[255,250,250],springgreen:[0,255,127],steelblue:[70,130,180],tan:[210,180,140],teal:[0,128,128],thistle:[216,191,216],tomato:[255,99,71],turquoise:[64,224,208],violet:[238,130,238],wheat:[245,222,179],white:[255,255,255],whitesmoke:[245,245,245],yellow:[255,255,0],yellowgreen:[154,205,50]}}},{"../is":91}],108:[function(e,t,r){"use strict";var n=e("../is"),i=e("../math"),a={trueify:function(){return!0},falsify:function(){return!1},zeroify:function(){return 0},noop:function(){},error:function(e){console.error?(console.error.apply(console,arguments),console.trace&&console.trace()):(console.log.apply(console,arguments),console.trace&&console.trace())},clone:function(e){return this.extend({},e)},copy:function(e){return null==e?e:n.array(e)?e.slice():n.plainObject(e)?this.clone(e):e},clonePosition:function(e){return{x:e.x,y:e.y}},uuid:function(e,t){for(t=e="";e++<36;t+=51*e&52?(15^e?8^Math.random()*(20^e?16:4):4).toString(16):"-");return t}};a.makeBoundingBox=i.makeBoundingBox.bind(i),a._staticEmptyObject={},a.staticEmptyObject=function(){return a._staticEmptyObject},a.extend=null!=Object.assign?Object.assign.bind(Object):function(e){for(var t=arguments,r=1;r=0&&(e[n]!==t||(e.splice(n,1),r));n--);},a.clearArray=function(e){e.splice(0,e.length)},a.push=function(e){var t=Array.prototype.slice.call(arguments,1);Array.prototype.push.apply(e,t)},a.getPrefixedProperty=function(e,t,r){return r&&(t=this.prependCamel(r,t)),e[t]},a.setPrefixedProperty=function(e,t,r,n){r&&(t=this.prependCamel(r,t)),e[t]=n},[e("./colors"),e("./maps"),{memoize:e("./memoize")},e("./regex"),e("./strings"),e("./timing"),e("./sort")].forEach(function(e){a.extend(a,e)}),t.exports=a},{"../is":91,"../math":93,"./colors":107,"./maps":109,"./memoize":110,"./regex":111,"./sort":112,"./strings":113,"./timing":114}],109:[function(e,t,r){"use strict";var n=e("../is");t.exports={mapEmpty:function(e){var t=!0;return null!=e?0===Object.keys(e).length:t},pushMap:function(e){var t=this.getMap(e);null==t?this.setMap(this.extend({},e,{value:[e.value]})):t.push(e.value)},setMap:function(e){for(var 
t,r=e.map,i=e.keys,a=i.length,o=0;ot?1:0}function i(e,t){return-1*n(e,t)}t.exports={sort:{ascending:n,descending:i}}},{}],113:[function(e,t,r){"use strict";var n=e("./memoize"),i=e("../is");t.exports={camel2dash:n(function(e){return e.replace(/([A-Z])/g,function(e){return"-"+e.toLowerCase()})}),dash2camel:n(function(e){return e.replace(/(-\w)/g,function(e){return e[1].toUpperCase()})}),prependCamel:n(function(e,t){return e+t[0].toUpperCase()+t.substring(1)},function(e,t){return e+"$"+t}),capitalize:function(e){return i.emptyString(e)?e:e.charAt(0).toUpperCase()+e.substring(1)}}},{"../is":91,"./memoize":110}],114:[function(e,t,r){"use strict";var n=e("../window"),i=e("../is"),a=n?n.performance:null,o={},s=a&&a.now?function(){return a.now()}:function(){return Date.now()},l=function(){if(n){if(n.requestAnimationFrame)return function(e){n.requestAnimationFrame(e)};if(n.mozRequestAnimationFrame)return function(e){n.mozRequestAnimationFrame(e)};if(n.webkitRequestAnimationFrame)return function(e){n.webkitRequestAnimationFrame(e)};if(n.msRequestAnimationFrame)return function(e){n.msRequestAnimationFrame(e)}}return function(e){e&&setTimeout(function(){e(s())},1e3/60)}}();o.requestAnimationFrame=function(e){l(e)},o.performanceNow=s,o.throttle=function(e,t,r){var n=!0,a=!0;return r===!1?n=!1:i.plainObject(r)&&(n="leading"in r?r.leading:n,a="trailing"in r?r.trailing:a),r=r||{},r.leading=n,r.maxWait=t,r.trailing=a,o.debounce(e,t,r)},o.now=function(){return Date.now()},o.debounce=function(e,t,r){var n,a,o,s,l,u,c,d=this,h=0,p=!1,f=!0;if(i.fn(e)){if(t=Math.max(0,t)||0,r===!0){var g=!0;f=!1}else i.plainObject(r)&&(g=r.leading,p="maxWait"in r&&(Math.max(t,r.maxWait)||0),f="trailing"in r?r.trailing:f);var v=function(){var r=t-(d.now()-s);if(r<=0){a&&clearTimeout(a);var i=c;a=u=c=void 0,i&&(h=d.now(),o=e.apply(l,n),u||a||(n=l=null))}else u=setTimeout(v,r)},y=function(){u&&clearTimeout(u),a=u=c=void 0,(f||p!==t)&&(h=d.now(),o=e.apply(l,n),u||a||(n=l=null))};return function(){if(n=arguments,s=d.now(),l=this,c=f&&(u||!g),p===!1)var r=g&&!u;else{a||g||(h=s);var i=p-(s-h),m=i<=0;m?(a&&(a=clearTimeout(a)),h=s,o=e.apply(l,n)):a||(a=setTimeout(y,i))}return m&&u?u=clearTimeout(u):u||t===p||(u=setTimeout(v,t)),r&&(m=!0,o=e.apply(l,n)),!m||u||a||(n=l=null),o}}},t.exports=o},{"../is":91,"../window":116}],115:[function(e,t,r){t.exports="3.1.4"},{}],116:[function(e,t,r){t.exports="undefined"==typeof window?null:window},{}]},{},[90])(90)}); \ No newline at end of file diff --git a/coremltools/graph_visualization/assets/dagre.min.js b/coremltools/graph_visualization/assets/dagre.min.js deleted file mode 100644 index b7a9bbc4d..000000000 --- a/coremltools/graph_visualization/assets/dagre.min.js +++ /dev/null @@ -1,6 +0,0 @@ -!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.dagre=e()}}(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var 
interceptor.call(thisArg,value)}function wrapperChain(){return chain(this)}function wrapperCommit(){return new LodashWrapper(this.value(),this.__chain__)}var wrapperConcat=restParam(function(values){values=baseFlatten(values);return this.thru(function(array){return arrayConcat(isArray(array)?array:[toObject(array)],values)})});function wrapperPlant(value){var result,parent=this;while(parent instanceof baseLodash){var clone=wrapperClone(parent);if(result){previous.__wrapped__=clone}else{result=clone}var previous=clone;parent=parent.__wrapped__}previous.__wrapped__=value;return result}function wrapperReverse(){var value=this.__wrapped__;var interceptor=function(value){return wrapped&&wrapped.__dir__<0?value:value.reverse()};if(value instanceof LazyWrapper){var wrapped=value;if(this.__actions__.length){wrapped=new LazyWrapper(this)}wrapped=wrapped.reverse();wrapped.__actions__.push({func:thru,args:[interceptor],thisArg:undefined});return new LodashWrapper(wrapped,this.__chain__)}return this.thru(interceptor)}function wrapperToString(){return this.value()+""}function wrapperValue(){return baseWrapperValue(this.__wrapped__,this.__actions__)}var at=restParam(function(collection,props){return baseAt(collection,baseFlatten(props))});var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?++result[key]:result[key]=1});function every(collection,predicate,thisArg){var func=isArray(collection)?arrayEvery:baseEvery;if(thisArg&&isIterateeCall(collection,predicate,thisArg)){predicate=undefined}if(typeof predicate!="function"||thisArg!==undefined){predicate=getCallback(predicate,thisArg,3) -}return func(collection,predicate)}function filter(collection,predicate,thisArg){var func=isArray(collection)?arrayFilter:baseFilter;predicate=getCallback(predicate,thisArg,3);return func(collection,predicate)}var find=createFind(baseEach);var findLast=createFind(baseEachRight,true);function findWhere(collection,source){return find(collection,baseMatches(source))}var forEach=createForEach(arrayEach,baseEach);var forEachRight=createForEach(arrayEachRight,baseEachRight);var groupBy=createAggregator(function(result,value,key){if(hasOwnProperty.call(result,key)){result[key].push(value)}else{result[key]=[value]}});function includes(collection,target,fromIndex,guard){var length=collection?getLength(collection):0;if(!isLength(length)){collection=values(collection);length=collection.length}if(typeof fromIndex!="number"||guard&&isIterateeCall(target,fromIndex,guard)){fromIndex=0}else{fromIndex=fromIndex<0?nativeMax(length+fromIndex,0):fromIndex||0}return typeof collection=="string"||!isArray(collection)&&isString(collection)?fromIndex<=length&&collection.indexOf(target,fromIndex)>-1:!!length&&getIndexOf(collection,target,fromIndex)>-1}var indexBy=createAggregator(function(result,value,key){result[key]=value});var invoke=restParam(function(collection,path,args){var index=-1,isFunc=typeof path=="function",isProp=isKey(path),result=isArrayLike(collection)?Array(collection.length):[];baseEach(collection,function(value){var func=isFunc?path:isProp&&value!=null?value[path]:undefined;result[++index]=func?func.apply(value,args):invokePath(value,path,args)});return result});function map(collection,iteratee,thisArg){var func=isArray(collection)?arrayMap:baseMap;iteratee=getCallback(iteratee,thisArg,3);return func(collection,iteratee)}var partition=createAggregator(function(result,value,key){result[key?0:1].push(value)},function(){return[[],[]]});function pluck(collection,path){return 
map(collection,property(path))}var reduce=createReduce(arrayReduce,baseEach);var reduceRight=createReduce(arrayReduceRight,baseEachRight);function reject(collection,predicate,thisArg){var func=isArray(collection)?arrayFilter:baseFilter;predicate=getCallback(predicate,thisArg,3);return func(collection,function(value,index,collection){return!predicate(value,index,collection)})}function sample(collection,n,guard){if(guard?isIterateeCall(collection,n,guard):n==null){collection=toIterable(collection);var length=collection.length;return length>0?collection[baseRandom(0,length-1)]:undefined}var index=-1,result=toArray(collection),length=result.length,lastIndex=length-1;n=nativeMin(n<0?0:+n||0,length);while(++index0){result=func.apply(this,arguments)}if(n<=1){func=undefined}return result}}var bind=restParam(function(func,thisArg,partials){var bitmask=BIND_FLAG;if(partials.length){var holders=replaceHolders(partials,bind.placeholder);bitmask|=PARTIAL_FLAG}return createWrapper(func,bitmask,thisArg,partials,holders)});var bindAll=restParam(function(object,methodNames){methodNames=methodNames.length?baseFlatten(methodNames):functions(object);var index=-1,length=methodNames.length;while(++indexwait){complete(trailingCall,maxTimeoutId)}else{timeoutId=setTimeout(delayed,remaining)}}function maxDelayed(){complete(trailing,timeoutId)}function debounced(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0||remaining>maxWait;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=undefined}return result}debounced.cancel=cancel;return debounced}var defer=restParam(function(func,args){return baseDelay(func,1,args)});var delay=restParam(function(func,wait,args){return baseDelay(func,wait,args)});var flow=createFlow();var flowRight=createFlow(true);function memoize(func,resolver){if(typeof func!="function"||resolver&&typeof resolver!="function"){throw new TypeError(FUNC_ERROR_TEXT)}var memoized=function(){var args=arguments,key=resolver?resolver.apply(this,args):args[0],cache=memoized.cache;if(cache.has(key)){return cache.get(key)}var result=func.apply(this,args);memoized.cache=cache.set(key,result);return result};memoized.cache=new memoize.Cache;return memoized}var modArgs=restParam(function(func,transforms){transforms=baseFlatten(transforms);if(typeof func!="function"||!arrayEvery(transforms,baseIsFunction)){throw new TypeError(FUNC_ERROR_TEXT)}var length=transforms.length;return restParam(function(args){var index=nativeMin(args.length,length);while(index--){args[index]=transforms[index](args[index])}return func.apply(this,args)})});function negate(predicate){if(typeof predicate!="function"){throw new TypeError(FUNC_ERROR_TEXT)}return function(){return!predicate.apply(this,arguments)}}function once(func){return before(2,func)}var partial=createPartial(PARTIAL_FLAG);var partialRight=createPartial(PARTIAL_RIGHT_FLAG);var rearg=restParam(function(func,indexes){return 
createWrapper(func,REARG_FLAG,undefined,undefined,undefined,baseFlatten(indexes))});function restParam(func,start){if(typeof func!="function"){throw new TypeError(FUNC_ERROR_TEXT)}start=nativeMax(start===undefined?func.length-1:+start||0,0);return function(){var args=arguments,index=-1,length=nativeMax(args.length-start,0),rest=Array(length);while(++indexother}function gte(value,other){return value>=other}function isArguments(value){return isObjectLike(value)&&isArrayLike(value)&&hasOwnProperty.call(value,"callee")&&!propertyIsEnumerable.call(value,"callee")}var isArray=nativeIsArray||function(value){return isObjectLike(value)&&isLength(value.length)&&objToString.call(value)==arrayTag};function isBoolean(value){return value===true||value===false||isObjectLike(value)&&objToString.call(value)==boolTag}function isDate(value){return isObjectLike(value)&&objToString.call(value)==dateTag}function isElement(value){return!!value&&value.nodeType===1&&isObjectLike(value)&&!isPlainObject(value)}function isEmpty(value){if(value==null){return true}if(isArrayLike(value)&&(isArray(value)||isString(value)||isArguments(value)||isObjectLike(value)&&isFunction(value.splice))){return!value.length}return!keys(value).length}function isEqual(value,other,customizer,thisArg){customizer=typeof customizer=="function"?bindCallback(customizer,thisArg,3):undefined;var result=customizer?customizer(value,other):undefined;return result===undefined?baseIsEqual(value,other,customizer):!!result}function isError(value){return isObjectLike(value)&&typeof value.message=="string"&&objToString.call(value)==errorTag}function isFinite(value){return typeof value=="number"&&nativeIsFinite(value)}function isFunction(value){return isObject(value)&&objToString.call(value)==funcTag}function isObject(value){var type=typeof value;return!!value&&(type=="object"||type=="function")}function isMatch(object,source,customizer,thisArg){customizer=typeof customizer=="function"?bindCallback(customizer,thisArg,3):undefined;return baseIsMatch(object,getMatchData(source),customizer)}function isNaN(value){return isNumber(value)&&value!=+value}function isNative(value){if(value==null){return false}if(isFunction(value)){return reIsNative.test(fnToString.call(value))}return isObjectLike(value)&&reIsHostCtor.test(value)}function isNull(value){return value===null}function isNumber(value){return typeof value=="number"||isObjectLike(value)&&objToString.call(value)==numberTag}function isPlainObject(value){var Ctor;if(!(isObjectLike(value)&&objToString.call(value)==objectTag&&!isArguments(value))||!hasOwnProperty.call(value,"constructor")&&(Ctor=value.constructor,typeof Ctor=="function"&&!(Ctor instanceof Ctor))){return false}var result;baseForIn(value,function(subValue,key){result=key});return result===undefined||hasOwnProperty.call(value,result)}function isRegExp(value){return isObject(value)&&objToString.call(value)==regexpTag}function isString(value){return typeof value=="string"||isObjectLike(value)&&objToString.call(value)==stringTag}function isTypedArray(value){return isObjectLike(value)&&isLength(value.length)&&!!typedArrayTags[objToString.call(value)]}function isUndefined(value){return value===undefined}function lt(value,other){return value0;while(++index=nativeMin(start,end)&&value=0&&string.indexOf(target,position)==position}function escape(string){string=baseToString(string);return string&&reHasUnescapedHtml.test(string)?string.replace(reUnescapedHtml,escapeHtmlChar):string}function escapeRegExp(string){string=baseToString(string);return 
string&&reHasRegExpChars.test(string)?string.replace(reRegExpChars,escapeRegExpChar):string||"(?:)"}var kebabCase=createCompounder(function(result,word,index){return result+(index?"-":"")+word.toLowerCase()});function pad(string,length,chars){string=baseToString(string);length=+length;var strLength=string.length;if(strLength>=length||!nativeIsFinite(length)){return string}var mid=(length-strLength)/2,leftLength=nativeFloor(mid),rightLength=nativeCeil(mid);chars=createPadding("",rightLength,chars);return chars.slice(0,leftLength)+string+chars}var padLeft=createPadDir();var padRight=createPadDir(true);function parseInt(string,radix,guard){if(guard?isIterateeCall(string,radix,guard):radix==null){radix=0}else if(radix){radix=+radix}string=trim(string);return nativeParseInt(string,radix||(reHasHexPrefix.test(string)?16:10))}function repeat(string,n){var result="";string=baseToString(string);n=+n;if(n<1||!string||!nativeIsFinite(n)){return result}do{if(n%2){result+=string}n=nativeFloor(n/2);string+=string}while(n);return result}var snakeCase=createCompounder(function(result,word,index){return result+(index?"_":"")+word.toLowerCase()});var startCase=createCompounder(function(result,word,index){return result+(index?" ":"")+(word.charAt(0).toUpperCase()+word.slice(1))});function startsWith(string,target,position){string=baseToString(string);position=position==null?0:nativeMin(position<0?0:+position||0,string.length);return string.lastIndexOf(target,position)==position}function template(string,options,otherOptions){var settings=lodash.templateSettings;if(otherOptions&&isIterateeCall(string,options,otherOptions)){options=otherOptions=undefined}string=baseToString(string);options=assignWith(baseAssign({},otherOptions||options),settings,assignOwnDefaults);var imports=assignWith(baseAssign({},options.imports),settings.imports,assignOwnDefaults),importsKeys=keys(imports),importsValues=baseValues(imports,importsKeys);var isEscaping,isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");var sourceURL="//# sourceURL="+("sourceURL"in options?options.sourceURL:"lodash.templateSources["+ ++templateCounter+"]")+"\n";string.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=string.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){isEscaping=true;source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? 
'' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable;if(!variable){source="with (obj) {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+(variable||"obj")+") {\n"+(variable?"":"obj || (obj = {});\n")+"var __t, __p = ''"+(isEscaping?", __e = _.escape":"")+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var result=attempt(function(){return Function(importsKeys,sourceURL+"return "+source).apply(undefined,importsValues)});result.source=source;if(isError(result)){throw result}return result}function trim(string,chars,guard){var value=string;string=baseToString(string);if(!string){return string}if(guard?isIterateeCall(value,chars,guard):chars==null){return string.slice(trimmedLeftIndex(string),trimmedRightIndex(string)+1)}chars=chars+"";return string.slice(charsLeftIndex(string,chars),charsRightIndex(string,chars)+1)}function trimLeft(string,chars,guard){var value=string;string=baseToString(string);if(!string){return string}if(guard?isIterateeCall(value,chars,guard):chars==null){return string.slice(trimmedLeftIndex(string))}return string.slice(charsLeftIndex(string,chars+""))}function trimRight(string,chars,guard){var value=string;string=baseToString(string);if(!string){return string}if(guard?isIterateeCall(value,chars,guard):chars==null){return string.slice(0,trimmedRightIndex(string)+1)}return string.slice(0,charsRightIndex(string,chars+"")+1)}function trunc(string,options,guard){if(guard&&isIterateeCall(string,options,guard)){options=undefined}var length=DEFAULT_TRUNC_LENGTH,omission=DEFAULT_TRUNC_OMISSION;if(options!=null){if(isObject(options)){var separator="separator"in options?options.separator:separator;length="length"in options?+options.length||0:length;omission="omission"in options?baseToString(options.omission):omission}else{length=+options||0}}string=baseToString(string);if(length>=string.length){return string}var end=length-omission.length;if(end<1){return omission}var result=string.slice(0,end);if(separator==null){return result+omission}if(isRegExp(separator)){if(string.slice(end).search(separator)){var match,newEnd,substring=string.slice(0,end);if(!separator.global){separator=RegExp(separator.source,(reFlags.exec(separator)||"")+"g")}separator.lastIndex=0;while(match=separator.exec(substring)){newEnd=match.index}result=result.slice(0,newEnd==null?end:newEnd)}}else if(string.indexOf(separator,end)!=end){var index=result.lastIndexOf(separator);if(index>-1){result=result.slice(0,index)}}return result+omission}function unescape(string){string=baseToString(string);return string&&reHasEscapedHtml.test(string)?string.replace(reEscapedHtml,unescapeHtmlChar):string}function words(string,pattern,guard){if(guard&&isIterateeCall(string,pattern,guard)){pattern=undefined}string=baseToString(string);return string.match(pattern||reWords)||[]}var attempt=restParam(function(func,args){try{return func.apply(undefined,args)}catch(e){return isError(e)?e:new Error(e)}});function callback(func,thisArg,guard){if(guard&&isIterateeCall(func,thisArg,guard)){thisArg=undefined}return isObjectLike(func)?matches(func):baseCallback(func,thisArg)}function constant(value){return function(){return value}}function identity(value){return value}function matches(source){return baseMatches(baseClone(source,true))}function matchesProperty(path,srcValue){return 
baseMatchesProperty(path,baseClone(srcValue,true))}var method=restParam(function(path,args){return function(object){return invokePath(object,path,args)}});var methodOf=restParam(function(object,args){return function(path){return invokePath(object,path,args)}});function mixin(object,source,options){if(options==null){var isObj=isObject(source),props=isObj?keys(source):undefined,methodNames=props&&props.length?baseFunctions(source,props):undefined;if(!(methodNames?methodNames.length:isObj)){methodNames=false;options=source;source=object;object=this}}if(!methodNames){methodNames=baseFunctions(source,keys(source))}var chain=true,index=-1,isFunc=isFunction(object),length=methodNames.length;if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}while(++index0||end<0)){return new LazyWrapper(result)}if(start<0){result=result.takeRight(-start)}else if(start){result=result.drop(start)}if(end!==undefined){end=+end||0;result=end<0?result.dropRight(-end):result.take(end-start)}return result};LazyWrapper.prototype.takeRightWhile=function(predicate,thisArg){return this.reverse().takeWhile(predicate,thisArg).reverse()};LazyWrapper.prototype.toArray=function(){return this.take(POSITIVE_INFINITY)};baseForOwn(LazyWrapper.prototype,function(func,methodName){var checkIteratee=/^(?:filter|map|reject)|While$/.test(methodName),retUnwrapped=/^(?:first|last)$/.test(methodName),lodashFunc=lodash[retUnwrapped?"take"+(methodName=="last"?"Right":""):methodName];if(!lodashFunc){return}lodash.prototype[methodName]=function(){var args=retUnwrapped?[1]:arguments,chainAll=this.__chain__,value=this.__wrapped__,isHybrid=!!this.__actions__.length,isLazy=value instanceof LazyWrapper,iteratee=args[0],useLazy=isLazy||isArray(value);if(useLazy&&checkIteratee&&typeof iteratee=="function"&&iteratee.length!=1){isLazy=useLazy=false}var interceptor=function(value){return retUnwrapped&&chainAll?lodashFunc(value,1)[0]:lodashFunc.apply(undefined,arrayPush([value],args))};var action={func:thru,args:[interceptor],thisArg:undefined},onlyLazy=isLazy&&!isHybrid;if(retUnwrapped&&!chainAll){if(onlyLazy){value=value.clone();value.__actions__.push(action);return func.call(value)}return lodashFunc.call(undefined,this.value())[0]}if(!retUnwrapped&&useLazy){value=onlyLazy?value:new LazyWrapper(this);var result=func.apply(value,args);result.__actions__.push(action);return new LodashWrapper(result,chainAll)}return this.thru(interceptor)}});arrayEach(["join","pop","push","replace","shift","sort","splice","split","unshift"],function(methodName){var func=(/^(?:replace|split)$/.test(methodName)?stringProto:arrayProto)[methodName],chainName=/^(?:push|sort|unshift)$/.test(methodName)?"tap":"thru",retUnwrapped=/^(?:join|pop|replace|shift)$/.test(methodName);lodash.prototype[methodName]=function(){var args=arguments;if(retUnwrapped&&!this.__chain__){return func.apply(this.value(),args)}return this[chainName](function(value){return func.apply(value,args)})}});baseForOwn(LazyWrapper.prototype,function(func,methodName){var lodashFunc=lodash[methodName];if(lodashFunc){var 
key=lodashFunc.name,names=realNames[key]||(realNames[key]=[]);names.push({name:methodName,func:lodashFunc})}});realNames[createHybridWrapper(undefined,BIND_KEY_FLAG).name]=[{name:"wrapper",func:undefined}];LazyWrapper.prototype.clone=lazyClone;LazyWrapper.prototype.reverse=lazyReverse;LazyWrapper.prototype.value=lazyValue;lodash.prototype.chain=wrapperChain;lodash.prototype.commit=wrapperCommit;lodash.prototype.concat=wrapperConcat;lodash.prototype.plant=wrapperPlant;lodash.prototype.reverse=wrapperReverse;lodash.prototype.toString=wrapperToString;lodash.prototype.run=lodash.prototype.toJSON=lodash.prototype.valueOf=lodash.prototype.value=wrapperValue;lodash.prototype.collect=lodash.prototype.map;lodash.prototype.head=lodash.prototype.first;lodash.prototype.select=lodash.prototype.filter;lodash.prototype.tail=lodash.prototype.rest;return lodash}var _=runInContext();if(typeof define=="function"&&typeof define.amd=="object"&&define.amd){root._=_;define(function(){return _})}else if(freeExports&&freeModule){if(moduleExports){(freeModule.exports=_)._=_}else{freeExports._=_}}else{root._=_}}).call(this)}).call(this,typeof global!=="undefined"?global:typeof self!=="undefined"?self:typeof window!=="undefined"?window:{})},{}]},{},[1])(1)}); \ No newline at end of file diff --git a/coremltools/graph_visualization/assets/jquery-3.2.1.min.js b/coremltools/graph_visualization/assets/jquery-3.2.1.min.js deleted file mode 100644 index 644d35e27..000000000 --- a/coremltools/graph_visualization/assets/jquery-3.2.1.min.js +++ /dev/null @@ -1,4 +0,0 @@ -/*! jQuery v3.2.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(a,b){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){"use strict";var c=[],d=a.document,e=Object.getPrototypeOf,f=c.slice,g=c.concat,h=c.push,i=c.indexOf,j={},k=j.toString,l=j.hasOwnProperty,m=l.toString,n=m.call(Object),o={};function p(a,b){b=b||d;var c=b.createElement("script");c.text=a,b.head.appendChild(c).parentNode.removeChild(c)}var q="3.2.1",r=function(a,b){return new r.fn.init(a,b)},s=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,t=/^-ms-/,u=/-([a-z])/g,v=function(a,b){return b.toUpperCase()};r.fn=r.prototype={jquery:q,constructor:r,length:0,toArray:function(){return f.call(this)},get:function(a){return null==a?f.call(this):a<0?this[a+this.length]:this[a]},pushStack:function(a){var b=r.merge(this.constructor(),a);return b.prevObject=this,b},each:function(a){return r.each(this,a)},map:function(a){return this.pushStack(r.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(f.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(a<0?b:0);return this.pushStack(c>=0&&c0&&b-1 in a)}var x=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ha(),z=ha(),A=ha(),B=function(a,b){return a===b&&(l=!0),0},C={}.hasOwnProperty,D=[],E=D.pop,F=D.push,G=D.push,H=D.slice,I=function(a,b){for(var c=0,d=a.length;c+~]|"+K+")"+K+"*"),S=new RegExp("="+K+"*([^\\]'\"]*?)"+K+"*\\]","g"),T=new RegExp(N),U=new RegExp("^"+L+"$"),V={ID:new RegExp("^#("+L+")"),CLASS:new RegExp("^\\.("+L+")"),TAG:new RegExp("^("+L+"|[*])"),ATTR:new RegExp("^"+M),PSEUDO:new RegExp("^"+N),CHILD:new 
RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+K+"*(even|odd|(([+-]|)(\\d*)n|)"+K+"*(?:([+-]|)"+K+"*(\\d+)|))"+K+"*\\)|)","i"),bool:new RegExp("^(?:"+J+")$","i"),needsContext:new RegExp("^"+K+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+K+"*((?:-\\d)?\\d*)"+K+"*\\)|)(?=[^-]|$)","i")},W=/^(?:input|select|textarea|button)$/i,X=/^h\d$/i,Y=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,$=/[+~]/,_=new RegExp("\\\\([\\da-f]{1,6}"+K+"?|("+K+")|.)","ig"),aa=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:d<0?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},ba=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ca=function(a,b){return b?"\0"===a?"\ufffd":a.slice(0,-1)+"\\"+a.charCodeAt(a.length-1).toString(16)+" ":"\\"+a},da=function(){m()},ea=ta(function(a){return a.disabled===!0&&("form"in a||"label"in a)},{dir:"parentNode",next:"legend"});try{G.apply(D=H.call(v.childNodes),v.childNodes),D[v.childNodes.length].nodeType}catch(fa){G={apply:D.length?function(a,b){F.apply(a,H.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function ga(a,b,d,e){var f,h,j,k,l,o,r,s=b&&b.ownerDocument,w=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==w&&9!==w&&11!==w)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==w&&(l=Z.exec(a)))if(f=l[1]){if(9===w){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(s&&(j=s.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(l[2])return G.apply(d,b.getElementsByTagName(a)),d;if((f=l[3])&&c.getElementsByClassName&&b.getElementsByClassName)return G.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==w)s=b,r=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(ba,ca):b.setAttribute("id",k=u),o=g(a),h=o.length;while(h--)o[h]="#"+k+" "+sa(o[h]);r=o.join(","),s=$.test(a)&&qa(b.parentNode)||b}if(r)try{return G.apply(d,s.querySelectorAll(r)),d}catch(x){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(P,"$1"),b,d,e)}function ha(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ia(a){return a[u]=!0,a}function ja(a){var b=n.createElement("fieldset");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ka(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function la(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&a.sourceIndex-b.sourceIndex;if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function na(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function oa(a){return function(b){return"form"in b?b.parentNode&&b.disabled===!1?"label"in b?"label"in b.parentNode?b.parentNode.disabled===a:b.disabled===a:b.isDisabled===a||b.isDisabled!==!a&&ea(b)===a:b.disabled===a:"label"in b&&b.disabled===a}}function pa(a){return ia(function(b){return b=+b,ia(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function qa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=ga.support={},f=ga.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return!!b&&"HTML"!==b.nodeName},m=ga.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return 
g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),v!==n&&(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ja(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ja(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Y.test(n.getElementsByClassName),c.getById=ja(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){return a.getAttribute("id")===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}}):(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c,d,e,f=b.getElementById(a);if(f){if(c=f.getAttributeNode("id"),c&&c.value===a)return[f];e=b.getElementsByName(a),d=0;while(f=e[d++])if(c=f.getAttributeNode("id"),c&&c.value===a)return[f]}return[]}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){if("undefined"!=typeof b.getElementsByClassName&&p)return b.getElementsByClassName(a)},r=[],q=[],(c.qsa=Y.test(n.querySelectorAll))&&(ja(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+K+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+K+"*(?:value|"+J+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ja(function(a){a.innerHTML="";var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+K+"*[*^$|!~]?="),2!==a.querySelectorAll(":enabled").length&&q.push(":enabled",":disabled"),o.appendChild(a).disabled=!0,2!==a.querySelectorAll(":disabled").length&&q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Y.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ja(function(a){c.disconnectedMatch=s.call(a,"*"),s.call(a,"[s!='']:x"),r.push("!=",N)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Y.test(o.compareDocumentPosition),t=b||Y.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?I(k,a)-I(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var 
c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?I(k,a)-I(k,b):0;if(e===f)return la(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return d?la(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},ga.matches=function(a,b){return ga(a,null,null,b)},ga.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(S,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return ga(b,n,null,[a]).length>0},ga.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},ga.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&C.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},ga.escape=function(a){return(a+"").replace(ba,ca)},ga.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},ga.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=ga.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=ga.selectors={cacheLength:50,createPseudo:ia,match:V,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(_,aa),a[3]=(a[3]||a[4]||a[5]||"").replace(_,aa),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||ga.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&ga.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return V.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&T.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(_,aa).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+K+")"+a+"("+K+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=ga.attr(d,a);return null==e?"!="===b:!b||(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(O," ")+" ").indexOf(c)>-1:"|="===b&&(e===c||e.slice(0,c.length+1)===c+"-"))}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var 
j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||ga.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ia(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=I(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ia(function(a){var b=[],c=[],d=h(a.replace(P,"$1"));return d[u]?ia(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ia(function(a){return function(b){return ga(a,b).length>0}}),contains:ia(function(a){return a=a.replace(_,aa),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ia(function(a){return U.test(a||"")||ga.error("unsupported lang: "+a),a=a.replace(_,aa).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:oa(!1),disabled:oa(!0),checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return X.test(a.nodeName)},input:function(a){return W.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:pa(function(){return[0]}),last:pa(function(a,b){return[b-1]}),eq:pa(function(a,b,c){return[c<0?c+b:c]}),even:pa(function(a,b){for(var c=0;c=0;)a.push(d);return a}),gt:pa(function(a,b,c){for(var d=c<0?c+b:c;++d1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function va(a,b,c){for(var d=0,e=b.length;d-1&&(f[j]=!(g[j]=l))}}else r=wa(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):G.apply(g,r)})}function ya(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ta(function(a){return a===b},h,!0),l=ta(function(a){return I(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];i1&&ua(m),i>1&&sa(a.slice(0,i-1).concat({value:" 
"===a[i-2].type?"*":""})).replace(P,"$1"),c,i0,e=a.length>0,f=function(f,g,h,i,k){var l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=E.call(i));u=wa(u)}G.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&ga.uniqueSort(i)}return k&&(w=y,j=v),t};return c?ia(f):f}return h=ga.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=ya(b[c]),f[u]?d.push(f):e.push(f);f=A(a,za(e,d)),f.selector=a}return f},i=ga.select=function(a,b,c,e){var f,i,j,k,l,m="function"==typeof a&&a,n=!e&&g(a=m.selector||a);if(c=c||[],1===n.length){if(i=n[0]=n[0].slice(0),i.length>2&&"ID"===(j=i[0]).type&&9===b.nodeType&&p&&d.relative[i[1].type]){if(b=(d.find.ID(j.matches[0].replace(_,aa),b)||[])[0],!b)return c;m&&(b=b.parentNode),a=a.slice(i.shift().value.length)}f=V.needsContext.test(a)?0:i.length;while(f--){if(j=i[f],d.relative[k=j.type])break;if((l=d.find[k])&&(e=l(j.matches[0].replace(_,aa),$.test(i[0].type)&&qa(b.parentNode)||b))){if(i.splice(f,1),a=e.length&&sa(i),!a)return G.apply(c,e),c;break}}}return(m||h(a,n))(e,b,!p,c,!b||$.test(a)&&qa(b.parentNode)||b),c},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ja(function(a){return 1&a.compareDocumentPosition(n.createElement("fieldset"))}),ja(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ka("type|href|height|width",function(a,b,c){if(!c)return a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ja(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ka("value",function(a,b,c){if(!c&&"input"===a.nodeName.toLowerCase())return a.defaultValue}),ja(function(a){return null==a.getAttribute("disabled")})||ka(J,function(a,b,c){var d;if(!c)return a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),ga}(a);r.find=x,r.expr=x.selectors,r.expr[":"]=r.expr.pseudos,r.uniqueSort=r.unique=x.uniqueSort,r.text=x.getText,r.isXMLDoc=x.isXML,r.contains=x.contains,r.escapeSelector=x.escape;var y=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&r(a).is(c))break;d.push(a)}return d},z=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},A=r.expr.match.needsContext;function B(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()}var C=/^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i,D=/^.[^:#\[\.,]*$/;function E(a,b,c){return r.isFunction(b)?r.grep(a,function(a,d){return!!b.call(a,d,a)!==c}):b.nodeType?r.grep(a,function(a){return a===b!==c}):"string"!=typeof b?r.grep(a,function(a){return i.call(b,a)>-1!==c}):D.test(b)?r.filter(b,a,c):(b=r.filter(b,a),r.grep(a,function(a){return i.call(b,a)>-1!==c&&1===a.nodeType}))}r.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?r.find.matchesSelector(d,a)?[d]:[]:r.find.matches(a,r.grep(b,function(a){return 1===a.nodeType}))},r.fn.extend({find:function(a){var b,c,d=this.length,e=this;if("string"!=typeof a)return this.pushStack(r(a).filter(function(){for(b=0;b1?r.uniqueSort(c):c},filter:function(a){return this.pushStack(E(this,a||[],!1))},not:function(a){return 
this.pushStack(E(this,a||[],!0))},is:function(a){return!!E(this,"string"==typeof a&&A.test(a)?r(a):a||[],!1).length}});var F,G=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/,H=r.fn.init=function(a,b,c){var e,f;if(!a)return this;if(c=c||F,"string"==typeof a){if(e="<"===a[0]&&">"===a[a.length-1]&&a.length>=3?[null,a,null]:G.exec(a),!e||!e[1]&&b)return!b||b.jquery?(b||c).find(a):this.constructor(b).find(a);if(e[1]){if(b=b instanceof r?b[0]:b,r.merge(this,r.parseHTML(e[1],b&&b.nodeType?b.ownerDocument||b:d,!0)),C.test(e[1])&&r.isPlainObject(b))for(e in b)r.isFunction(this[e])?this[e](b[e]):this.attr(e,b[e]);return this}return f=d.getElementById(e[2]),f&&(this[0]=f,this.length=1),this}return a.nodeType?(this[0]=a,this.length=1,this):r.isFunction(a)?void 0!==c.ready?c.ready(a):a(r):r.makeArray(a,this)};H.prototype=r.fn,F=r(d);var I=/^(?:parents|prev(?:Until|All))/,J={children:!0,contents:!0,next:!0,prev:!0};r.fn.extend({has:function(a){var b=r(a,this),c=b.length;return this.filter(function(){for(var a=0;a-1:1===c.nodeType&&r.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?r.uniqueSort(f):f)},index:function(a){return a?"string"==typeof a?i.call(r(a),this[0]):i.call(this,a.jquery?a[0]:a):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(r.uniqueSort(r.merge(this.get(),r(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function K(a,b){while((a=a[b])&&1!==a.nodeType);return a}r.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return y(a,"parentNode")},parentsUntil:function(a,b,c){return y(a,"parentNode",c)},next:function(a){return K(a,"nextSibling")},prev:function(a){return K(a,"previousSibling")},nextAll:function(a){return y(a,"nextSibling")},prevAll:function(a){return y(a,"previousSibling")},nextUntil:function(a,b,c){return y(a,"nextSibling",c)},prevUntil:function(a,b,c){return y(a,"previousSibling",c)},siblings:function(a){return z((a.parentNode||{}).firstChild,a)},children:function(a){return z(a.firstChild)},contents:function(a){return B(a,"iframe")?a.contentDocument:(B(a,"template")&&(a=a.content||a),r.merge([],a.childNodes))}},function(a,b){r.fn[a]=function(c,d){var e=r.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=r.filter(d,e)),this.length>1&&(J[a]||r.uniqueSort(e),I.test(a)&&e.reverse()),this.pushStack(e)}});var L=/[^\x20\t\r\n\f]+/g;function M(a){var b={};return r.each(a.match(L)||[],function(a,c){b[c]=!0}),b}r.Callbacks=function(a){a="string"==typeof a?M(a):r.extend({},a);var b,c,d,e,f=[],g=[],h=-1,i=function(){for(e=e||a.once,d=b=!0;g.length;h=-1){c=g.shift();while(++h-1)f.splice(c,1),c<=h&&h--}),this},has:function(a){return a?r.inArray(a,f)>-1:f.length>0},empty:function(){return f&&(f=[]),this},disable:function(){return e=g=[],f=c="",this},disabled:function(){return!f},lock:function(){return e=g=[],c||b||(f=c=""),this},locked:function(){return!!e},fireWith:function(a,c){return e||(c=c||[],c=[a,c.slice?c.slice():c],g.push(c),b||i()),this},fire:function(){return j.fireWith(this,arguments),this},fired:function(){return!!d}};return j};function N(a){return a}function O(a){throw a}function P(a,b,c,d){var e;try{a&&r.isFunction(e=a.promise)?e.call(a).done(b).fail(c):a&&r.isFunction(e=a.then)?e.call(a,b,c):b.apply(void 0,[a].slice(d))}catch(a){c.apply(void 0,[a])}}r.extend({Deferred:function(b){var 
c=[["notify","progress",r.Callbacks("memory"),r.Callbacks("memory"),2],["resolve","done",r.Callbacks("once memory"),r.Callbacks("once memory"),0,"resolved"],["reject","fail",r.Callbacks("once memory"),r.Callbacks("once memory"),1,"rejected"]],d="pending",e={state:function(){return d},always:function(){return f.done(arguments).fail(arguments),this},"catch":function(a){return e.then(null,a)},pipe:function(){var a=arguments;return r.Deferred(function(b){r.each(c,function(c,d){var e=r.isFunction(a[d[4]])&&a[d[4]];f[d[1]](function(){var a=e&&e.apply(this,arguments);a&&r.isFunction(a.promise)?a.promise().progress(b.notify).done(b.resolve).fail(b.reject):b[d[0]+"With"](this,e?[a]:arguments)})}),a=null}).promise()},then:function(b,d,e){var f=0;function g(b,c,d,e){return function(){var h=this,i=arguments,j=function(){var a,j;if(!(b=f&&(d!==O&&(h=void 0,i=[a]),c.rejectWith(h,i))}};b?k():(r.Deferred.getStackHook&&(k.stackTrace=r.Deferred.getStackHook()),a.setTimeout(k))}}return r.Deferred(function(a){c[0][3].add(g(0,a,r.isFunction(e)?e:N,a.notifyWith)),c[1][3].add(g(0,a,r.isFunction(b)?b:N)),c[2][3].add(g(0,a,r.isFunction(d)?d:O))}).promise()},promise:function(a){return null!=a?r.extend(a,e):e}},f={};return r.each(c,function(a,b){var g=b[2],h=b[5];e[b[1]]=g.add,h&&g.add(function(){d=h},c[3-a][2].disable,c[0][2].lock),g.add(b[3].fire),f[b[0]]=function(){return f[b[0]+"With"](this===f?void 0:this,arguments),this},f[b[0]+"With"]=g.fireWith}),e.promise(f),b&&b.call(f,f),f},when:function(a){var b=arguments.length,c=b,d=Array(c),e=f.call(arguments),g=r.Deferred(),h=function(a){return function(c){d[a]=this,e[a]=arguments.length>1?f.call(arguments):c,--b||g.resolveWith(d,e)}};if(b<=1&&(P(a,g.done(h(c)).resolve,g.reject,!b),"pending"===g.state()||r.isFunction(e[c]&&e[c].then)))return g.then();while(c--)P(e[c],h(c),g.reject);return g.promise()}});var Q=/^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/;r.Deferred.exceptionHook=function(b,c){a.console&&a.console.warn&&b&&Q.test(b.name)&&a.console.warn("jQuery.Deferred exception: "+b.message,b.stack,c)},r.readyException=function(b){a.setTimeout(function(){throw b})};var R=r.Deferred();r.fn.ready=function(a){return R.then(a)["catch"](function(a){r.readyException(a)}),this},r.extend({isReady:!1,readyWait:1,ready:function(a){(a===!0?--r.readyWait:r.isReady)||(r.isReady=!0,a!==!0&&--r.readyWait>0||R.resolveWith(d,[r]))}}),r.ready.then=R.then;function S(){d.removeEventListener("DOMContentLoaded",S), -a.removeEventListener("load",S),r.ready()}"complete"===d.readyState||"loading"!==d.readyState&&!d.documentElement.doScroll?a.setTimeout(r.ready):(d.addEventListener("DOMContentLoaded",S),a.addEventListener("load",S));var T=function(a,b,c,d,e,f,g){var h=0,i=a.length,j=null==c;if("object"===r.type(c)){e=!0;for(h in c)T(a,b,h,c[h],!0,f,g)}else if(void 0!==d&&(e=!0,r.isFunction(d)||(g=!0),j&&(g?(b.call(a,d),b=null):(j=b,b=function(a,b,c){return j.call(r(a),c)})),b))for(;h1,null,!0)},removeData:function(a){return this.each(function(){X.remove(this,a)})}}),r.extend({queue:function(a,b,c){var d;if(a)return b=(b||"fx")+"queue",d=W.get(a,b),c&&(!d||Array.isArray(c)?d=W.access(a,b,r.makeArray(c)):d.push(c)),d||[]},dequeue:function(a,b){b=b||"fx";var c=r.queue(a,b),d=c.length,e=c.shift(),f=r._queueHooks(a,b),g=function(){r.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return W.get(a,c)||W.access(a,c,{empty:r.Callbacks("once 
memory").add(function(){W.remove(a,[b+"queue",c])})})}}),r.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.length\x20\t\r\n\f]+)/i,la=/^$|\/(?:java|ecma)script/i,ma={option:[1,""],thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};ma.optgroup=ma.option,ma.tbody=ma.tfoot=ma.colgroup=ma.caption=ma.thead,ma.th=ma.td;function na(a,b){var c;return c="undefined"!=typeof a.getElementsByTagName?a.getElementsByTagName(b||"*"):"undefined"!=typeof a.querySelectorAll?a.querySelectorAll(b||"*"):[],void 0===b||b&&B(a,b)?r.merge([a],c):c}function oa(a,b){for(var c=0,d=a.length;c-1)e&&e.push(f);else if(j=r.contains(f.ownerDocument,f),g=na(l.appendChild(f),"script"),j&&oa(g),c){k=0;while(f=g[k++])la.test(f.type||"")&&c.push(f)}return l}!function(){var a=d.createDocumentFragment(),b=a.appendChild(d.createElement("div")),c=d.createElement("input");c.setAttribute("type","radio"),c.setAttribute("checked","checked"),c.setAttribute("name","t"),b.appendChild(c),o.checkClone=b.cloneNode(!0).cloneNode(!0).lastChild.checked,b.innerHTML="",o.noCloneChecked=!!b.cloneNode(!0).lastChild.defaultValue}();var ra=d.documentElement,sa=/^key/,ta=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,ua=/^([^.]*)(?:\.(.+)|)/;function va(){return!0}function wa(){return!1}function xa(){try{return d.activeElement}catch(a){}}function ya(a,b,c,d,e,f){var g,h;if("object"==typeof b){"string"!=typeof c&&(d=d||c,c=void 0);for(h in b)ya(a,h,c,d,b[h],f);return a}if(null==d&&null==e?(e=c,d=c=void 0):null==e&&("string"==typeof c?(e=d,d=void 0):(e=d,d=c,c=void 0)),e===!1)e=wa;else if(!e)return a;return 1===f&&(g=e,e=function(a){return r().off(a),g.apply(this,arguments)},e.guid=g.guid||(g.guid=r.guid++)),a.each(function(){r.event.add(this,b,e,d,c)})}r.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q=W.get(a);if(q){c.handler&&(f=c,c=f.handler,e=f.selector),e&&r.find.matchesSelector(ra,e),c.guid||(c.guid=r.guid++),(i=q.events)||(i=q.events={}),(g=q.handle)||(g=q.handle=function(b){return"undefined"!=typeof r&&r.event.triggered!==b.type?r.event.dispatch.apply(a,arguments):void 0}),b=(b||"").match(L)||[""],j=b.length;while(j--)h=ua.exec(b[j])||[],n=p=h[1],o=(h[2]||"").split(".").sort(),n&&(l=r.event.special[n]||{},n=(e?l.delegateType:l.bindType)||n,l=r.event.special[n]||{},k=r.extend({type:n,origType:p,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&r.expr.match.needsContext.test(e),namespace:o.join(".")},f),(m=i[n])||(m=i[n]=[],m.delegateCount=0,l.setup&&l.setup.call(a,d,o,g)!==!1||a.addEventListener&&a.addEventListener(n,g)),l.add&&(l.add.call(a,k),k.handler.guid||(k.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,k):m.push(k),r.event.global[n]=!0)}},remove:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q=W.hasData(a)&&W.get(a);if(q&&(i=q.events)){b=(b||"").match(L)||[""],j=b.length;while(j--)if(h=ua.exec(b[j])||[],n=p=h[1],o=(h[2]||"").split(".").sort(),n){l=r.event.special[n]||{},n=(d?l.delegateType:l.bindType)||n,m=i[n]||[],h=h[2]&&new RegExp("(^|\\.)"+o.join("\\.(?:.*\\.|)")+"(\\.|$)"),g=f=m.length;while(f--)k=m[f],!e&&p!==k.origType||c&&c.guid!==k.guid||h&&!h.test(k.namespace)||d&&d!==k.selector&&("**"!==d||!k.selector)||(m.splice(f,1),k.selector&&m.delegateCount--,l.remove&&l.remove.call(a,k));g&&!m.length&&(l.teardown&&l.teardown.call(a,o,q.handle)!==!1||r.removeEvent(a,n,q.handle),delete i[n])}else for(n in i)r.event.remove(a,n+b[j],c,d,!0);r.isEmptyObject(i)&&W.remove(a,"handle events")}},dispatch:function(a){var b=r.event.fix(a),c,d,e,f,g,h,i=new 
Array(arguments.length),j=(W.get(this,"events")||{})[b.type]||[],k=r.event.special[b.type]||{};for(i[0]=b,c=1;c=1))for(;j!==this;j=j.parentNode||this)if(1===j.nodeType&&("click"!==a.type||j.disabled!==!0)){for(f=[],g={},c=0;c-1:r.find(e,this,null,[j]).length),g[e]&&f.push(d);f.length&&h.push({elem:j,handlers:f})}return j=this,i\x20\t\r\n\f]*)[^>]*)\/>/gi,Aa=/\s*$/g;function Ea(a,b){return B(a,"table")&&B(11!==b.nodeType?b:b.firstChild,"tr")?r(">tbody",a)[0]||a:a}function Fa(a){return a.type=(null!==a.getAttribute("type"))+"/"+a.type,a}function Ga(a){var b=Ca.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function Ha(a,b){var c,d,e,f,g,h,i,j;if(1===b.nodeType){if(W.hasData(a)&&(f=W.access(a),g=W.set(b,f),j=f.events)){delete g.handle,g.events={};for(e in j)for(c=0,d=j[e].length;c1&&"string"==typeof q&&!o.checkClone&&Ba.test(q))return a.each(function(e){var f=a.eq(e);s&&(b[0]=q.call(this,e,f.html())),Ja(f,b,c,d)});if(m&&(e=qa(b,a[0].ownerDocument,!1,a,d),f=e.firstChild,1===e.childNodes.length&&(e=f),f||d)){for(h=r.map(na(e,"script"),Fa),i=h.length;l")},clone:function(a,b,c){var d,e,f,g,h=a.cloneNode(!0),i=r.contains(a.ownerDocument,a);if(!(o.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||r.isXMLDoc(a)))for(g=na(h),f=na(a),d=0,e=f.length;d0&&oa(g,!i&&na(a,"script")),h},cleanData:function(a){for(var b,c,d,e=r.event.special,f=0;void 0!==(c=a[f]);f++)if(U(c)){if(b=c[W.expando]){if(b.events)for(d in b.events)e[d]?r.event.remove(c,d):r.removeEvent(c,d,b.handle);c[W.expando]=void 0}c[X.expando]&&(c[X.expando]=void 0)}}}),r.fn.extend({detach:function(a){return Ka(this,a,!0)},remove:function(a){return Ka(this,a)},text:function(a){return T(this,function(a){return void 0===a?r.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=a)})},null,a,arguments.length)},append:function(){return Ja(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Ea(this,a);b.appendChild(a)}})},prepend:function(){return Ja(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Ea(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return Ja(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return Ja(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},empty:function(){for(var a,b=0;null!=(a=this[b]);b++)1===a.nodeType&&(r.cleanData(na(a,!1)),a.textContent="");return this},clone:function(a,b){return a=null!=a&&a,b=null==b?a:b,this.map(function(){return r.clone(this,a,b)})},html:function(a){return T(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a&&1===b.nodeType)return b.innerHTML;if("string"==typeof a&&!Aa.test(a)&&!ma[(ka.exec(a)||["",""])[1].toLowerCase()]){a=r.htmlPrefilter(a);try{for(;c1)}});function _a(a,b,c,d,e){return new _a.prototype.init(a,b,c,d,e)}r.Tween=_a,_a.prototype={constructor:_a,init:function(a,b,c,d,e,f){this.elem=a,this.prop=c,this.easing=e||r.easing._default,this.options=b,this.start=this.now=this.cur(),this.end=d,this.unit=f||(r.cssNumber[c]?"":"px")},cur:function(){var a=_a.propHooks[this.prop];return a&&a.get?a.get(this):_a.propHooks._default.get(this)},run:function(a){var b,c=_a.propHooks[this.prop];return 
this.options.duration?this.pos=b=r.easing[this.easing](a,this.options.duration*a,0,1,this.options.duration):this.pos=b=a,this.now=(this.end-this.start)*b+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),c&&c.set?c.set(this):_a.propHooks._default.set(this),this}},_a.prototype.init.prototype=_a.prototype,_a.propHooks={_default:{get:function(a){var b;return 1!==a.elem.nodeType||null!=a.elem[a.prop]&&null==a.elem.style[a.prop]?a.elem[a.prop]:(b=r.css(a.elem,a.prop,""),b&&"auto"!==b?b:0)},set:function(a){r.fx.step[a.prop]?r.fx.step[a.prop](a):1!==a.elem.nodeType||null==a.elem.style[r.cssProps[a.prop]]&&!r.cssHooks[a.prop]?a.elem[a.prop]=a.now:r.style(a.elem,a.prop,a.now+a.unit)}}},_a.propHooks.scrollTop=_a.propHooks.scrollLeft={set:function(a){a.elem.nodeType&&a.elem.parentNode&&(a.elem[a.prop]=a.now)}},r.easing={linear:function(a){return a},swing:function(a){return.5-Math.cos(a*Math.PI)/2},_default:"swing"},r.fx=_a.prototype.init,r.fx.step={};var ab,bb,cb=/^(?:toggle|show|hide)$/,db=/queueHooks$/;function eb(){bb&&(d.hidden===!1&&a.requestAnimationFrame?a.requestAnimationFrame(eb):a.setTimeout(eb,r.fx.interval),r.fx.tick())}function fb(){return a.setTimeout(function(){ab=void 0}),ab=r.now()}function gb(a,b){var c,d=0,e={height:a};for(b=b?1:0;d<4;d+=2-b)c=ca[d],e["margin"+c]=e["padding"+c]=a;return b&&(e.opacity=e.width=a),e}function hb(a,b,c){for(var d,e=(kb.tweeners[b]||[]).concat(kb.tweeners["*"]),f=0,g=e.length;f1)},removeAttr:function(a){return this.each(function(){r.removeAttr(this,a)})}}),r.extend({attr:function(a,b,c){var d,e,f=a.nodeType;if(3!==f&&8!==f&&2!==f)return"undefined"==typeof a.getAttribute?r.prop(a,b,c):(1===f&&r.isXMLDoc(a)||(e=r.attrHooks[b.toLowerCase()]||(r.expr.match.bool.test(b)?lb:void 0)),void 0!==c?null===c?void r.removeAttr(a,b):e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:(a.setAttribute(b,c+""),c):e&&"get"in e&&null!==(d=e.get(a,b))?d:(d=r.find.attr(a,b), -null==d?void 0:d))},attrHooks:{type:{set:function(a,b){if(!o.radioValue&&"radio"===b&&B(a,"input")){var c=a.value;return a.setAttribute("type",b),c&&(a.value=c),b}}}},removeAttr:function(a,b){var c,d=0,e=b&&b.match(L);if(e&&1===a.nodeType)while(c=e[d++])a.removeAttribute(c)}}),lb={set:function(a,b,c){return b===!1?r.removeAttr(a,c):a.setAttribute(c,c),c}},r.each(r.expr.match.bool.source.match(/\w+/g),function(a,b){var c=mb[b]||r.find.attr;mb[b]=function(a,b,d){var e,f,g=b.toLowerCase();return d||(f=mb[g],mb[g]=e,e=null!=c(a,b,d)?g:null,mb[g]=f),e}});var nb=/^(?:input|select|textarea|button)$/i,ob=/^(?:a|area)$/i;r.fn.extend({prop:function(a,b){return T(this,r.prop,a,b,arguments.length>1)},removeProp:function(a){return this.each(function(){delete this[r.propFix[a]||a]})}}),r.extend({prop:function(a,b,c){var d,e,f=a.nodeType;if(3!==f&&8!==f&&2!==f)return 1===f&&r.isXMLDoc(a)||(b=r.propFix[b]||b,e=r.propHooks[b]),void 0!==c?e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:a[b]=c:e&&"get"in e&&null!==(d=e.get(a,b))?d:a[b]},propHooks:{tabIndex:{get:function(a){var b=r.find.attr(a,"tabindex");return b?parseInt(b,10):nb.test(a.nodeName)||ob.test(a.nodeName)&&a.href?0:-1}}},propFix:{"for":"htmlFor","class":"className"}}),o.optSelected||(r.propHooks.selected={get:function(a){var b=a.parentNode;return b&&b.parentNode&&b.parentNode.selectedIndex,null},set:function(a){var 
b=a.parentNode;b&&(b.selectedIndex,b.parentNode&&b.parentNode.selectedIndex)}}),r.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){r.propFix[this.toLowerCase()]=this});function pb(a){var b=a.match(L)||[];return b.join(" ")}function qb(a){return a.getAttribute&&a.getAttribute("class")||""}r.fn.extend({addClass:function(a){var b,c,d,e,f,g,h,i=0;if(r.isFunction(a))return this.each(function(b){r(this).addClass(a.call(this,b,qb(this)))});if("string"==typeof a&&a){b=a.match(L)||[];while(c=this[i++])if(e=qb(c),d=1===c.nodeType&&" "+pb(e)+" "){g=0;while(f=b[g++])d.indexOf(" "+f+" ")<0&&(d+=f+" ");h=pb(d),e!==h&&c.setAttribute("class",h)}}return this},removeClass:function(a){var b,c,d,e,f,g,h,i=0;if(r.isFunction(a))return this.each(function(b){r(this).removeClass(a.call(this,b,qb(this)))});if(!arguments.length)return this.attr("class","");if("string"==typeof a&&a){b=a.match(L)||[];while(c=this[i++])if(e=qb(c),d=1===c.nodeType&&" "+pb(e)+" "){g=0;while(f=b[g++])while(d.indexOf(" "+f+" ")>-1)d=d.replace(" "+f+" "," ");h=pb(d),e!==h&&c.setAttribute("class",h)}}return this},toggleClass:function(a,b){var c=typeof a;return"boolean"==typeof b&&"string"===c?b?this.addClass(a):this.removeClass(a):r.isFunction(a)?this.each(function(c){r(this).toggleClass(a.call(this,c,qb(this),b),b)}):this.each(function(){var b,d,e,f;if("string"===c){d=0,e=r(this),f=a.match(L)||[];while(b=f[d++])e.hasClass(b)?e.removeClass(b):e.addClass(b)}else void 0!==a&&"boolean"!==c||(b=qb(this),b&&W.set(this,"__className__",b),this.setAttribute&&this.setAttribute("class",b||a===!1?"":W.get(this,"__className__")||""))})},hasClass:function(a){var b,c,d=0;b=" "+a+" ";while(c=this[d++])if(1===c.nodeType&&(" "+pb(qb(c))+" ").indexOf(b)>-1)return!0;return!1}});var rb=/\r/g;r.fn.extend({val:function(a){var b,c,d,e=this[0];{if(arguments.length)return d=r.isFunction(a),this.each(function(c){var e;1===this.nodeType&&(e=d?a.call(this,c,r(this).val()):a,null==e?e="":"number"==typeof e?e+="":Array.isArray(e)&&(e=r.map(e,function(a){return null==a?"":a+""})),b=r.valHooks[this.type]||r.valHooks[this.nodeName.toLowerCase()],b&&"set"in b&&void 0!==b.set(this,e,"value")||(this.value=e))});if(e)return b=r.valHooks[e.type]||r.valHooks[e.nodeName.toLowerCase()],b&&"get"in b&&void 0!==(c=b.get(e,"value"))?c:(c=e.value,"string"==typeof c?c.replace(rb,""):null==c?"":c)}}}),r.extend({valHooks:{option:{get:function(a){var b=r.find.attr(a,"value");return null!=b?b:pb(r.text(a))}},select:{get:function(a){var b,c,d,e=a.options,f=a.selectedIndex,g="select-one"===a.type,h=g?null:[],i=g?f+1:e.length;for(d=f<0?i:g?f:0;d-1)&&(c=!0);return c||(a.selectedIndex=-1),f}}}}),r.each(["radio","checkbox"],function(){r.valHooks[this]={set:function(a,b){if(Array.isArray(b))return a.checked=r.inArray(r(a).val(),b)>-1}},o.checkOn||(r.valHooks[this].get=function(a){return null===a.getAttribute("value")?"on":a.value})});var sb=/^(?:focusinfocus|focusoutblur)$/;r.extend(r.event,{trigger:function(b,c,e,f){var g,h,i,j,k,m,n,o=[e||d],p=l.call(b,"type")?b.type:b,q=l.call(b,"namespace")?b.namespace.split("."):[];if(h=i=e=e||d,3!==e.nodeType&&8!==e.nodeType&&!sb.test(p+r.event.triggered)&&(p.indexOf(".")>-1&&(q=p.split("."),p=q.shift(),q.sort()),k=p.indexOf(":")<0&&"on"+p,b=b[r.expando]?b:new r.Event(p,"object"==typeof b&&b),b.isTrigger=f?2:3,b.namespace=q.join("."),b.rnamespace=b.namespace?new RegExp("(^|\\.)"+q.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,b.result=void 
0,b.target||(b.target=e),c=null==c?[b]:r.makeArray(c,[b]),n=r.event.special[p]||{},f||!n.trigger||n.trigger.apply(e,c)!==!1)){if(!f&&!n.noBubble&&!r.isWindow(e)){for(j=n.delegateType||p,sb.test(j+p)||(h=h.parentNode);h;h=h.parentNode)o.push(h),i=h;i===(e.ownerDocument||d)&&o.push(i.defaultView||i.parentWindow||a)}g=0;while((h=o[g++])&&!b.isPropagationStopped())b.type=g>1?j:n.bindType||p,m=(W.get(h,"events")||{})[b.type]&&W.get(h,"handle"),m&&m.apply(h,c),m=k&&h[k],m&&m.apply&&U(h)&&(b.result=m.apply(h,c),b.result===!1&&b.preventDefault());return b.type=p,f||b.isDefaultPrevented()||n._default&&n._default.apply(o.pop(),c)!==!1||!U(e)||k&&r.isFunction(e[p])&&!r.isWindow(e)&&(i=e[k],i&&(e[k]=null),r.event.triggered=p,e[p](),r.event.triggered=void 0,i&&(e[k]=i)),b.result}},simulate:function(a,b,c){var d=r.extend(new r.Event,c,{type:a,isSimulated:!0});r.event.trigger(d,null,b)}}),r.fn.extend({trigger:function(a,b){return this.each(function(){r.event.trigger(a,b,this)})},triggerHandler:function(a,b){var c=this[0];if(c)return r.event.trigger(a,b,c,!0)}}),r.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(a,b){r.fn[b]=function(a,c){return arguments.length>0?this.on(b,null,a,c):this.trigger(b)}}),r.fn.extend({hover:function(a,b){return this.mouseenter(a).mouseleave(b||a)}}),o.focusin="onfocusin"in a,o.focusin||r.each({focus:"focusin",blur:"focusout"},function(a,b){var c=function(a){r.event.simulate(b,a.target,r.event.fix(a))};r.event.special[b]={setup:function(){var d=this.ownerDocument||this,e=W.access(d,b);e||d.addEventListener(a,c,!0),W.access(d,b,(e||0)+1)},teardown:function(){var d=this.ownerDocument||this,e=W.access(d,b)-1;e?W.access(d,b,e):(d.removeEventListener(a,c,!0),W.remove(d,b))}}});var tb=a.location,ub=r.now(),vb=/\?/;r.parseXML=function(b){var c;if(!b||"string"!=typeof b)return null;try{c=(new a.DOMParser).parseFromString(b,"text/xml")}catch(d){c=void 0}return c&&!c.getElementsByTagName("parsererror").length||r.error("Invalid XML: "+b),c};var wb=/\[\]$/,xb=/\r?\n/g,yb=/^(?:submit|button|image|reset|file)$/i,zb=/^(?:input|select|textarea|keygen)/i;function Ab(a,b,c,d){var e;if(Array.isArray(b))r.each(b,function(b,e){c||wb.test(a)?d(a,e):Ab(a+"["+("object"==typeof e&&null!=e?b:"")+"]",e,c,d)});else if(c||"object"!==r.type(b))d(a,b);else for(e in b)Ab(a+"["+e+"]",b[e],c,d)}r.param=function(a,b){var c,d=[],e=function(a,b){var c=r.isFunction(b)?b():b;d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(null==c?"":c)};if(Array.isArray(a)||a.jquery&&!r.isPlainObject(a))r.each(a,function(){e(this.name,this.value)});else for(c in a)Ab(c,a[c],b,e);return d.join("&")},r.fn.extend({serialize:function(){return r.param(this.serializeArray())},serializeArray:function(){return this.map(function(){var a=r.prop(this,"elements");return a?r.makeArray(a):this}).filter(function(){var a=this.type;return this.name&&!r(this).is(":disabled")&&zb.test(this.nodeName)&&!yb.test(a)&&(this.checked||!ja.test(a))}).map(function(a,b){var c=r(this).val();return null==c?null:Array.isArray(c)?r.map(c,function(a){return{name:b.name,value:a.replace(xb,"\r\n")}}):{name:b.name,value:c.replace(xb,"\r\n")}}).get()}});var Bb=/%20/g,Cb=/#.*$/,Db=/([?&])_=[^&]*/,Eb=/^(.*?):[ \t]*([^\r\n]*)$/gm,Fb=/^(?:about|app|app-storage|.+-extension|file|res|widget):$/,Gb=/^(?:GET|HEAD)$/,Hb=/^\/\//,Ib={},Jb={},Kb="*/".concat("*"),Lb=d.createElement("a");Lb.href=tb.href;function 
Mb(a){return function(b,c){"string"!=typeof b&&(c=b,b="*");var d,e=0,f=b.toLowerCase().match(L)||[];if(r.isFunction(c))while(d=f[e++])"+"===d[0]?(d=d.slice(1)||"*",(a[d]=a[d]||[]).unshift(c)):(a[d]=a[d]||[]).push(c)}}function Nb(a,b,c,d){var e={},f=a===Jb;function g(h){var i;return e[h]=!0,r.each(a[h]||[],function(a,h){var j=h(b,c,d);return"string"!=typeof j||f||e[j]?f?!(i=j):void 0:(b.dataTypes.unshift(j),g(j),!1)}),i}return g(b.dataTypes[0])||!e["*"]&&g("*")}function Ob(a,b){var c,d,e=r.ajaxSettings.flatOptions||{};for(c in b)void 0!==b[c]&&((e[c]?a:d||(d={}))[c]=b[c]);return d&&r.extend(!0,a,d),a}function Pb(a,b,c){var d,e,f,g,h=a.contents,i=a.dataTypes;while("*"===i[0])i.shift(),void 0===d&&(d=a.mimeType||b.getResponseHeader("Content-Type"));if(d)for(e in h)if(h[e]&&h[e].test(d)){i.unshift(e);break}if(i[0]in c)f=i[0];else{for(e in c){if(!i[0]||a.converters[e+" "+i[0]]){f=e;break}g||(g=e)}f=f||g}if(f)return f!==i[0]&&i.unshift(f),c[f]}function Qb(a,b,c,d){var e,f,g,h,i,j={},k=a.dataTypes.slice();if(k[1])for(g in a.converters)j[g.toLowerCase()]=a.converters[g];f=k.shift();while(f)if(a.responseFields[f]&&(c[a.responseFields[f]]=b),!i&&d&&a.dataFilter&&(b=a.dataFilter(b,a.dataType)),i=f,f=k.shift())if("*"===f)f=i;else if("*"!==i&&i!==f){if(g=j[i+" "+f]||j["* "+f],!g)for(e in j)if(h=e.split(" "),h[1]===f&&(g=j[i+" "+h[0]]||j["* "+h[0]])){g===!0?g=j[e]:j[e]!==!0&&(f=h[0],k.unshift(h[1]));break}if(g!==!0)if(g&&a["throws"])b=g(b);else try{b=g(b)}catch(l){return{state:"parsererror",error:g?l:"No conversion from "+i+" to "+f}}}return{state:"success",data:b}}r.extend({active:0,lastModified:{},etag:{},ajaxSettings:{url:tb.href,type:"GET",isLocal:Fb.test(tb.protocol),global:!0,processData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":Kb,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/\bxml\b/,html:/\bhtml/,json:/\bjson\b/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":JSON.parse,"text xml":r.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(a,b){return b?Ob(Ob(a,r.ajaxSettings),b):Ob(r.ajaxSettings,a)},ajaxPrefilter:Mb(Ib),ajaxTransport:Mb(Jb),ajax:function(b,c){"object"==typeof b&&(c=b,b=void 0),c=c||{};var e,f,g,h,i,j,k,l,m,n,o=r.ajaxSetup({},c),p=o.context||o,q=o.context&&(p.nodeType||p.jquery)?r(p):r.event,s=r.Deferred(),t=r.Callbacks("once memory"),u=o.statusCode||{},v={},w={},x="canceled",y={readyState:0,getResponseHeader:function(a){var b;if(k){if(!h){h={};while(b=Eb.exec(g))h[b[1].toLowerCase()]=b[2]}b=h[a.toLowerCase()]}return null==b?null:b},getAllResponseHeaders:function(){return k?g:null},setRequestHeader:function(a,b){return null==k&&(a=w[a.toLowerCase()]=w[a.toLowerCase()]||a,v[a]=b),this},overrideMimeType:function(a){return null==k&&(o.mimeType=a),this},statusCode:function(a){var b;if(a)if(k)y.always(a[y.status]);else for(b in a)u[b]=[u[b],a[b]];return this},abort:function(a){var b=a||x;return e&&e.abort(b),A(0,b),this}};if(s.promise(y),o.url=((b||o.url||tb.href)+"").replace(Hb,tb.protocol+"//"),o.type=c.method||c.type||o.method||o.type,o.dataTypes=(o.dataType||"*").toLowerCase().match(L)||[""],null==o.crossDomain){j=d.createElement("a");try{j.href=o.url,j.href=j.href,o.crossDomain=Lb.protocol+"//"+Lb.host!=j.protocol+"//"+j.host}catch(z){o.crossDomain=!0}}if(o.data&&o.processData&&"string"!=typeof o.data&&(o.data=r.param(o.data,o.traditional)),Nb(Ib,o,c,y),k)return 
y;l=r.event&&o.global,l&&0===r.active++&&r.event.trigger("ajaxStart"),o.type=o.type.toUpperCase(),o.hasContent=!Gb.test(o.type),f=o.url.replace(Cb,""),o.hasContent?o.data&&o.processData&&0===(o.contentType||"").indexOf("application/x-www-form-urlencoded")&&(o.data=o.data.replace(Bb,"+")):(n=o.url.slice(f.length),o.data&&(f+=(vb.test(f)?"&":"?")+o.data,delete o.data),o.cache===!1&&(f=f.replace(Db,"$1"),n=(vb.test(f)?"&":"?")+"_="+ub++ +n),o.url=f+n),o.ifModified&&(r.lastModified[f]&&y.setRequestHeader("If-Modified-Since",r.lastModified[f]),r.etag[f]&&y.setRequestHeader("If-None-Match",r.etag[f])),(o.data&&o.hasContent&&o.contentType!==!1||c.contentType)&&y.setRequestHeader("Content-Type",o.contentType),y.setRequestHeader("Accept",o.dataTypes[0]&&o.accepts[o.dataTypes[0]]?o.accepts[o.dataTypes[0]]+("*"!==o.dataTypes[0]?", "+Kb+"; q=0.01":""):o.accepts["*"]);for(m in o.headers)y.setRequestHeader(m,o.headers[m]);if(o.beforeSend&&(o.beforeSend.call(p,y,o)===!1||k))return y.abort();if(x="abort",t.add(o.complete),y.done(o.success),y.fail(o.error),e=Nb(Jb,o,c,y)){if(y.readyState=1,l&&q.trigger("ajaxSend",[y,o]),k)return y;o.async&&o.timeout>0&&(i=a.setTimeout(function(){y.abort("timeout")},o.timeout));try{k=!1,e.send(v,A)}catch(z){if(k)throw z;A(-1,z)}}else A(-1,"No Transport");function A(b,c,d,h){var j,m,n,v,w,x=c;k||(k=!0,i&&a.clearTimeout(i),e=void 0,g=h||"",y.readyState=b>0?4:0,j=b>=200&&b<300||304===b,d&&(v=Pb(o,y,d)),v=Qb(o,v,y,j),j?(o.ifModified&&(w=y.getResponseHeader("Last-Modified"),w&&(r.lastModified[f]=w),w=y.getResponseHeader("etag"),w&&(r.etag[f]=w)),204===b||"HEAD"===o.type?x="nocontent":304===b?x="notmodified":(x=v.state,m=v.data,n=v.error,j=!n)):(n=x,!b&&x||(x="error",b<0&&(b=0))),y.status=b,y.statusText=(c||x)+"",j?s.resolveWith(p,[m,x,y]):s.rejectWith(p,[y,x,n]),y.statusCode(u),u=void 0,l&&q.trigger(j?"ajaxSuccess":"ajaxError",[y,o,j?m:n]),t.fireWith(p,[y,x]),l&&(q.trigger("ajaxComplete",[y,o]),--r.active||r.event.trigger("ajaxStop")))}return y},getJSON:function(a,b,c){return r.get(a,b,c,"json")},getScript:function(a,b){return r.get(a,void 0,b,"script")}}),r.each(["get","post"],function(a,b){r[b]=function(a,c,d,e){return r.isFunction(c)&&(e=e||d,d=c,c=void 0),r.ajax(r.extend({url:a,type:b,dataType:e,data:c,success:d},r.isPlainObject(a)&&a))}}),r._evalUrl=function(a){return r.ajax({url:a,type:"GET",dataType:"script",cache:!0,async:!1,global:!1,"throws":!0})},r.fn.extend({wrapAll:function(a){var b;return this[0]&&(r.isFunction(a)&&(a=a.call(this[0])),b=r(a,this[0].ownerDocument).eq(0).clone(!0),this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstElementChild)a=a.firstElementChild;return a}).append(this)),this},wrapInner:function(a){return r.isFunction(a)?this.each(function(b){r(this).wrapInner(a.call(this,b))}):this.each(function(){var b=r(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=r.isFunction(a);return this.each(function(c){r(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(a){return this.parent(a).not("body").each(function(){r(this).replaceWith(this.childNodes)}),this}}),r.expr.pseudos.hidden=function(a){return!r.expr.pseudos.visible(a)},r.expr.pseudos.visible=function(a){return!!(a.offsetWidth||a.offsetHeight||a.getClientRects().length)},r.ajaxSettings.xhr=function(){try{return new a.XMLHttpRequest}catch(b){}};var Rb={0:200,1223:204},Sb=r.ajaxSettings.xhr();o.cors=!!Sb&&"withCredentials"in Sb,o.ajax=Sb=!!Sb,r.ajaxTransport(function(b){var c,d;if(o.cors||Sb&&!b.crossDomain)return{send:function(e,f){var 
g,h=b.xhr();if(h.open(b.type,b.url,b.async,b.username,b.password),b.xhrFields)for(g in b.xhrFields)h[g]=b.xhrFields[g];b.mimeType&&h.overrideMimeType&&h.overrideMimeType(b.mimeType),b.crossDomain||e["X-Requested-With"]||(e["X-Requested-With"]="XMLHttpRequest");for(g in e)h.setRequestHeader(g,e[g]);c=function(a){return function(){c&&(c=d=h.onload=h.onerror=h.onabort=h.onreadystatechange=null,"abort"===a?h.abort():"error"===a?"number"!=typeof h.status?f(0,"error"):f(h.status,h.statusText):f(Rb[h.status]||h.status,h.statusText,"text"!==(h.responseType||"text")||"string"!=typeof h.responseText?{binary:h.response}:{text:h.responseText},h.getAllResponseHeaders()))}},h.onload=c(),d=h.onerror=c("error"),void 0!==h.onabort?h.onabort=d:h.onreadystatechange=function(){4===h.readyState&&a.setTimeout(function(){c&&d()})},c=c("abort");try{h.send(b.hasContent&&b.data||null)}catch(i){if(c)throw i}},abort:function(){c&&c()}}}),r.ajaxPrefilter(function(a){a.crossDomain&&(a.contents.script=!1)}),r.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/\b(?:java|ecma)script\b/},converters:{"text script":function(a){return r.globalEval(a),a}}}),r.ajaxPrefilter("script",function(a){void 0===a.cache&&(a.cache=!1),a.crossDomain&&(a.type="GET")}),r.ajaxTransport("script",function(a){if(a.crossDomain){var b,c;return{send:function(e,f){b=r(" - - - - - - - - -
-
- Node Information -
-
-
-
Shapes On/Off
-
-
-
Reset Graph
-
-
diff --git a/coremltools/graph_visualization/style.css b/coremltools/graph_visualization/style.css deleted index aa99ad5f9..000000000 --- a/coremltools/graph_visualization/style.css +++ /dev/null @@ -1,100 +0,0 @@ -#cy { - width: 60%; - height: 80%; - position: absolute; - float: left; - top: 100px; - left: 0px; - overflow: hidden; - padding: 10px; - box-shadow: inset 0 0 10px #000000; -} - -#label-switch { - position: absolute; - right: 10px; - top: 100px; - border: 1px solid silver; - border-radius: 10px; - box-shadow: 2px 2px 2px #888888; - text-align: center; - text-decoration: none; - display: inline-block; - font-size: 16px; - font-family: 'Helvetica Neue'; - font-weight: bold; - padding: 10px; - background-color: ghostwhite; - color: #111111; - cursor: pointer; -} - -#label-switch:hover { - background-color: black; - color: whitesmoke; -} - -#reset-state { - position: absolute; - right: 10px; - top: 150px; - border: 1px solid silver; - border-radius: 10px; - box-shadow: 2px 2px 2px #888888; - text-align: center; - text-decoration: none; - display: inline-block; - font-size: 16px; - font-family: 'Helvetica Neue'; - font-weight: bold; - padding: 10px; - background-color: ghostwhite; - color: #111111; - cursor: pointer; -} - -#reset-state:hover { - background-color: black; - color: whitesmoke; -} - -#title { - font-family: 'Roboto Slab', serif; - text-align: center; - font-weight: bold; - font-size: 250%; -} - -#node-info-title { - display: block; - float: right; - width: 30%; - height: 100%; - padding: 50px; - border: 1px solid silver; - background-color: white; - font-weight: bold; - font-family: 'Roboto Slab', serif; - text-align: center; - margin-top: 170px; - font-size: 200%; - box-shadow: 10px 10px 10px #888888; - -} - -.subtitle { - font-family: 'Roboto Slab', serif; - font-weight: bold; - font-size: 150%; -} - -#node-info { - font-family: 'Roboto Slab', serif; - font-weight: normal; - font-size: 60%; -} - - -body { - background-color: #FEFEFE; -} diff --git a/coremltools/models/__init__.py b/coremltools/models/__init__.py index 4955e0434..63379af43 100644 --- a/coremltools/models/__init__.py +++ b/coremltools/models/__init__.py @@ -14,11 +14,11 @@ from .model import MLModel from .model import ( - _MLMODEL_FULL_PRECISION, - _MLMODEL_HALF_PRECISION, - _MLMODEL_QUANTIZED, + _MLMODEL_FULL_PRECISION, + _MLMODEL_HALF_PRECISION, + _MLMODEL_QUANTIZED, _VALID_MLMODEL_PRECISION_TYPES, - _SUPPORTED_QUANTIZATION_MODES, + _SUPPORTED_QUANTIZATION_MODES, _QUANTIZATION_MODE_LINEAR_QUANTIZATION, _QUANTIZATION_MODE_LINEAR_SYMMETRIC, _QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR, @@ -26,8 +26,9 @@ _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE, _QUANTIZATION_MODE_DEQUANTIZE, _LUT_BASED_QUANTIZATION, - _QUANTIZATION_MODE_DEQUANTIZE) - -from .model import NeuralNetworkShaper + _QUANTIZATION_MODE_DEQUANTIZE, + _METADATA_VERSION, + _METADATA_SOURCE, +) from .
import neural_network diff --git a/coremltools/models/_deprecation.py b/coremltools/models/_deprecation.py index 5e506f414..9c4aeb6a2 100644 --- a/coremltools/models/_deprecation.py +++ b/coremltools/models/_deprecation.py @@ -1,26 +1,33 @@ import warnings import functools + def deprecated(obj=None, suffix=""): - ''' + """ Decorator to mark a function or a class as deprecated - ''' + """ def decorator_deprecation_warning(obj): @functools.wraps(obj) def wrapped(*args, **kwargs): if isinstance(obj, type): - msg = "Class \"%s\" is deprecated and will be removed in the next release" % obj.__name__ + msg = ( + 'Class "%s" is deprecated and will be removed in the next release' + % obj.__name__ + ) else: - msg = "Function \"%s\" is deprecated and will be removed in the next release" % obj.__name__ + msg = ( + 'Function "%s" is deprecated and will be removed in the next release' + % obj.__name__ + ) if suffix: msg += "; %s" % suffix warnings.warn(msg, category=FutureWarning) return obj(*args, **kwargs) + return wrapped if obj is None: return decorator_deprecation_warning return decorator_deprecation_warning(obj) - diff --git a/coremltools/models/_feature_management.py b/coremltools/models/_feature_management.py index cb0b5606c..64150f87e 100644 --- a/coremltools/models/_feature_management.py +++ b/coremltools/models/_feature_management.py @@ -8,7 +8,11 @@ from functools import reduce import numpy as _np import operator as op -from six import integer_types as _integer_types, string_types as _string_types, PY3 as _PY3 +from six import ( + integer_types as _integer_types, + string_types as _string_types, + PY3 as _PY3, +) from . import datatypes from .. import SPECIFICATION_VERSION @@ -17,11 +21,11 @@ def process_or_validate_classifier_output_features( - output_features, class_labels, supports_class_scores = True): - + output_features, class_labels, supports_class_scores=True +): """ - Given a list of class labels and a list of output_features, validate the - list and return a valid version of output_features with all the correct + Given a list of class labels and a list of output_features, validate the + list and return a valid version of output_features with all the correct data type information included. 
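The `coremltools/models/_deprecation.py` hunk above only reformats the `deprecated` decorator; its behavior is unchanged: applied to a function or a class, it emits a `FutureWarning` naming the object on every call, with an optional suffix appended to the message. A minimal usage sketch (the `old_api` name and the suffix text are hypothetical):

```python
# Usage sketch for the deprecated() decorator reformatted above.
# "old_api" and the suffix text are hypothetical examples.
from coremltools.models._deprecation import deprecated

@deprecated(suffix="use the newer converter API instead")
def old_api():
    return 42

old_api()
# FutureWarning: Function "old_api" is deprecated and will be removed
# in the next release; use the newer converter API instead
```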
""" @@ -38,10 +42,10 @@ def raise_error(msg): output_class_type = datatypes.Int64() elif all(isinstance(cl, _string_types) for cl in class_labels): - output_class_type = datatypes.String() + output_class_type = datatypes.String() else: - raise ValueError('Class labels must be all of type int or all of type string.') + raise ValueError("Class labels must be all of type int or all of type string.") if output_features is None: @@ -57,42 +61,59 @@ def raise_error(msg): if supports_class_scores: out += [("classProbability", datatypes.Dictionary(output_class_type))] - elif (isinstance(output_features, (list, tuple)) - and all(isinstance(fn, _string_types) for fn in output_features) - and len(output_features) == 2): - + elif ( + isinstance(output_features, (list, tuple)) + and all(isinstance(fn, _string_types) for fn in output_features) + and len(output_features) == 2 + ): + if supports_class_scores: - out = [(output_features[0], output_class_type), - (output_features[1], datatypes.Dictionary(output_class_type))] + out = [ + (output_features[0], output_class_type), + (output_features[1], datatypes.Dictionary(output_class_type)), + ] else: - raise ValueError("Classifier model (as trained) does not support output scores for classes.") + raise ValueError( + "Classifier model (as trained) does not support output scores for classes." + ) elif is_valid_feature_list(output_features): - output_features = [(k, datatypes._normalize_datatype(dt)) for k, dt in output_features] + output_features = [ + (k, datatypes._normalize_datatype(dt)) for k, dt in output_features + ] if len(output_features) == 1 or not supports_class_scores: if not output_features[0][1] == output_class_type: - raise ValueError("Type of output class feature does not match type of class labels.") + raise ValueError( + "Type of output class feature does not match type of class labels." + ) else: - # Make sure the first two output features specified give the output + # Make sure the first two output features specified give the output # class field and the output class scores dictionary field - if (isinstance(output_features[0][1], datatypes.Dictionary) - and isinstance(output_features[1][1], output_class_type)): - - output_features[0], output_features[1] = output_features[1], output_features[0] + if isinstance(output_features[0][1], datatypes.Dictionary) and isinstance( + output_features[1][1], output_class_type + ): + output_features[0], output_features[1] = ( + output_features[1], + output_features[0], + ) if not isinstance(output_features[1][1], datatypes.Dictionary): raise_error("Output features class scores should be dictionary type.") if output_features[1][1].key_type != output_class_type: - raise_error("Class scores dictionary key type does not match type of class labels.") + raise_error( + "Class scores dictionary key type does not match type of class labels." + ) - if output_features[0][1] != output_class_type: - raise_error("Specified type of output class does not match type of class labels.") + if output_features[0][1] != output_class_type: + raise_error( + "Specified type of output class does not match type of class labels." + ) - # NOTE: We are intentionally allowing the case where additional fields are allowed + # NOTE: We are intentionally allowing the case where additional fields are allowed # beyond the original two features. 
out = output_features @@ -102,14 +123,16 @@ def raise_error(msg): return out + def is_valid_feature_list(features): - # Just test all the ways this could be - return (type(features) is list + return ( + type(features) is list and len(features) >= 1 and all(type(t) is tuple and len(t) == 2 for t in features) - and all(isinstance(n, str) for n, td in features) - and all(datatypes._is_valid_datatype(td) for n, td in features)) + and all(isinstance(n, _string_types) for n, td in features) + and all(datatypes._is_valid_datatype(td) for n, td in features) + ) def dimension_of_array_features(features): @@ -123,73 +146,79 @@ def dimension_of_array_features(features): elif isinstance(td, datatypes.Array): dim += reduce(op.mul, td.dimensions, 1) else: - raise ValueError("Unable to determine number of dimensions from feature list.") + raise ValueError( + "Unable to determine number of dimensions from feature list." + ) return dim -def process_or_validate_features(features, num_dimensions = None, feature_type_map = {}): + +def process_or_validate_features(features, num_dimensions=None, feature_type_map={}): """ Puts features into a standard form from a number of different possible forms. - The standard form is a list of 2-tuples of (name, datatype) pairs. The name + The standard form is a list of 2-tuples of (name, datatype) pairs. The name is a string and the datatype is an object as defined in the _datatype module. The possible input forms are as follows: - * A list of strings. in this case, the overall dimension is assumed to be - the length of the list. If neighboring names are identical, they are + * A list of strings. in this case, the overall dimension is assumed to be + the length of the list. If neighboring names are identical, they are assumed to be an input array of that length. For example: - ["a", "b", "c"] + ["a", "b", "c"] - resolves to + resolves to [("a", Double), ("b", Double), ("c", Double)]. - And: + And: ["a", "a", "b"] - resolves to + resolves to [("a", Array(2)), ("b", Double)]. - * A dictionary of keys to indices or ranges of feature indices. + * A dictionary of keys to indices or ranges of feature indices. - In this case, it's presented as a mapping from keys to indices or - ranges of contiguous indices. For example, + In this case, it's presented as a mapping from keys to indices or + ranges of contiguous indices. For example, - {"a" : 0, "b" : [2,3], "c" : 1} + {"a" : 0, "b" : [2,3], "c" : 1} - Resolves to + Resolves to [("a", Double), ("c", Double), ("b", Array(2))]. - Note that the ordering is determined by the indices. + Note that the ordering is determined by the indices. - * A single string. In this case, the input is assumed to be a single array, - with the number of dimensions set using num_dimensions. + * A single string. In this case, the input is assumed to be a single array, + with the number of dimensions set using num_dimensions. Notes: - If the features variable is in the standard form, it is simply checked and - returned. + If the features variable is in the standard form, it is simply checked and + returned. - If num_dimensions is given, it is used to check against the existing features, - or fill in missing information in the case when features is a single string. + If num_dimensions is given, it is used to check against the existing features, + or fill in missing information in the case when features is a single string. 
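The dictionary form described in this docstring can be exercised directly; a sketch, assuming the helper stays importable from the same private module:

```python
# Illustrative sketch of the dictionary input form described in the
# docstring above (private API; module path assumed unchanged).
from coremltools.models._feature_management import process_or_validate_features

out = process_or_validate_features({"a": 0, "b": [2, 3], "c": 1})
# Ordered by starting index:
# [("a", Double), ("c", Double), ("b", Array(2))]
```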
""" original_features = copy(features) if num_dimensions is not None and not isinstance(num_dimensions, _integer_types): - raise TypeError("num_dimensions must be None, an integer or a long, not '%s'" - % str(type(num_dimensions))) - + raise TypeError( + "num_dimensions must be None, an integer or a long, not '%s'" + % str(type(num_dimensions)) + ) def raise_type_error(additional_msg): - raise TypeError("Error processing feature list: %s\nfeatures = %s" - % (additional_msg, str(original_features))) + raise TypeError( + "Error processing feature list: %s\nfeatures = %s" + % (additional_msg, str(original_features)) + ) if type(features) is dict and is_valid_feature_list(features.items()): features = features.items() @@ -206,15 +235,17 @@ def raise_type_error(additional_msg): if feature_dims is not None and feature_dims != num_dimensions: raise_type_error("Dimension mismatch.") - # We may need to translate some parts of this back to the actual - # datatype class -- e.g. translate str to datatypes.String(). + # We may need to translate some parts of this back to the actual + # datatype class -- e.g. translate str to datatypes.String(). return [(k, datatypes._normalize_datatype(dt)) for k, dt in features] if isinstance(features, _string_types): if num_dimensions is None: - raise_type_error("If a single feature name is given, then " - "num_dimensions must be provided.") - features = {features : range(num_dimensions)} + raise_type_error( + "If a single feature name is given, then " + "num_dimensions must be provided." + ) + features = {features: range(num_dimensions)} if isinstance(features, (list, tuple, _np.ndarray)): # Change this into a dictionary @@ -223,12 +254,15 @@ def raise_type_error(additional_msg): for i, k in enumerate(features): if not isinstance(k, _string_types): - raise_type_error("List of feature names must either be a list of strings, or a list of (name, datatypes.Array instance) tuples.") + raise_type_error( + "List of feature names must either be a list of strings, or a list of (name, datatypes.Array instance) tuples." + ) if num_dimensions is not None and len(features) != num_dimensions: - raise_type_error(("List of feature names has wrong length; " - "%d required, %d provided.") - % (num_dimensions, len(features))) + raise_type_error( + ("List of feature names has wrong length; " "%d required, %d provided.") + % (num_dimensions, len(features)) + ) for i, k in enumerate(features): mapping[k].append(i) @@ -237,15 +271,17 @@ def raise_type_error(additional_msg): features = mapping if not isinstance(features, dict): - raise_type_error("features must be either a list of feature names " - "or a dictionary of feature names to ranges.") + raise_type_error( + "features must be either a list of feature names " + "or a dictionary of feature names to ranges." + ) # We'll be invasive here so make a copy. features = copy(features) for k, v in list(features.items()): - if not isinstance(k, str): + if not isinstance(k, _string_types): raise_type_error("Feature names must be strings.") def test_index(val): @@ -257,7 +293,9 @@ def test_index(val): error = True if error: - raise_type_error("Specified indices for feature %s must be integers." % k) + raise_type_error( + "Specified indices for feature %s must be integers." % k + ) if val < 0 or (num_dimensions is not None and val >= num_dimensions): raise_type_error("Index in feature %s out of range." 
% k) @@ -278,13 +316,20 @@ def test_index(val): test_index(v) features[k] = v = [v] else: - raise_type_error(("Value type for feature %s not recognized; " - "values must be either integers, lists or range objects.") % k) + raise_type_error( + ( + "Value type for feature %s not recognized; " + "values must be either integers, lists or range objects." + ) + % k + ) # check to make sure things are contiguous if v != list(range(v[0], v[-1] + 1)): - raise_type_error("Index list for feature %s must consist of " - "a contiguous range of indices." % k) + raise_type_error( + "Index list for feature %s must consist of " + "a contiguous range of indices." % k + ) if len(set(v)) != len(v): raise_type_error("Index list for feature %s contains duplicates." % k) @@ -292,20 +337,23 @@ def test_index(val): # Now, set num dimensions from the list if it's actually None if num_dimensions is None: from itertools import chain - num_dimensions = 1 + max(chain(*[il for k, il in features.items()])) - - if (set().union(*features.values()) != set(range(num_dimensions)) - or sum(len(v) for v in features.values()) != num_dimensions): - raise_type_error("Supplied indices must cover entire range of 0, ..., num_dimensions-1.") + num_dimensions = 1 + max(chain(*[il for k, il in features.items()])) + if ( + set().union(*features.values()) != set(range(num_dimensions)) + or sum(len(v) for v in features.values()) != num_dimensions + ): + raise_type_error( + "Supplied indices must cover entire range of 0, ..., num_dimensions-1." + ) # Define the output feature types - output_features = [None]*len(features) + output_features = [None] * len(features) # Finally, go through and map all these things out as types. # Sort by first value of the index range. - for i, (k, v) in enumerate(sorted(features.items(), key = lambda t: t[1][0])): + for i, (k, v) in enumerate(sorted(features.items(), key=lambda t: t[1][0])): if k in feature_type_map: output_features[i] = (k, feature_type_map[k]) @@ -315,4 +363,3 @@ def test_index(val): output_features[i] = (k, datatypes.Array(len(v))) return output_features - diff --git a/coremltools/models/_graph_visualization.py b/coremltools/models/_graph_visualization.py index e208ae834..da92863d6 100644 --- a/coremltools/models/_graph_visualization.py +++ b/coremltools/models/_graph_visualization.py @@ -36,15 +36,17 @@ def _calculate_edges(cy_nodes, cy_edges, shape_dict=None): for upper_index in range(0, node_len): for lower_index in range(upper_index + 1, node_len): - if 'outputs' in cy_nodes[upper_index]['data']['info'].keys() and \ - 'inputs' in cy_nodes[upper_index]['data']['info'].keys() \ - and 'outputs' in cy_nodes[lower_index]['data']['info'].keys() \ - and 'inputs' in cy_nodes[lower_index]['data']['info'].keys(): + if ( + "outputs" in cy_nodes[upper_index]["data"]["info"].keys() + and "inputs" in cy_nodes[upper_index]["data"]["info"].keys() + and "outputs" in cy_nodes[lower_index]["data"]["info"].keys() + and "inputs" in cy_nodes[lower_index]["data"]["info"].keys() + ): outputs = _ast.literal_eval( - cy_nodes[upper_index]['data']['info']['outputs'] + cy_nodes[upper_index]["data"]["info"]["outputs"] ) inputs = _ast.literal_eval( - cy_nodes[lower_index]['data']['info']['inputs'] + cy_nodes[lower_index]["data"]["info"]["inputs"] ) for output in outputs: if output in inputs: @@ -55,16 +57,16 @@ def _calculate_edges(cy_nodes, cy_edges, shape_dict=None): cy_edges.append( { - 'data':{'id': - '{}.{}.{}'.format( - output, - cy_nodes[upper_index]['data']['id'], - cy_nodes[lower_index]['data']['id'] - ), - 
'source': cy_nodes[upper_index]['data']['id'], - 'target': cy_nodes[lower_index]['data']['id'], - 'label': label, - 'shape': label + "data": { + "id": "{}.{}.{}".format( + output, + cy_nodes[upper_index]["data"]["id"], + cy_nodes[lower_index]["data"]["id"], + ), + "source": cy_nodes[upper_index]["data"]["id"], + "target": cy_nodes[lower_index]["data"]["id"], + "label": label, + "shape": label, } } ) @@ -90,81 +92,87 @@ def _layer_specific_info(layer): info : info specific to layer type """ - if layer.WhichOneof('layer') == 'convolution': - info = { - 'type': layer.WhichOneof('layer'), - 'outputChannels': _json.dumps(str(layer.convolution.outputChannels)), - 'kernelChannels': _json.dumps(str(layer.convolution.kernelChannels)), - 'groups': _json.dumps(str(layer.convolution.nGroups)), - 'kernelSize': _json.dumps(str(layer.convolution.kernelSize)), - 'stride': _json.dumps(str(layer.convolution.stride)), - 'dilationFactor': _json.dumps(str(layer.convolution.dilationFactor)), - 'isDeconvolution': _json.dumps(str(layer.convolution.isDeconvolution)), - 'paddingType' : _json.dumps(layer.convolution.WhichOneof('ConvolutionPaddingType')), - 'desc': 'A layer that performs spatial convolution' - } - if _json.dumps(layer.convolution.isDeconvolution) == 'true': - info['type'] = 'deconvolution' - info['desc'] = 'A layer that performs spatial deconvolution' - - elif layer.WhichOneof('layer') == 'activation': + if layer.WhichOneof("layer") == "convolution": + info = { + "type": layer.WhichOneof("layer"), + "outputChannels": _json.dumps(str(layer.convolution.outputChannels)), + "kernelChannels": _json.dumps(str(layer.convolution.kernelChannels)), + "groups": _json.dumps(str(layer.convolution.nGroups)), + "kernelSize": _json.dumps(str(layer.convolution.kernelSize)), + "stride": _json.dumps(str(layer.convolution.stride)), + "dilationFactor": _json.dumps(str(layer.convolution.dilationFactor)), + "isDeconvolution": _json.dumps(str(layer.convolution.isDeconvolution)), + "paddingType": _json.dumps( + layer.convolution.WhichOneof("ConvolutionPaddingType") + ), + "desc": "A layer that performs spatial convolution", + } + if _json.dumps(layer.convolution.isDeconvolution) == "true": + info["type"] = "deconvolution" + info["desc"] = "A layer that performs spatial deconvolution" + + elif layer.WhichOneof("layer") == "activation": params = layer.activation - act_type = params.WhichOneof('NonlinearityType') - info = { - 'type': layer.WhichOneof('layer'), - 'activationType': act_type, - 'desc': 'Applies specified type of activation function to input.' 
- } - if act_type == 'linear': - info['alpha'] = _json.dumps(str(params.linear.alpha)) - info['beta'] = _json.dumps(str(params.linear.beta)) - if act_type == 'leakyReLU': - info['alpha'] = _json.dumps(str(params.leakyReLU.alpha)) - if act_type == 'thresholdedReLU': - info['alpha'] = _json.dumps(str(params.thresholdedReLU.alpha)) - if act_type == 'scaledTanh': - info['alpha'] = _json.dumps(str(params.scaledTanh.alpha)) - info['beta'] = _json.dumps(str(params.scaledTanh.beta)) - if act_type == 'sigmoidHard': - info['alpha'] = _json.dumps(str(params.sigmoidHard.alpha)) - info['beta'] = _json.dumps(str(params.sigmoidHard.beta)) - if act_type == 'ELU': - info['alpha'] = _json.dumps(str(params.ELU.alpha)) - - elif layer.WhichOneof('layer') == 'pooling': + act_type = params.WhichOneof("NonlinearityType") + info = { + "type": layer.WhichOneof("layer"), + "activationType": act_type, + "desc": "Applies specified type of activation function to input.", + } + if act_type == "linear": + info["alpha"] = _json.dumps(str(params.linear.alpha)) + info["beta"] = _json.dumps(str(params.linear.beta)) + if act_type == "leakyReLU": + info["alpha"] = _json.dumps(str(params.leakyReLU.alpha)) + if act_type == "thresholdedReLU": + info["alpha"] = _json.dumps(str(params.thresholdedReLU.alpha)) + if act_type == "scaledTanh": + info["alpha"] = _json.dumps(str(params.scaledTanh.alpha)) + info["beta"] = _json.dumps(str(params.scaledTanh.beta)) + if act_type == "sigmoidHard": + info["alpha"] = _json.dumps(str(params.sigmoidHard.alpha)) + info["beta"] = _json.dumps(str(params.sigmoidHard.beta)) + if act_type == "ELU": + info["alpha"] = _json.dumps(str(params.ELU.alpha)) + + elif layer.WhichOneof("layer") == "pooling": params = layer.pooling - paddingType = params.WhichOneof('PoolingPaddingType') + paddingType = params.WhichOneof("PoolingPaddingType") info = { - 'type': layer.WhichOneof('layer'), - 'desc': 'Spatial Pooling layer to reduce dimensions of input using the ' - 'specified kernel size and type.' + "type": layer.WhichOneof("layer"), + "desc": "Spatial Pooling layer to reduce dimensions of input using the " + "specified kernel size and type.", } if params.globalPooling: - info['globalPooling'] = 'True' - info['poolingType'] = 'global pooling' + info["globalPooling"] = "True" + info["poolingType"] = "global pooling" else: - info['poolingType'] = _json.dumps(_NeuralNetwork_pb2.PoolingLayerParams.PoolingType.Name(params.type)) - info['stride'] = _json.dumps(str(params.stride)) - info['kernelSize'] = _json.dumps(str(params.kernelSize)) - info['paddingType'] = _json.dumps(paddingType) - - elif layer.WhichOneof('layer') == 'add': - info = { - 'type': layer.WhichOneof('layer'), - 'alpha': _json.dumps(str(layer.add.alpha)), - 'desc': 'A layer that performs elementwise addition.' - } - elif layer.WhichOneof('layer') == 'batchnorm': - info = { - 'type': layer.WhichOneof('layer'), - 'channels': _json.dumps(str(layer.batchnorm.channels)), - 'computeMeanVar': _json.dumps(str(layer.batchnorm.computeMeanVar)), - 'instanceNormalization': _json.dumps(str(layer.batchnorm.instanceNormalization)), - 'desc': 'A layer that performs batch normalization, \n' - 'which is performed along the channel axis, \n' - 'and repeated along the other axes, if present.' 
- } - elif layer.WhichOneof('layer') == 'biDirectionalLSTM': + info["poolingType"] = _json.dumps( + _NeuralNetwork_pb2.PoolingLayerParams.PoolingType.Name(params.type) + ) + info["stride"] = _json.dumps(str(params.stride)) + info["kernelSize"] = _json.dumps(str(params.kernelSize)) + info["paddingType"] = _json.dumps(paddingType) + + elif layer.WhichOneof("layer") == "add": + info = { + "type": layer.WhichOneof("layer"), + "alpha": _json.dumps(str(layer.add.alpha)), + "desc": "A layer that performs elementwise addition.", + } + elif layer.WhichOneof("layer") == "batchnorm": + info = { + "type": layer.WhichOneof("layer"), + "channels": _json.dumps(str(layer.batchnorm.channels)), + "computeMeanVar": _json.dumps(str(layer.batchnorm.computeMeanVar)), + "instanceNormalization": _json.dumps( + str(layer.batchnorm.instanceNormalization) + ), + "desc": "A layer that performs batch normalization, \n" + "which is performed along the channel axis, \n" + "and repeated along the other axes, if present.", + } + elif layer.WhichOneof("layer") == "biDirectionalLSTM": forward_activations = "" for activation in layer.biDirectionalLSTM.activationsForwardLSTM: forward_activations += str(activation)[:-5] + ", " @@ -172,294 +180,320 @@ def _layer_specific_info(layer): for activation in layer.biDirectionalLSTM.activationsBackwardLSTM: backward_activations += str(activation)[:-5] + ", " info = { - 'type': layer.WhichOneof('layer'), - 'inputVectorSize': _json.dumps(str(layer.biDirectionalLSTM.inputVectorSize)), - 'outputVectorSize': _json.dumps(str(layer.biDirectionalLSTM.outputVectorSize)), - 'forward_activations': _json.dumps(forward_activations), - 'backward_activations': _json.dumps(backward_activations), - 'lstm_params': _json.dumps(str(layer.biDirectionalLSTM.params)), - 'desc': 'Bidirectional long short-term memory (LSTM) layer\n' - 'The first LSTM operates on the input sequence in the forward direction.\n' - 'The second LSTM operates on the input sequence in the reverse direction.' - } - elif layer.WhichOneof('layer') == 'uniDirectionalLSTM': + "type": layer.WhichOneof("layer"), + "inputVectorSize": _json.dumps( + str(layer.biDirectionalLSTM.inputVectorSize) + ), + "outputVectorSize": _json.dumps( + str(layer.biDirectionalLSTM.outputVectorSize) + ), + "forward_activations": _json.dumps(forward_activations), + "backward_activations": _json.dumps(backward_activations), + "lstm_params": _json.dumps(str(layer.biDirectionalLSTM.params)), + "desc": "Bidirectional long short-term memory (LSTM) layer\n" + "The first LSTM operates on the input sequence in the forward direction.\n" + "The second LSTM operates on the input sequence in the reverse direction.", + } + elif layer.WhichOneof("layer") == "uniDirectionalLSTM": activations = "" for activation in layer.uniDirectionalLSTM.activations: activations += str(activation)[:-5] + ", " info = { - 'type': layer.WhichOneof('layer'), - 'inputVectorSize': _json.dumps(str(layer.uniDirectionalLSTM.inputVectorSize)), - 'outputVectorSize': _json.dumps(str(layer.uniDirectionalLSTM.outputVectorSize)), - 'activations': _json.dumps(activations), - 'lstm_params': _json.dumps(str(layer.uniDirectionalLSTM.params)), - 'reverse_input': _json.dumps(str(layer.uniDirectionalLSTM.reverseInput)), - 'desc': 'A unidirectional long short-term memory (LSTM) layer.' 
- - } - elif layer.WhichOneof('layer') == 'gru': + "type": layer.WhichOneof("layer"), + "inputVectorSize": _json.dumps( + str(layer.uniDirectionalLSTM.inputVectorSize) + ), + "outputVectorSize": _json.dumps( + str(layer.uniDirectionalLSTM.outputVectorSize) + ), + "activations": _json.dumps(activations), + "lstm_params": _json.dumps(str(layer.uniDirectionalLSTM.params)), + "reverse_input": _json.dumps(str(layer.uniDirectionalLSTM.reverseInput)), + "desc": "A unidirectional long short-term memory (LSTM) layer.", + } + elif layer.WhichOneof("layer") == "gru": activations = "" for activation in layer.gru.activations: activations += str(activation)[:-5] + ", " info = { - 'type': layer.WhichOneof('layer'), - 'inputVectorSize': _json.dumps(str(layer.gru.inputVectorSize)), - 'outputVectorSize': _json.dumps(str(layer.gru.outputVectorSize)), - 'activations': _json.dumps(activations), - 'hasBiasVectors': _json.dumps(str(layer.gru.hasBiasVectors)), - 'reverseInput': _json.dumps(str(layer.gru.reverseInput)), - 'sequenceOutput': _json.dumps(str(layer.gru.sequenceOutput)), - 'desc': 'Gated-Recurrent Unit (GRU) Layer.\n' - - } - elif layer.WhichOneof('layer') == 'simpleRecurrent': - info = { - 'type': layer.WhichOneof('layer'), - 'inputVectorSize': _json.dumps(str(layer.simpleRecurrent.inputVectorSize)), - 'outputVectorSize': _json.dumps(str(layer.simpleRecurrent.outputVectorSize)), - 'activation': _json.dumps(str(layer.simpleRecurrent.activation)), - 'hasBiasVector': _json.dumps(str(layer.simpleRecurrent.hasBiasVector)), - 'reverseInput': _json.dumps(str(layer.simpleRecurrent.reverseInput)), - 'sequenceOutput': _json.dumps(str(layer.simpleRecurrent.sequenceOutput)), - 'desc': 'A simple recurrent layer.' - } - elif layer.WhichOneof('layer') == 'bias': - info = { - 'type': layer.WhichOneof('layer'), - 'shape': _json.dumps(str(layer.bias.shape)), - 'desc': 'A layer that performs elementwise addition of a bias,\n' - 'which is broadcasted to match the input shape.' - } - elif layer.WhichOneof('layer') == 'concat': - info = { - 'type': layer.WhichOneof('layer'), - 'sequenceConcat': _json.dumps(str(layer.concat.sequenceConcat)), - 'desc': 'A layer that concatenates along the channel axis (default) or sequence axis.' - } - elif layer.WhichOneof('layer') == 'crop': - info = { - 'type': layer.WhichOneof('layer'), - 'cropAmounts': _json.dumps(str(layer.crop.cropAmounts)), - 'offset': _json.dumps(str(layer.crop.offset)), - 'desc': 'A layer that crops the spatial dimensions of an input.\n' - 'If two inputs are provided, the shape of the second ' - 'input is used as the reference shape.' - } - elif layer.WhichOneof('layer') == 'dot': - info = { - 'type': layer.WhichOneof('layer'), - 'cosineSimilarity': _json.dumps(str(layer.dot.cosineSimilarity)), - 'desc': 'If true, inputs are normalized first, ' - 'thereby computing the cosine similarity.' - } - elif layer.WhichOneof('layer') == 'embedding': - info = { - 'type': layer.WhichOneof('layer'), - 'inputDim': _json.dumps(str(layer.embedding.inputDim)), - 'outputChannels': _json.dumps(str(layer.embedding.outputChannels)), - 'hasBias': _json.dumps(str(layer.embedding.inputDim)), - 'desc': 'A layer that performs a matrix lookup and optionally adds a bias.' - } - elif layer.WhichOneof('layer') == 'flatten': - info = { - 'type': layer.WhichOneof('layer'), - 'mode': _json.dumps(_NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Name(layer.flatten.mode)), - 'desc': 'A layer that flattens the input.' 
+ "type": layer.WhichOneof("layer"), + "inputVectorSize": _json.dumps(str(layer.gru.inputVectorSize)), + "outputVectorSize": _json.dumps(str(layer.gru.outputVectorSize)), + "activations": _json.dumps(activations), + "hasBiasVectors": _json.dumps(str(layer.gru.hasBiasVectors)), + "reverseInput": _json.dumps(str(layer.gru.reverseInput)), + "sequenceOutput": _json.dumps(str(layer.gru.sequenceOutput)), + "desc": "Gated-Recurrent Unit (GRU) Layer.\n", + } + elif layer.WhichOneof("layer") == "simpleRecurrent": + info = { + "type": layer.WhichOneof("layer"), + "inputVectorSize": _json.dumps(str(layer.simpleRecurrent.inputVectorSize)), + "outputVectorSize": _json.dumps( + str(layer.simpleRecurrent.outputVectorSize) + ), + "activation": _json.dumps(str(layer.simpleRecurrent.activation)), + "hasBiasVector": _json.dumps(str(layer.simpleRecurrent.hasBiasVector)), + "reverseInput": _json.dumps(str(layer.simpleRecurrent.reverseInput)), + "sequenceOutput": _json.dumps(str(layer.simpleRecurrent.sequenceOutput)), + "desc": "A simple recurrent layer.", + } + elif layer.WhichOneof("layer") == "bias": + info = { + "type": layer.WhichOneof("layer"), + "shape": _json.dumps(str(layer.bias.shape)), + "desc": "A layer that performs elementwise addition of a bias,\n" + "which is broadcasted to match the input shape.", + } + elif layer.WhichOneof("layer") == "concat": + info = { + "type": layer.WhichOneof("layer"), + "sequenceConcat": _json.dumps(str(layer.concat.sequenceConcat)), + "desc": "A layer that concatenates along the channel axis (default) or sequence axis.", + } + elif layer.WhichOneof("layer") == "crop": + info = { + "type": layer.WhichOneof("layer"), + "cropAmounts": _json.dumps(str(layer.crop.cropAmounts)), + "offset": _json.dumps(str(layer.crop.offset)), + "desc": "A layer that crops the spatial dimensions of an input.\n" + "If two inputs are provided, the shape of the second " + "input is used as the reference shape.", + } + elif layer.WhichOneof("layer") == "dot": + info = { + "type": layer.WhichOneof("layer"), + "cosineSimilarity": _json.dumps(str(layer.dot.cosineSimilarity)), + "desc": "If true, inputs are normalized first, " + "thereby computing the cosine similarity.", + } + elif layer.WhichOneof("layer") == "embedding": + info = { + "type": layer.WhichOneof("layer"), + "inputDim": _json.dumps(str(layer.embedding.inputDim)), + "outputChannels": _json.dumps(str(layer.embedding.outputChannels)), + "hasBias": _json.dumps(str(layer.embedding.inputDim)), + "desc": "A layer that performs a matrix lookup and optionally adds a bias.", + } + elif layer.WhichOneof("layer") == "flatten": + info = { + "type": layer.WhichOneof("layer"), + "mode": _json.dumps( + _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Name( + layer.flatten.mode + ) + ), + "desc": "A layer that flattens the input.", } - elif layer.WhichOneof('layer') == 'innerProduct': + elif layer.WhichOneof("layer") == "innerProduct": info = { - 'type': layer.WhichOneof('layer'), - 'inputChannels': _json.dumps(str(layer.innerProduct.inputChannels)), - 'outputChannels': _json.dumps(str(layer.innerProduct.outputChannels)), - 'hasBias': _json.dumps(str(layer.innerProduct.hasBias)), - 'desc': 'A layer that performs a matrix vector product.\n' - 'This is equivalent to a fully-connected, or dense layer.' 
+ "type": layer.WhichOneof("layer"), + "inputChannels": _json.dumps(str(layer.innerProduct.inputChannels)), + "outputChannels": _json.dumps(str(layer.innerProduct.outputChannels)), + "hasBias": _json.dumps(str(layer.innerProduct.hasBias)), + "desc": "A layer that performs a matrix vector product.\n" + "This is equivalent to a fully-connected, or dense layer.", } - elif layer.WhichOneof('layer') == 'l2normalize': + elif layer.WhichOneof("layer") == "l2normalize": info = { - 'type': layer.WhichOneof('layer'), - 'epsilon': _json.dumps(str(layer.l2normalize.epsilon)), - 'desc': 'A layer that performs L2 normalization, i.e. divides by the \n' - 'the square root of the sum of squares of all elements of input.' + "type": layer.WhichOneof("layer"), + "epsilon": _json.dumps(str(layer.l2normalize.epsilon)), + "desc": "A layer that performs L2 normalization, i.e. divides by the \n" + "the square root of the sum of squares of all elements of input.", } - elif layer.WhichOneof('layer') == 'loadConstant': + elif layer.WhichOneof("layer") == "loadConstant": info = { - 'type': layer.WhichOneof('layer'), - 'shape': _json.dumps(str(layer.loadConstant.shape)), - 'desc': 'The shape of the constant to be loaded' + "type": layer.WhichOneof("layer"), + "shape": _json.dumps(str(layer.loadConstant.shape)), + "desc": "The shape of the constant to be loaded", } - elif layer.WhichOneof('layer') == 'lrn': + elif layer.WhichOneof("layer") == "lrn": info = { - 'type': layer.WhichOneof('layer'), - 'alpha': _json.dumps(str(layer.lrn.alpha)), - 'beta': _json.dumps(str(layer.lrn.beta)), - 'localSize': _json.dumps(str(layer.lrn.localSize)), - 'k': _json.dumps(str(layer.lrn.k)), - 'desc': 'A layer that performs local response normalization (LRN).' + "type": layer.WhichOneof("layer"), + "alpha": _json.dumps(str(layer.lrn.alpha)), + "beta": _json.dumps(str(layer.lrn.beta)), + "localSize": _json.dumps(str(layer.lrn.localSize)), + "k": _json.dumps(str(layer.lrn.k)), + "desc": "A layer that performs local response normalization (LRN).", } - elif layer.WhichOneof('layer') == 'multiply': + elif layer.WhichOneof("layer") == "multiply": info = { - 'type': layer.WhichOneof('layer'), - 'alpha': _json.dumps(str(layer.multiply.alpha)), - 'desc': 'A layer that performs elementwise multiplication.' + "type": layer.WhichOneof("layer"), + "alpha": _json.dumps(str(layer.multiply.alpha)), + "desc": "A layer that performs elementwise multiplication.", } - elif layer.WhichOneof('layer') == 'mvn': + elif layer.WhichOneof("layer") == "mvn": info = { - 'type': layer.WhichOneof('layer'), - 'acrossChannels': _json.dumps(str(layer.mvn.acrossChannels)), - 'normalizeVariance': _json.dumps(str(layer.mvn.normalizeVariance)), - 'epsilon': _json.dumps(str(layer.mvn.epsilon)), - 'desc': 'A layer that performs mean variance normalization.' + "type": layer.WhichOneof("layer"), + "acrossChannels": _json.dumps(str(layer.mvn.acrossChannels)), + "normalizeVariance": _json.dumps(str(layer.mvn.normalizeVariance)), + "epsilon": _json.dumps(str(layer.mvn.epsilon)), + "desc": "A layer that performs mean variance normalization.", } - elif layer.WhichOneof('layer') == 'padding': + elif layer.WhichOneof("layer") == "padding": info = { - 'type': layer.WhichOneof('layer'), - 'paddingAmounts': _json.dumps(str(layer.padding.paddingAmounts)), - 'paddingType': _json.dumps(str(layer.padding.WhichOneof('PaddingType'))), - 'desc': 'Fill a constant value in the padded region.' 
+ "type": layer.WhichOneof("layer"), + "paddingAmounts": _json.dumps(str(layer.padding.paddingAmounts)), + "paddingType": _json.dumps(str(layer.padding.WhichOneof("PaddingType"))), + "desc": "Fill a constant value in the padded region.", } - elif layer.WhichOneof('layer') == 'permute': + elif layer.WhichOneof("layer") == "permute": info = { - 'type': layer.WhichOneof('layer'), - 'axis': _json.dumps(str(layer.permute.axis)), - 'desc': 'A layer that rearranges the dimensions and data of an input.' + "type": layer.WhichOneof("layer"), + "axis": _json.dumps(str(layer.permute.axis)), + "desc": "A layer that rearranges the dimensions and data of an input.", } - elif layer.WhichOneof('layer') == 'reduce': + elif layer.WhichOneof("layer") == "reduce": params = layer.reduce info = { - 'type': layer.WhichOneof('layer'), - 'mode': _json.dumps(str(params.mode)), - 'epsilon': _json.dumps(str(params.epsilon)), - 'axis': _json.dumps(_NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Name(params.axis)), - 'desc': 'A layer that reduces the input using a specified operation.' + "type": layer.WhichOneof("layer"), + "mode": _json.dumps(str(params.mode)), + "epsilon": _json.dumps(str(params.epsilon)), + "axis": _json.dumps( + _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Name(params.axis) + ), + "desc": "A layer that reduces the input using a specified operation.", } - elif layer.WhichOneof('layer') == 'reorganizeData': + elif layer.WhichOneof("layer") == "reorganizeData": info = { - 'type': layer.WhichOneof('layer'), - 'mode': _json.dumps(_NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Name(layer.reorganizeData.mode)), - 'blockSize': _json.dumps(str(layer.reorganizeData.blockSize)), - 'desc': 'A layer that reorganizes data in the input in: \n' - '1. SPACE_TO_DEPTH\n' - '2. DEPTH_TO_SPACE' - } - elif layer.WhichOneof('layer') == 'reshape': - info = { - 'type': layer.WhichOneof('layer'), - 'mode': _json.dumps(_NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Name(layer.reshape.mode)), - 'targetShape': _json.dumps(str(layer.reshape.targetShape)), - 'desc': 'A layer that recasts the input into a new shape.' - } - elif layer.WhichOneof('layer') == 'scale': - info = { - 'type': layer.WhichOneof('layer'), - 'shapeScale': _json.dumps(str(layer.scale.shapeScale)), - 'hasBias': _json.dumps(str(layer.scale.hasBias)), - 'shapeBias': _json.dumps(str(layer.scale.shapeBias)), - 'desc': 'A layer that performs elmentwise multiplication by a scale factor\n' - 'and optionally adds a bias;' + "type": layer.WhichOneof("layer"), + "mode": _json.dumps( + _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Name( + layer.reorganizeData.mode + ) + ), + "blockSize": _json.dumps(str(layer.reorganizeData.blockSize)), + "desc": "A layer that reorganizes data in the input in: \n" + "1. SPACE_TO_DEPTH\n" + "2. DEPTH_TO_SPACE", + } + elif layer.WhichOneof("layer") == "reshape": + info = { + "type": layer.WhichOneof("layer"), + "mode": _json.dumps( + _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Name( + layer.reshape.mode + ) + ), + "targetShape": _json.dumps(str(layer.reshape.targetShape)), + "desc": "A layer that recasts the input into a new shape.", } - elif layer.WhichOneof('layer') == 'sequenceRepeat': + elif layer.WhichOneof("layer") == "scale": info = { - 'type': layer.WhichOneof('layer'), - 'nRepetitions': _json.dumps(str(layer.sequenceRepeat.nRepetitions)), - 'desc': 'A layer that repeats a sequence.' 
+ "type": layer.WhichOneof("layer"), + "shapeScale": _json.dumps(str(layer.scale.shapeScale)), + "hasBias": _json.dumps(str(layer.scale.hasBias)), + "shapeBias": _json.dumps(str(layer.scale.shapeBias)), + "desc": "A layer that performs elmentwise multiplication by a scale factor\n" + "and optionally adds a bias;", } - elif layer.WhichOneof('layer') == 'slice': + elif layer.WhichOneof("layer") == "sequenceRepeat": info = { - 'type': layer.WhichOneof('layer'), - 'startIndex': _json.dumps(str(layer.slice.startIndex)), - 'endIndex': _json.dumps(str(layer.slice.endIndex)), - 'stride': _json.dumps(str(layer.slice.stride)), - 'axis': _json.dumps(_NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Name(layer.slice.axis)), - 'desc': 'A layer that slices the input data along a given axis.' + "type": layer.WhichOneof("layer"), + "nRepetitions": _json.dumps(str(layer.sequenceRepeat.nRepetitions)), + "desc": "A layer that repeats a sequence.", } - elif layer.WhichOneof('layer') == 'split': + elif layer.WhichOneof("layer") == "slice": info = { - 'type': layer.WhichOneof('layer'), - 'nOutputs': _json.dumps(str(layer.split.nOutputs)), - 'desc': 'A layer that uniformly splits across the channel dimension\n' - 'to produce a specified number of outputs.' + "type": layer.WhichOneof("layer"), + "startIndex": _json.dumps(str(layer.slice.startIndex)), + "endIndex": _json.dumps(str(layer.slice.endIndex)), + "stride": _json.dumps(str(layer.slice.stride)), + "axis": _json.dumps( + _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Name(layer.slice.axis) + ), + "desc": "A layer that slices the input data along a given axis.", } - elif layer.WhichOneof('layer') == 'unary': + elif layer.WhichOneof("layer") == "split": info = { - 'type': layer.WhichOneof('layer'), - 'unary_type': _json.dumps(_NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Name(layer.unary.type)), - 'alpha': _json.dumps(str(layer.unary.alpha)), - 'epsilon': _json.dumps(str(layer.unary.epsilon)), - 'shift': _json.dumps(str(layer.unary.shift)), - 'scale': _json.dumps(str(layer.unary.scale)), - 'desc': 'A layer that applies a unary function.' + "type": layer.WhichOneof("layer"), + "nOutputs": _json.dumps(str(layer.split.nOutputs)), + "desc": "A layer that uniformly splits across the channel dimension\n" + "to produce a specified number of outputs.", } - elif layer.WhichOneof('layer') == 'upsample': + elif layer.WhichOneof("layer") == "unary": info = { - 'type': layer.WhichOneof('layer'), - 'scalingFactor': _json.dumps(str(layer.upsample.scalingFactor)), - 'mode': _json.dumps(_NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Name(layer.upsample.mode)), - 'desc': 'A layer that scales up spatial dimensions.\n' - 'It supports two modes: ' - 'nearest neighbour (default) and bilinear.' 
+ "type": layer.WhichOneof("layer"), + "unary_type": _json.dumps( + _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Name( + layer.unary.type + ) + ), + "alpha": _json.dumps(str(layer.unary.alpha)), + "epsilon": _json.dumps(str(layer.unary.epsilon)), + "shift": _json.dumps(str(layer.unary.shift)), + "scale": _json.dumps(str(layer.unary.scale)), + "desc": "A layer that applies a unary function.", + } + elif layer.WhichOneof("layer") == "upsample": + info = { + "type": layer.WhichOneof("layer"), + "scalingFactor": _json.dumps(str(layer.upsample.scalingFactor)), + "mode": _json.dumps( + _NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Name( + layer.upsample.mode + ) + ), + "desc": "A layer that scales up spatial dimensions.\n" + "It supports two modes: " + "nearest neighbour (default) and bilinear.", } - elif layer.WhichOneof('layer') == 'max': + elif layer.WhichOneof("layer") == "max": info = { - 'type': layer.WhichOneof('layer'), - 'desc': 'A layer that computes the elementwise maximum ' - 'over the inputs.' + "type": layer.WhichOneof("layer"), + "desc": "A layer that computes the elementwise maximum " "over the inputs.", } - elif layer.WhichOneof('layer') == 'min': + elif layer.WhichOneof("layer") == "min": info = { - 'type': layer.WhichOneof('layer'), - 'desc': 'A layer that computes the elementwise minimum ' - 'over the inputs.' + "type": layer.WhichOneof("layer"), + "desc": "A layer that computes the elementwise minimum " "over the inputs.", } - elif layer.WhichOneof('layer') == 'average': + elif layer.WhichOneof("layer") == "average": info = { - 'type': layer.WhichOneof('layer'), - 'desc': 'A layer that computes the elementwise average ' - 'of the inputs.' + "type": layer.WhichOneof("layer"), + "desc": "A layer that computes the elementwise average " "of the inputs.", } - elif layer.WhichOneof('layer') == 'softmax': + elif layer.WhichOneof("layer") == "softmax": info = { - 'type': layer.WhichOneof('layer'), - 'desc': 'A layer that performs softmax normalization.\n' - 'Normalization is done along the channel axis.' 
+ "type": layer.WhichOneof("layer"), + "desc": "A layer that performs softmax normalization.\n" + "Normalization is done along the channel axis.", } - elif layer.WhichOneof('layer') == 'custom': + elif layer.WhichOneof("layer") == "custom": info = { - 'type': layer.WhichOneof('layer'), - 'className': layer.custom.className, - 'desc': 'A custom layer' + "type": layer.WhichOneof("layer"), + "className": layer.custom.className, + "desc": "A custom layer", } if layer.custom.parameters != {}: for key in layer.custom.parameters.keys(): value = _get_custom_layer_value(layer.custom.parameters[key]) info[key] = value if layer.custom.description: - info['desc'] = layer.custom.description + info["desc"] = layer.custom.description else: - info = { - 'type': layer.WhichOneof('layer') - } + info = {"type": layer.WhichOneof("layer")} - info['inputs'] = str(layer.input) - info['outputs'] = str(layer.output) + info["inputs"] = str(layer.input) + info["outputs"] = str(layer.output) return info -def _get_custom_layer_value(parameter): - if 'intValue' in str(parameter): +def _get_custom_layer_value(parameter): + if "intValue" in str(parameter): return str(parameter.intValue) - elif 'doubleValue' in str(parameter): + elif "doubleValue" in str(parameter): return str(parameter.doubleValue) - elif 'boolValue' in str(parameter): + elif "boolValue" in str(parameter): return str(parameter.boolValue) - elif 'longValue' in str(parameter): + elif "longValue" in str(parameter): return str(parameter.longValue) - elif 'stringValue' in str(parameter): + elif "stringValue" in str(parameter): return str(parameter.stringValue) - def _pipeline_component_info(model, info): """ @@ -480,97 +514,149 @@ def _pipeline_component_info(model, info): info : info dict with required info for model """ - model_type = model.WhichOneof('Type') - if model_type == 'arrayFeatureExtractor': - info["desc"] = 'Given an index, extracts the value at ' \ - 'that index from its array input.\n' \ - 'Indexes are zero-based.' - elif model_type == 'categoricalMapping': - info["mappingType"] = _json.dumps(str(model.categoricalMapping.WhichOneof('MappingType'))) - info["valueOnUnknown"] = _json.dumps(str(model.categoricalMapping.WhichOneof('ValueOnUnknown'))) - info["desc"] = 'This allows conversion from integers ' \ - 'to strings, or from strings to integers.' - elif model_type == 'dictVectorizer': - info["map"] = _json.dumps(str(model.dictVectorizer.WhichOneof('Map'))) - info["desc"] = 'Uses an index mapping to convert a dictionary ' \ - 'to an array.\n The output array will be equal in ' \ - 'length to the index mapping vector parameter.\n' \ - 'All keys in the input dictionary must be present in ' \ - 'the index mapping vector.' - elif model_type == 'featureVectorizer': + model_type = model.WhichOneof("Type") + if model_type == "arrayFeatureExtractor": + info["desc"] = ( + "Given an index, extracts the value at " + "that index from its array input.\n" + "Indexes are zero-based." + ) + elif model_type == "categoricalMapping": + info["mappingType"] = _json.dumps( + str(model.categoricalMapping.WhichOneof("MappingType")) + ) + info["valueOnUnknown"] = _json.dumps( + str(model.categoricalMapping.WhichOneof("ValueOnUnknown")) + ) + info["desc"] = ( + "This allows conversion from integers " + "to strings, or from strings to integers." 
+ ) + elif model_type == "dictVectorizer": + info["map"] = _json.dumps(str(model.dictVectorizer.WhichOneof("Map"))) + info["desc"] = ( + "Uses an index mapping to convert a dictionary " + "to an array.\n The output array will be equal in " + "length to the index mapping vector parameter.\n" + "All keys in the input dictionary must be present in " + "the index mapping vector." + ) + elif model_type == "featureVectorizer": info["inputList"] = _json.dumps(str(model.featureVectorizer.inputList)) - info["desc"] = 'A FeatureVectorizer puts one or more features into a single' \ - ' array.\n The ordering of features in the output array is ' \ - 'determined by inputList.' - elif model_type == 'glmClassifier': + info["desc"] = ( + "A FeatureVectorizer puts one or more features into a single" + " array.\n The ordering of features in the output array is " + "determined by inputList." + ) + elif model_type == "glmClassifier": info["offset"] = _json.dumps(str(model.glmClassifier.offset)) - info["postEvaluationTransform"] = _json.dumps(str(model.glmClassifier.postEvaluationTransform)) + info["postEvaluationTransform"] = _json.dumps( + str(model.glmClassifier.postEvaluationTransform) + ) info["classEncoding"] = _json.dumps(str(model.glmClassifier.classEncoding)) - info["classLabels"] = _json.dumps(str(model.glmClassifier.WhichOneof('ClassLabels'))) - info["desc"] = 'A generalized linear model classifier.' - elif model_type == 'glmRegressor': + info["classLabels"] = _json.dumps( + str(model.glmClassifier.WhichOneof("ClassLabels")) + ) + info["desc"] = "A generalized linear model classifier." + elif model_type == "glmRegressor": info["offset"] = _json.dumps(str(model.glmRegressor.offset)) - info["postEvaluationTransform"] = _json.dumps(str(model.glmRegressor.postEvaluationTransform)) - info["desc"] = 'A generalized linear model regressor.' - elif model_type == 'imputer': - info["ImputedValue"] = _json.dumps(str(model.imputer.WhichOneof('ImputedValue'))) - info["desc"] = 'A transformer that replaces missing values with a ' \ - 'default value,\n such as a statistically-derived ' \ - 'value.\nIf ``ReplaceValue`` is set, then missing ' \ - 'values of that type are\n replaced with the ' \ - 'corresponding value.' - elif model_type == 'normalizer': + info["postEvaluationTransform"] = _json.dumps( + str(model.glmRegressor.postEvaluationTransform) + ) + info["desc"] = "A generalized linear model regressor." + elif model_type == "imputer": + info["ImputedValue"] = _json.dumps( + str(model.imputer.WhichOneof("ImputedValue")) + ) + info["desc"] = ( + "A transformer that replaces missing values with a " + "default value,\n such as a statistically-derived " + "value.\nIf ``ReplaceValue`` is set, then missing " + "values of that type are\n replaced with the " + "corresponding value." + ) + elif model_type == "normalizer": info["normType"] = _json.dumps(str(model.normalizer.normType)) - info["desc"] = 'A normalization preprocessor.There are three normalization modes\n' \ - '1. Max\n' \ - '2. L1\n' \ - '3. L2' - elif model_type == 'oneHotEncoder': - info["CategoryType"] = _json.dumps(str(model.oneHotEncoder.WhichOneof('CategoryType'))) + info["desc"] = ( + "A normalization preprocessor. There are three normalization modes\n" + "1. Max\n" + "2. L1\n" + "3. 
L2" + ) + elif model_type == "oneHotEncoder": + info["CategoryType"] = _json.dumps( + str(model.oneHotEncoder.WhichOneof("CategoryType")) + ) info["outputSparse"] = _json.dumps(str(model.oneHotEncoder.outputSparse)) info["handleUnknown"] = _json.dumps(str(model.oneHotEncoder.handleUnknown)) - info["desc"] = 'Transforms a categorical feature into an array. The array will be all\n' \ - 'zeros expect a single entry of one.\n' \ - 'Each categorical value will map to an index, this mapping is given by\n' \ - 'either the ``stringCategories`` parameter or the ``int64Categories``\n' \ - 'parameter.' - elif model_type == 'scaler': + info["desc"] = ( + "Transforms a categorical feature into an array. The array will be all\n" + "zeros expect a single entry of one.\n" + "Each categorical value will map to an index, this mapping is given by\n" + "either the ``stringCategories`` parameter or the ``int64Categories``\n" + "parameter." + ) + elif model_type == "scaler": info["shiftValue"] = _json.dumps(str(model.scaler.shiftValue)) info["scaleValue"] = _json.dumps(str(model.scaler.scaleValue)) - info["desc"] = 'A scaling operation.\n' \ - r'f(x) = scaleValue \cdot (x + shiftValue)' - elif model_type == 'supportVectorClassifier': + info["desc"] = ( + "A scaling operation.\n" r"f(x) = scaleValue \cdot (x + shiftValue)" + ) + elif model_type == "supportVectorClassifier": info["kernel"] = _json.dumps(str(model.supportVectorClassifier.kernel)) - info["numberOfSupportVectorsPerClass"] = _json.dumps(str(model.supportVectorClassifier.numberOfSupportVectorsPerClass)) + info["numberOfSupportVectorsPerClass"] = _json.dumps( + str(model.supportVectorClassifier.numberOfSupportVectorsPerClass) + ) info["rho"] = _json.dumps(str(model.supportVectorClassifier.rho)) info["probA"] = _json.dumps(str(model.supportVectorClassifier.probA)) info["probB"] = _json.dumps(str(model.supportVectorClassifier.probB)) - info["ClassLabels"] = _json.dumps(str(model.supportVectorClassifier.WhichOneof('ClassLabels'))) - info["desc"] = 'Support Vector Machine Classifier with one of ' \ - 'Linear, RBF, Polynomial or Sigmoid ' \ - 'kernels available' - elif model_type == 'supportVectorRegressor': + info["ClassLabels"] = _json.dumps( + str(model.supportVectorClassifier.WhichOneof("ClassLabels")) + ) + info["desc"] = ( + "Support Vector Machine Classifier with one of " + "Linear, RBF, Polynomial or Sigmoid " + "kernels available" + ) + elif model_type == "supportVectorRegressor": info["kernel"] = _json.dumps(str(model.supportVectorRegressor.kernel)) info["numberOfSupportVectorsPerClass"] = _json.dumps( - str(model.supportVectorRegressor.numberOfSupportVectorsPerClass)) + str(model.supportVectorRegressor.numberOfSupportVectorsPerClass) + ) info["rho"] = _json.dumps(str(model.supportVectorRegressor.rho)) - info["desc"] = 'Support Vector Machine Regressor with one of ' \ - 'Linear, RBF, Polynomial or Sigmoid kernels available' - elif model_type == 'treeEnsembleClassifier': - info["treeEnsemble"] = _json.dumps(str(model.treeEnsembleClassifier.treeEnsemble)) - info["postEvaluationTransform"] = _json.dumps(str(model.treeEnsembleClassifier.postEvaluationTransform)) - info["ClassLabels"] = _json.dumps(str(model.treeEnsembleClassifier.WhichOneof('ClassLabels'))) - info["desc"] = 'Each tree is a collection of nodes, each of which is identified ' \ - 'by a unique identifier.\nEach node is either a branch or a leaf node.' \ - ' A branch node evaluates a value according to a behavior;\n' \ - 'A tree must have exactly one root node, which has no parent node.' 
- elif model_type == 'treeEnsembleRegressor': - info["treeEnsemble"] = _json.dumps(str(model.treeEnsembleRegressor.treeEnsemble)) - info["postEvaluationTransform"] = _json.dumps(str(model.treeEnsembleRegressor.postEvaluationTransform)) - info["desc"] = 'Each tree is a collection of nodes, each of which is identified' \ - ' by a unique identifier.\nEach node is either a branch or a leaf' \ - ' node. A branch node evaluates a value according to a behavior;\n' \ - 'A tree must have exactly one root node, which has no parent node.' + info["desc"] = ( + "Support Vector Machine Regressor with one of " + "Linear, RBF, Polynomial or Sigmoid kernels available" + ) + elif model_type == "treeEnsembleClassifier": + info["treeEnsemble"] = _json.dumps( + str(model.treeEnsembleClassifier.treeEnsemble) + ) + info["postEvaluationTransform"] = _json.dumps( + str(model.treeEnsembleClassifier.postEvaluationTransform) + ) + info["ClassLabels"] = _json.dumps( + str(model.treeEnsembleClassifier.WhichOneof("ClassLabels")) + ) + info["desc"] = ( + "Each tree is a collection of nodes, each of which is identified " + "by a unique identifier.\nEach node is either a branch or a leaf node." + " A branch node evaluates a value according to a behavior;\n" + "A tree must have exactly one root node, which has no parent node." + ) + elif model_type == "treeEnsembleRegressor": + info["treeEnsemble"] = _json.dumps( + str(model.treeEnsembleRegressor.treeEnsemble) + ) + info["postEvaluationTransform"] = _json.dumps( + str(model.treeEnsembleRegressor.postEvaluationTransform) + ) + info["desc"] = ( + "Each tree is a collection of nodes, each of which is identified" + " by a unique identifier.\nEach node is either a branch or a leaf" + " node. A branch node evaluates a value according to a behavior;\n" + "A tree must have exactly one root node, which has no parent node." 
+ ) return info @@ -595,36 +681,32 @@ def _neural_network_node_info(nn_spec, cy_nodes, child=False, parent=None): info = _layer_specific_info(layer) if child: info["name"] = layer.name - cy_nodes.append({ - 'data': { - 'id': layer.name, - 'name': info["type"], - 'info': info, - 'parent': parent - }, - 'classes': info["type"], - }) + cy_nodes.append( + { + "data": { + "id": layer.name, + "name": info["type"], + "info": info, + "parent": parent, + }, + "classes": info["type"], + } + ) else: info["name"] = layer.name - cy_nodes.append({ - 'data': { - 'id': layer.name, - 'name': info["type"], - 'info': info - }, - 'classes': info["type"], - }) + cy_nodes.append( + { + "data": {"id": layer.name, "name": info["type"], "info": info}, + "classes": info["type"], + } + ) return cy_nodes -def _neural_network_nodes_and_edges(nn_spec, - cy_nodes, - cy_edges, - spec_outputs, - input_spec, - input_shape_dict=None - ): +def _neural_network_nodes_and_edges( + nn_spec, cy_nodes, cy_edges, spec_outputs, input_spec, input_shape_dict=None +): """ Parameters @@ -642,32 +724,33 @@ def _neural_network_nodes_and_edges(nn_spec, """ cy_nodes = _neural_network_node_info(nn_spec, cy_nodes) - cy_nodes.append({ - 'data': { - 'id': 'output_node', - 'name': '', - 'info': { - 'type': 'output node' - }, - 'classes': 'output', - + cy_nodes.append( + { + "data": { + "id": "output_node", + "name": "", + "info": {"type": "output node"}, + "classes": "output", + } } - }) + ) for model_output, output_type in spec_outputs: - cy_nodes.append({ - 'data': { - 'id': str(model_output), - 'name': str(model_output), - 'info': { - 'type': "\n".join(str(output_type).split("\n")), - 'inputs': str([model_output]), - 'outputs': str([]) + cy_nodes.append( + { + "data": { + "id": str(model_output), + "name": str(model_output), + "info": { + "type": "\n".join(str(output_type).split("\n")), + "inputs": str([model_output]), + "outputs": str([]), + }, + "parent": "output_node", }, - 'parent': 'output_node' - }, - 'classes': 'output' - }) + "classes": "output", + } + ) shape_dict = _infer_shapes(nn_spec, input_spec, input_shape_dict=input_shape_dict) cy_nodes, cy_edges = _calculate_edges(cy_nodes, cy_edges, shape_dict) @@ -693,82 +776,92 @@ def _pipeline_nodes_and_edges(cy_nodes, cy_edges, pipeline_spec, spec_outputs): """ i = 1 - nn_model_types = ['neuralNetwork', 'neuralNetworkClassifier', 'neuralNetworkRegressor'] + nn_model_types = [ + "neuralNetwork", + "neuralNetworkClassifier", + "neuralNetworkRegressor", + ] models = pipeline_spec.models shape_dict = None for model in models: - sub_model_type = model.WhichOneof('Type') + sub_model_type = model.WhichOneof("Type") if not sub_model_type: - sub_model_type = 'input' + sub_model_type = "input" info = {} input_names = [] output_names = [] - info['Pipeline Component'] = sub_model_type.upper() + info["Pipeline Component"] = sub_model_type.upper() for model_input in model.description.input: input_names.append(model_input.name) - info['inputs'] = str(input_names) + info["inputs"] = str(input_names) for model_output in model.description.output: output_names.append(model_output.name) - info['outputs'] = str(output_names) + info["outputs"] = str(output_names) info = _pipeline_component_info(model, info) if sub_model_type in nn_model_types: - cy_nodes.append({ - 'data': { - 'id': "{}_{}".format(sub_model_type, i), - 'name': sub_model_type, - 'info': info - }, - 'classes': 'parent', - }) - if sub_model_type == 'neuralNetwork': + cy_nodes.append( + { + "data": { + "id": "{}_{}".format(sub_model_type, i), + 
"name": sub_model_type, + "info": info, + }, + "classes": "parent", + } + ) + if sub_model_type == "neuralNetwork": nn_spec = model.neuralNetwork - elif sub_model_type == 'neuralNetworkClassifier': + elif sub_model_type == "neuralNetworkClassifier": nn_spec = model.neuralNetworkClassifier - elif sub_model_type == 'neuralNetworkRegressor': + elif sub_model_type == "neuralNetworkRegressor": nn_spec = model.neuralNetworkRegressor - cy_nodes = _neural_network_node_info(nn_spec, cy_nodes, child=True, parent="{}_{}".format(sub_model_type, i)) + cy_nodes = _neural_network_node_info( + nn_spec, cy_nodes, child=True, parent="{}_{}".format(sub_model_type, i) + ) shape_dict = _infer_shapes(nn_spec, model.description.input) else: - cy_nodes.append({ - 'data': { - 'id': "{}_{}".format(sub_model_type, i), - 'name': sub_model_type, - 'info': info - }, - 'classes': sub_model_type - }) + cy_nodes.append( + { + "data": { + "id": "{}_{}".format(sub_model_type, i), + "name": sub_model_type, + "info": info, + }, + "classes": sub_model_type, + } + ) i += 1 - cy_nodes.append({ - 'data': { - 'id': 'output_node', - 'name': '', - 'info': { - 'type': 'output node' - }, - 'classes': 'output', - + cy_nodes.append( + { + "data": { + "id": "output_node", + "name": "", + "info": {"type": "output node"}, + "classes": "output", + } } - }) + ) for model_output, output_type in spec_outputs: - cy_nodes.append({ - 'data': { - 'id': str(model_output), - 'name': str(model_output), - 'info': { - 'type': "\n".join(str(output_type).split("\n")), - 'inputs': str([model_output]), - 'outputs': str([]) + cy_nodes.append( + { + "data": { + "id": str(model_output), + "name": str(model_output), + "info": { + "type": "\n".join(str(output_type).split("\n")), + "inputs": str([model_output]), + "outputs": str([]), + }, + "parent": "output_node", }, - 'parent' : 'output_node' - }, - 'classes': 'output' - }) - + "classes": "output", + } + ) cy_nodes, cy_edges = _calculate_edges(cy_nodes, cy_edges, shape_dict) @@ -795,12 +888,13 @@ def _start_server(port, web_dir): import subprocess import sys import webbrowser + if port is None: port = _np.random.randint(8000, 9000) if sys.version_info[0] <= 2: - subprocess.Popen(['python', '-m', 'SimpleHTTPServer', str(port)]) + subprocess.Popen(["python", "-m", "SimpleHTTPServer", str(port)]) else: - subprocess.Popen(['python', '-m', 'http.server', str(port)]) - webbrowser.open_new_tab('http://localhost:{}'.format(str(port))) + subprocess.Popen(["python", "-m", "http.server", str(port)]) + webbrowser.open_new_tab("http://localhost:{}".format(str(port))) _os.chdir(curr_dir) return True diff --git a/coremltools/models/_infer_shapes_nn_mlmodel.py b/coremltools/models/_infer_shapes_nn_mlmodel.py index c0ab6e571..ea5f0ae88 100644 --- a/coremltools/models/_infer_shapes_nn_mlmodel.py +++ b/coremltools/models/_infer_shapes_nn_mlmodel.py @@ -10,7 +10,9 @@ def _get_translator_function(layer_type): if layer_type in _LAYER_REGISTERY: return _LAYER_REGISTERY[layer_type] else: - raise TypeError("Shape computation function missing for layer of type %s." % layer_type) + raise TypeError( + "Shape computation function missing for layer of type %s." 
% layer_type + ) def _identity(layer, shape_dict): @@ -20,7 +22,7 @@ def _identity(layer, shape_dict): def _convolution(layer, shape_dict): params = layer.convolution Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] - + n_groups = params.nGroups Kh = Kw = 3 hstride = wstride = hdilation = wdilation = 1 @@ -30,40 +32,40 @@ def _convolution(layer, shape_dict): hstride, wstride = params.stride if len(params.dilationFactor) != 0: hdilation, wdilation = params.dilationFactor - Kh_dilated = (Kh-1) * hdilation + 1 - Kw_dilated = (Kw-1) * wdilation + 1 + Kh_dilated = (Kh - 1) * hdilation + 1 + Kw_dilated = (Kw - 1) * wdilation + 1 l = r = b = t = 0 - if params.WhichOneof('ConvolutionPaddingType') == 'valid': + if params.WhichOneof("ConvolutionPaddingType") == "valid": if len(params.valid.paddingAmounts.borderAmounts) != 0: t = params.valid.paddingAmounts.borderAmounts[0].startEdgeSize b = params.valid.paddingAmounts.borderAmounts[0].endEdgeSize l = params.valid.paddingAmounts.borderAmounts[1].startEdgeSize r = params.valid.paddingAmounts.borderAmounts[1].endEdgeSize if params.isDeconvolution: - Hout = (Hin -1) * hstride + Kh_dilated - t - b - Wout = (Win -1) * wstride + Kw_dilated - r - l + Hout = (Hin - 1) * hstride + Kh_dilated - t - b + Wout = (Win - 1) * wstride + Kw_dilated - r - l else: - Hout = (Hin + t + b - Kh_dilated)/hstride + 1 - Wout = (Win + r + l - Kw_dilated)/wstride + 1 + Hout = (Hin + t + b - Kh_dilated) / hstride + 1 + Wout = (Win + r + l - Kw_dilated) / wstride + 1 else: if params.isDeconvolution: Hout = Hin * hstride Wout = Win * wstride else: - Hout = math.ceil(Hin/float(hstride)) - Wout = math.ceil(Win/float(wstride)) - + Hout = math.ceil(Hin / float(hstride)) + Wout = math.ceil(Win / float(wstride)) + if params.isDeconvolution: if len(params.outputShape) != 0: - Hout, Wout = params.outputShape - - Cout = params.outputChannels - shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), int(Hout), int(Wout)) + Hout, Wout = params.outputShape + + Cout = params.outputChannels + shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), int(Hout), int(Wout)) def _pooling(layer, shape_dict): params = layer.pooling - Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] + Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] Kh = Kw = 3 hstride = wstride = 1 @@ -75,32 +77,32 @@ def _pooling(layer, shape_dict): if params.globalPooling: Hout = Wout = 1 else: - if params.WhichOneof('PoolingPaddingType') == 'valid': + if params.WhichOneof("PoolingPaddingType") == "valid": if len(params.valid.paddingAmounts.borderAmounts) != 0: t = params.valid.paddingAmounts.borderAmounts[0].startEdgeSize b = params.valid.paddingAmounts.borderAmounts[0].endEdgeSize l = params.valid.paddingAmounts.borderAmounts[1].startEdgeSize r = params.valid.paddingAmounts.borderAmounts[1].endEdgeSize - Hout = (Hin + t + b - Kh)/hstride + 1 - Wout = (Win + r + l - Kw)/wstride + 1 - elif params.WhichOneof('PoolingPaddingType') == 'same': - Hout = math.ceil(Hin/float(hstride)) - Wout = math.ceil(Win/float(wstride)) + Hout = (Hin + t + b - Kh) / hstride + 1 + Wout = (Win + r + l - Kw) / wstride + 1 + elif params.WhichOneof("PoolingPaddingType") == "same": + Hout = math.ceil(Hin / float(hstride)) + Wout = math.ceil(Win / float(wstride)) else: if len(params.includeLastPixel.paddingAmounts) != 0: t = params.includeLastPixel.paddingAmounts[0] b = t l = params.includeLastPixel.paddingAmounts[1] r = l - Hout = math.ceil((Hin + 2*t - Kh)/float(hstride)) + 1 - Wout = math.ceil((Win + 2*l - Kw)/float(wstride)) + 1 + Hout = 
math.ceil((Hin + 2 * t - Kh) / float(hstride)) + 1 + Wout = math.ceil((Win + 2 * l - Kw) / float(wstride)) + 1 if t or l: - if (Hout-1) * hstride >= Hin + t: + if (Hout - 1) * hstride >= Hin + t: Hout -= 1 - if (Wout-1) * wstride >= Win + l: + if (Wout - 1) * wstride >= Win + l: Wout -= 1 - - shape_dict[layer.output[0]] = (Seq, Batch, int(Cin), int(Hout), int(Wout)) + + shape_dict[layer.output[0]] = (Seq, Batch, int(Cin), int(Hout), int(Wout)) def _inner_product(layer, shape_dict): @@ -114,13 +116,13 @@ def _embedding(layer, shape_dict): params = layer.embedding Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] Cout = params.outputChannels - shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), 1, 1) + shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), 1, 1) def _crop(layer, shape_dict): params = layer.crop Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] - + l = r = t = b = 0 if len(layer.input) == 1: if len(params.cropAmounts.borderAmounts) != 0: @@ -132,15 +134,15 @@ def _crop(layer, shape_dict): Wout = Win - l - r else: Hout = shape_dict[layer.input[1]][3] - Wout = shape_dict[layer.input[1]][4] - - shape_dict[layer.output[0]] = (Seq, Batch, Cin, int(Hout), int(Wout)) + Wout = shape_dict[layer.input[1]][4] + + shape_dict[layer.output[0]] = (Seq, Batch, Cin, int(Hout), int(Wout)) def _padding(layer, shape_dict): params = layer.padding - Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] - + Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] + l = r = t = b = 0 if len(params.paddingAmounts.borderAmounts) != 0: t = params.paddingAmounts.borderAmounts[0].startEdgeSize @@ -148,56 +150,56 @@ def _padding(layer, shape_dict): l = params.paddingAmounts.borderAmounts[1].startEdgeSize r = params.paddingAmounts.borderAmounts[1].endEdgeSize Hout = Hin + t + b - Wout = Win + l + r - shape_dict[layer.output[0]] = (Seq, Batch, Cin, int(Hout), int(Wout)) + Wout = Win + l + r + shape_dict[layer.output[0]] = (Seq, Batch, Cin, int(Hout), int(Wout)) def _upsample(layer, shape_dict): params = layer.upsample - Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] - + Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] + sh = sw = 1 if len(params.scalingFactor) != 0: sh, sw = params.scalingFactor Hout = Hin * sh Wout = Win * sw - shape_dict[layer.output[0]] = (Seq, Batch, Cin, int(Hout), int(Wout)) + shape_dict[layer.output[0]] = (Seq, Batch, Cin, int(Hout), int(Wout)) def _add(layer, shape_dict): - Seq, Batch, C, H, W = shape_dict[layer.input[0]] - + Seq, Batch, C, H, W = shape_dict[layer.input[0]] + for i, inp in enumerate(layer.input): - if i==0: + if i == 0: continue _, _, c, h, w = shape_dict[inp] C = max(C, c) H = max(H, h) W = max(W, w) - shape_dict[layer.output[0]] = (Seq, Batch, int(C), int(H), int(W)) + shape_dict[layer.output[0]] = (Seq, Batch, int(C), int(H), int(W)) def _dot(layer, shape_dict): - Seq, Batch, _, _, _ = shape_dict[layer.input[0]] - shape_dict[layer.output[0]] = (Seq, Batch, 1, 1, 1) + Seq, Batch, _, _, _ = shape_dict[layer.input[0]] + shape_dict[layer.output[0]] = (Seq, Batch, 1, 1, 1) def _reduce(layer, shape_dict): params = layer.reduce - Seq, Batch, C, H, W = shape_dict[layer.input[0]] - + Seq, Batch, C, H, W = shape_dict[layer.input[0]] + axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Name(params.axis) - if axis == 'CHW': + if axis == "CHW": C = H = W = 1 - elif axis == 'HW': + elif axis == "HW": H = W = 1 - elif axis == 'C': + elif axis == "C": C = 1 - elif axis == 'H': + elif axis == "H": H = 1 - elif axis == 'W': - W = 1 - + elif axis == "W": 
+ W = 1 + shape_dict[layer.output[0]] = (Seq, Batch, int(C), int(H), int(W)) @@ -210,12 +212,12 @@ def _load_constant(layer, shape_dict): def _reshape(layer, shape_dict): params = layer.reshape Seq, Batch, _, _, _ = shape_dict[layer.input[0]] - + if len(params.targetShape) == 3: C, H, W = params.targetShape else: - Seq, C, H, W = params.targetShape - + Seq, C, H, W = params.targetShape + shape_dict[layer.output[0]] = (int(Seq), Batch, int(C), int(H), int(W)) @@ -223,12 +225,13 @@ def _flatten(layer, shape_dict): params = layer.permute Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] - shape_dict[layer.output[0]] = (int(Seq), int(Batch), int(Cin*Hin*Win), 1, 1) + shape_dict[layer.output[0]] = (int(Seq), int(Batch), int(Cin * Hin * Win), 1, 1) + def _permute(layer, shape_dict): params = layer.permute Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] - + axis = list(map(int, params.axis)) dims = (Seq, Cin, Hin, Win) Seq_out = dims[axis[0]] @@ -240,31 +243,32 @@ def _permute(layer, shape_dict): def _concat(layer, shape_dict): params = layer.concat - Seq, Batch, C , H, W = shape_dict[layer.input[0]] - + Seq, Batch, C, H, W = shape_dict[layer.input[0]] + if params.sequenceConcat: Seq = 0 for inp in layer.input: - Seq += shape_dict[inp][0] - else: + Seq += shape_dict[inp][0] + else: C = 0 for inp in layer.input: C += shape_dict[inp][2] - + shape_dict[layer.output[0]] = (int(Seq), Batch, int(C), int(H), int(W)) def _split(layer, shape_dict): input_shape = shape_dict[layer.input[0]] Seq, Batch, C, H, W = input_shape - for out in layer.output: - shape_dict[out] = (Seq, Batch, C / len(layer.output), H, W) + for out in layer.output: + shape_dict[out] = (Seq, Batch, C / len(layer.output), H, W) def _sequence_repeat(layer, shape_dict): params = layer.sequenceRepeat n = params.nRepetitions - if n == 0: n = 1 + if n == 0: + n = 1 Seq, Batch, C, H, W = shape_dict[layer.input[0]] shape_dict[layer.output[0]] = (int(Seq * n), Batch, C, H, W) @@ -273,8 +277,10 @@ def _reorganize_data(layer, shape_dict): params = layer.reorganizeData Seq, Batch, Cin, Hin, Win = shape_dict[layer.input[0]] block_size = params.blockSize - Type = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Name(params.mode) - if Type == 'SPACE_TO_DEPTH': + Type = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Name( + params.mode + ) + if Type == "SPACE_TO_DEPTH": Cout = Cin * block_size * block_size Hout = Hin / block_size Wout = Win / block_size @@ -282,7 +288,7 @@ def _reorganize_data(layer, shape_dict): Cout = Cin / (block_size * block_size) Hout = Hin * block_size Wout = Win * block_size - shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), int(Hout), int(Wout)) + shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), int(Hout), int(Wout)) def _slice(layer, shape_dict): @@ -292,57 +298,65 @@ def _slice(layer, shape_dict): end = params.endIndex stride = params.stride axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Name(params.axis) - if axis == 'CHANNEL_AXIS': N = C - if axis == 'HEIGHT_AXIS': N = H - if axis == 'WIDTH_AXIS': N = W - if end < 0: end = end + N + if axis == "CHANNEL_AXIS": + N = C + if axis == "HEIGHT_AXIS": + N = H + if axis == "WIDTH_AXIS": + N = W + if end < 0: + end = end + N end = min(end, N) - if start > N-1: + if start > N - 1: L = 0 else: - L = np.floor((end - 1 - start)/stride) + 1 - if L<0 : L = 0 - if axis == 'CHANNEL_AXIS': C = L - if axis == 'HEIGHT_AXIS': H = L - if axis == 'WIDTH_AXIS': W = L + L = np.floor((end - 1 - start) / stride) + 1 + if L < 0: + L = 0 + if 
axis == "CHANNEL_AXIS": + C = L + if axis == "HEIGHT_AXIS": + H = L + if axis == "WIDTH_AXIS": + W = L shape_dict[layer.output[0]] = (Seq, Batch, int(C), int(H), int(W)) def _simple_recurrent(layer, shape_dict): params = layer.simpleRecurrent Seq, Batch, C, H, W = shape_dict[layer.input[0]] - + Cout = params.outputVectorSize if params.sequenceOutput: shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), 1, 1) else: - shape_dict[layer.output[0]] = (1, Batch, int(Cout), 1, 1) - shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[0]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) def _gru(layer, shape_dict): params = layer.gru Seq, Batch, C, H, W = shape_dict[layer.input[0]] - + Cout = params.outputVectorSize if params.sequenceOutput: shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), 1, 1) else: - shape_dict[layer.output[0]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[0]] = (1, Batch, int(Cout), 1, 1) shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) def _uni_directional_lstm(layer, shape_dict): params = layer.uniDirectionalLSTM Seq, Batch, C, H, W = shape_dict[layer.input[0]] - + Cout = params.outputVectorSize if params.params.sequenceOutput: shape_dict[layer.output[0]] = (Seq, Batch, int(Cout), 1, 1) else: - shape_dict[layer.output[0]] = (1, Batch, int(Cout), 1, 1) - shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) - shape_dict[layer.output[2]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[0]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[2]] = (1, Batch, int(Cout), 1, 1) def _bi_directional_lstm(layer, shape_dict): @@ -352,115 +366,119 @@ def _bi_directional_lstm(layer, shape_dict): if params.params.sequenceOutput: shape_dict[layer.output[0]] = (Seq, Batch, 2 * int(Cout), 1, 1) else: - shape_dict[layer.output[0]] = (1, Batch, 2 * int(Cout), 1, 1) - shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) + shape_dict[layer.output[0]] = (1, Batch, 2 * int(Cout), 1, 1) + shape_dict[layer.output[1]] = (1, Batch, int(Cout), 1, 1) shape_dict[layer.output[2]] = (1, Batch, int(Cout), 1, 1) shape_dict[layer.output[3]] = (1, Batch, int(Cout), 1, 1) shape_dict[layer.output[4]] = (1, Batch, int(Cout), 1, 1) _LAYER_REGISTERY = { - 'convolution': _convolution, - 'pooling': _pooling, - 'activation': _identity, - 'innerProduct': _inner_product, - 'embedding': _embedding, - 'batchnorm': _identity, - 'mvn': _identity, - 'l2normalize': _identity, - 'softmax': _identity, - 'lrn': _identity, - 'crop': _crop, - 'padding': _padding, - 'upsample': _upsample, - 'unary': _identity, - 'add': _add, - 'multiply': _add, - 'average': _add, - 'scale': _add, - 'bias': _add, - 'max': _add, - 'min': _add, - 'dot': _dot, - 'reduce': _reduce, - 'loadConstant': _load_constant, - 'reshape': _reshape, - 'flatten': _flatten, - 'permute': _permute, - 'concat': _concat, - 'split': _split, - 'sequenceRepeat': _sequence_repeat, - 'reorganizeData': _reorganize_data, - 'slice': _slice, - 'simpleRecurrent': _simple_recurrent, - 'gru': _gru, - 'uniDirectionalLSTM': _uni_directional_lstm, - 'biDirectionalLSTM': _bi_directional_lstm -} - - -def infer_shapes(nn_spec, input_spec, input_shape_dict = None): - + "convolution": _convolution, + "pooling": _pooling, + "activation": _identity, + "innerProduct": _inner_product, + "embedding": _embedding, + "batchnorm": _identity, + "mvn": _identity, + "l2normalize": _identity, + "softmax": _identity, + "lrn": 
_identity, + "crop": _crop, + "padding": _padding, + "upsample": _upsample, + "unary": _identity, + "add": _add, + "multiply": _add, + "average": _add, + "scale": _add, + "bias": _add, + "max": _add, + "min": _add, + "dot": _dot, + "reduce": _reduce, + "loadConstant": _load_constant, + "reshape": _reshape, + "flatten": _flatten, + "permute": _permute, + "concat": _concat, + "split": _split, + "sequenceRepeat": _sequence_repeat, + "reorganizeData": _reorganize_data, + "slice": _slice, + "simpleRecurrent": _simple_recurrent, + "gru": _gru, + "uniDirectionalLSTM": _uni_directional_lstm, + "biDirectionalLSTM": _bi_directional_lstm, +} + + +def infer_shapes(nn_spec, input_spec, input_shape_dict=None): """ Input: - + spec : mlmodel spec - input_shape_dict: dictionary of string --> tuple + input_shape_dict: dictionary of string --> tuple string: input name tuple: input shape as a 5 length tuple in order (Seq, Batch, C, H, W) - + If input_shape_dict is not provided, input shapes are inferred from the input description in the mlmodel. Since the description in the specification only contains values of C,H,W; Seq and Batch dimensions are set to 1. - + Output: - - shape_dict: dictionary containing all the blobs in the neural network and their shapes, expressed as length 5 tuples, + + shape_dict: dictionary containing all the blobs in the neural network and their shapes, expressed as length 5 tuples, to be interpreted in order (Seq, Batch, C, H, W). """ - + shape_dict = {} if input_shape_dict: for key, value in input_shape_dict.items(): - assert len(value) == 5, 'Shape of the input must be of length 5' + assert len(value) == 5, "Shape of the input must be of length 5" shape_dict[key] = value - - # construct input_shape_dict from the model description + + # construct input_shape_dict from the model description else: for inp in input_spec: input_name = inp.name C = H = W = 1 - if inp.type.WhichOneof('Type') == 'imageType': + if inp.type.WhichOneof("Type") == "imageType": W = int(inp.type.imageType.width) H = int(inp.type.imageType.height) - colorspace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Name(inp.type.imageType.colorSpace) - if colorspace == 'GRAYSCALE': + colorspace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Name( + inp.type.imageType.colorSpace + ) + if colorspace == "GRAYSCALE": C = 1 - elif colorspace == 'RGB' or colorspace == 'BGR': + elif colorspace == "RGB" or colorspace == "BGR": C = 3 - else: - raise ValueError('Input %s : Invalid Colorspace' %(input_name)) - elif inp.type.WhichOneof('Type') == 'multiArrayType': + else: + raise ValueError("Input %s : Invalid Colorspace" % (input_name)) + elif inp.type.WhichOneof("Type") == "multiArrayType": array_shape = inp.type.multiArrayType.shape if len(array_shape) == 1: C = array_shape[0] elif len(array_shape) == 3: C, H, W = map(int, array_shape) else: - raise ValueError("Input %s : Multi array must be of length 1 or 3" %(input_name)) - else: - raise ValueError("Input %s : Input type must be image or multi-array" %(input_name)) + raise ValueError( + "Input %s : Multi array must be of length 1 or 3" % (input_name) + ) + else: + raise ValueError( + "Input %s : Input type must be image or multi-array" % (input_name) + ) shape_dict[input_name] = (1, 1, C, H, W) layers = nn_spec.layers for i, layer in enumerate(layers): for inp in layer.input: - assert inp in shape_dict, ('Input %s shape not cannot be determined' %(inp)) - layer_type = layer.WhichOneof('layer') - if layer_type == 'custom': + assert inp in shape_dict, "Input %s shape cannot 
be determined" % (inp) + layer_type = layer.WhichOneof("layer") + if layer_type == "custom": break layer_translator = _get_translator_function(layer_type) layer_translator(layer, shape_dict) - return shape_dict diff --git a/coremltools/models/_interface_management.py b/coremltools/models/_interface_management.py index 25d424cfe..897488ff6 100644 --- a/coremltools/models/_interface_management.py +++ b/coremltools/models/_interface_management.py @@ -12,8 +12,14 @@ from ..proto import Model_pb2 -def set_classifier_interface_params(spec, features, class_labels, - model_accessor_for_class_labels, output_features = None, training_features=None): +def set_classifier_interface_params( + spec, + features, + class_labels, + model_accessor_for_class_labels, + output_features=None, + training_features=None, +): """ Common utilities to set the regression interface params. """ @@ -25,7 +31,9 @@ def set_classifier_interface_params(spec, features, class_labels, n_classes = len(class_labels) - output_features = _fm.process_or_validate_classifier_output_features(output_features, class_labels) + output_features = _fm.process_or_validate_classifier_output_features( + output_features, class_labels + ) if len(output_features) == 1: predicted_class_output, pred_cl_type = output_features[0] @@ -34,25 +42,35 @@ def set_classifier_interface_params(spec, features, class_labels, predicted_class_output, pred_cl_type = output_features[0] score_output, score_output_type = output_features[1] else: - raise ValueError("Provided output classes for a classifier must be " - "a list of features, predicted class and (optionally) class_score.") + raise ValueError( + "Provided output classes for a classifier must be " + "a list of features, predicted class and (optionally) class_score." + ) spec.description.predictedFeatureName = predicted_class_output # Are they out of order? if not (pred_cl_type == datatypes.Int64() or pred_cl_type == datatypes.String()): - raise ValueError("Provided predicted class output type not Int64 or String (%s)." - % repr(pred_cl_type)) + raise ValueError( + "Provided predicted class output type not Int64 or String (%s)." + % repr(pred_cl_type) + ) if score_output is not None: if not isinstance(score_output_type, datatypes.Dictionary): - raise ValueError("Provided class score output type not a Dictionary (%s)." - % repr(score_output_type)) + raise ValueError( + "Provided class score output type not a Dictionary (%s)." + % repr(score_output_type) + ) if score_output_type.key_type != pred_cl_type: - raise ValueError(("Provided class score output (%s) key_type (%s) does not " - "match type of class prediction (%s).") - % (score_output, repr(score_output_type.key_type), repr(pred_cl_type))) + raise ValueError( + ( + "Provided class score output (%s) key_type (%s) does not " + "match type of class prediction (%s)." 
+ ) + % (score_output, repr(score_output_type.key_type), repr(pred_cl_type)) + ) spec.description.predictedProbabilitiesName = score_output @@ -61,7 +79,7 @@ def set_classifier_interface_params(spec, features, class_labels, input_ = spec.description.input.add() input_.name = cur_input_name datatypes._set_datatype(input_.type, input_type) - + # add output for index, (cur_output_name, output_type) in enumerate(output_features): output_ = spec.description.output.add() @@ -76,9 +94,11 @@ def set_classifier_interface_params(spec, features, class_labels, if pred_cl_type == datatypes.String(): try: for c in class_labels: - getattr(spec, model_accessor_for_class_labels).stringClassLabels.vector.append(str(c)) + getattr( + spec, model_accessor_for_class_labels + ).stringClassLabels.vector.append(str(c)) # Not all the classifiers have class labels; in particular the pipeline - # classifier. Thus it's not an error if we can't actually set them. + # classifier. Thus it's not an error if we can't actually set them. except AttributeError: pass @@ -92,13 +112,17 @@ def set_classifier_interface_params(spec, features, class_labels, conv_error = True if conv_error: - raise TypeError(("Cannot cast '%s' class to an int type " % str(c)) - + "(class type determined by type of first class).") - + raise TypeError( + ("Cannot cast '%s' class to an int type " % str(c)) + + "(class type determined by type of first class)." + ) + try: - getattr(spec, model_accessor_for_class_labels).int64ClassLabels.vector.append(int(c)) + getattr( + spec, model_accessor_for_class_labels + ).int64ClassLabels.vector.append(int(c)) # Not all the classifiers have class labels; in particular the pipeline - # classifier. Thus it's not an error if we can't actually set them. + # classifier. Thus it's not an error if we can't actually set them. except AttributeError: break @@ -106,22 +130,25 @@ def set_classifier_interface_params(spec, features, class_labels, return spec -def set_regressor_interface_params(spec, features, output_features, training_features=None): +def set_regressor_interface_params( + spec, features, output_features, training_features=None +): """ Common utilities to set the regressor interface params. """ if output_features is None: output_features = [("predicted_class", datatypes.Double())] else: output_features = _fm.process_or_validate_features(output_features, 1) - + if len(output_features) != 1: - raise ValueError("Provided output features for a regressor must be " - "one Double feature.") - + raise ValueError( + "Provided output features for a regressor must be " "one Double feature." + ) + if output_features[0][1] != datatypes.Double(): raise ValueError("Output type of a regressor must be a Double.") - prediction_name = output_features[0][0] + prediction_name = output_features[0][0] spec.description.predictedFeatureName = prediction_name # Normalize the features list. 
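The `set_classifier_interface_params` checks above encode a small contract on `output_features`: the predicted-class feature must compare equal to `datatypes.Int64()` or `datatypes.String()`, and the optional class-score feature must be a `datatypes.Dictionary` whose `key_type` matches the predicted-class type. A minimal sketch of a conforming list follows; the feature names are illustrative assumptions, since the code only constrains the types:

```python
from coremltools.models import datatypes

# Hypothetical feature names; only the types are checked.
output_features = [
    # Predicted class: must be datatypes.Int64() or datatypes.String(),
    # otherwise set_classifier_interface_params raises ValueError.
    ("classLabel", datatypes.String()),
    # Optional class scores: a Dictionary whose key_type matches the
    # predicted-class type above, or a ValueError is raised.
    ("classProbs", datatypes.Dictionary(key_type=datatypes.String())),
]
```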
@@ -138,13 +165,19 @@ def set_regressor_interface_params(spec, features, output_features, training_fea spec = set_training_features(spec, training_features) output_ = spec.description.output.add() - output_.name = prediction_name - datatypes._set_datatype(output_.type, 'Double') + output_.name = prediction_name + datatypes._set_datatype(output_.type, "Double") return spec -def set_transform_interface_params(spec, input_features, output_features, are_optional=False, training_features=None, - array_datatype=Model_pb2.ArrayFeatureType.DOUBLE): +def set_transform_interface_params( + spec, + input_features, + output_features, + are_optional=False, + training_features=None, + array_datatype=Model_pb2.ArrayFeatureType.DOUBLE, +): """ Common utilities to set transform interface params. """ input_features = _fm.process_or_validate_features(input_features) @@ -171,7 +204,6 @@ def set_transform_interface_params(spec, input_features, output_features, are_op def set_training_features(spec, training_features): - for (fname, ftype) in training_features: training_input_ = spec.description.trainingInput.add() training_input_.name = fname diff --git a/coremltools/models/array_feature_extractor.py b/coremltools/models/array_feature_extractor.py index ca8aabfda..75351ea18 100644 --- a/coremltools/models/array_feature_extractor.py +++ b/coremltools/models/array_feature_extractor.py @@ -12,24 +12,24 @@ from ._interface_management import set_transform_interface_params - -def create_array_feature_extractor(input_features, output_name, extract_indices, - output_type = None): +def create_array_feature_extractor( + input_features, output_name, extract_indices, output_type=None +): """ Creates a feature extractor from an input array feature, return - input_features is a list of one (name, array) tuple. + input_features is a list of one (name, array) tuple. - extract_indices is either an integer or a list. If it's an integer, + extract_indices is either an integer or a list. If it's an integer, the output type is by default a double (but may also be an integer). - If a list, the output type is an array. + If a list, the output type is an array. """ # Make sure that our starting stuff is in the proper form. assert len(input_features) == 1 assert isinstance(input_features[0][1], datatypes.Array) - # Create the model. + # Create the model. 
spec = _Model_pb2.Model() spec.specificationVersion = SPECIFICATION_VERSION @@ -56,6 +56,4 @@ def create_array_feature_extractor(input_features, output_name, extract_indices, set_transform_interface_params(spec, input_features, output_features) - return spec - - + return spec diff --git a/coremltools/models/datatypes.py b/coremltools/models/datatypes.py index 0bfce03cf..b1c7d20c9 100644 --- a/coremltools/models/datatypes.py +++ b/coremltools/models/datatypes.py @@ -14,7 +14,6 @@ class _DatatypeBase(object): - def __init__(self, type_tag, full_tag, num_elements): self.type_tag, self.full_tag = type_tag, full_tag self.num_elements = num_elements @@ -81,17 +80,21 @@ def __init__(self, *dimensions): >>> multi_arr = coremltools.models.datatypes.Array(5, 2, 10) """ assert len(dimensions) >= 1 - assert all(isinstance(d, _integer_types + (_np.int64,)) for d in dimensions), \ - "Dimensions must be ints, not {}".format(str(dimensions)) + assert all( + isinstance(d, _integer_types + (_np.int64, _np.int32)) for d in dimensions + ), "Dimensions must be ints, not {}".format(str(dimensions)) self.dimensions = dimensions num_elements = 1 for d in self.dimensions: num_elements *= d - _DatatypeBase.__init__(self, "Array", - "Array({%s})" % (",".join("%d" % d for d in self.dimensions)), - num_elements) + _DatatypeBase.__init__( + self, + "Array", + "Array({%s})" % (",".join("%d" % d for d in self.dimensions)), + num_elements, + ) class Dictionary(_DatatypeBase): @@ -127,20 +130,22 @@ def __init__(self, key_type=None): self.key_type = key_type - _DatatypeBase.__init__(self, "Dictionary", - "Dictionary(%s)" % repr(self.key_type), - None) + _DatatypeBase.__init__( + self, "Dictionary", "Dictionary(%s)" % repr(self.key_type), None + ) -_simple_type_remap = {int: Int64(), - str: String(), - float: Double(), - Double: Double(), - Int64: Int64(), - String: String(), - 'Double': Double(), - 'Int64': Int64(), - 'String': String()} +_simple_type_remap = { + int: Int64(), + str: String(), + float: Double(), + Double: Double(), + Int64: Int64(), + String: String(), + "Double": Double(), + "Int64": Int64(), + "String": String(), +} def _is_valid_datatype(datatype_instance): @@ -196,7 +201,9 @@ def _normalize_datatype(datatype_instance): raise ValueError("Datatype instance not recognized.") -def _set_datatype(proto_type_obj, datatype_instance, array_datatype=Model_pb2.ArrayFeatureType.DOUBLE): +def _set_datatype( + proto_type_obj, datatype_instance, array_datatype=Model_pb2.ArrayFeatureType.DOUBLE +): # Remap so we can still use the python types for the simple cases global _simple_type_remap if datatype_instance in _simple_type_remap: @@ -204,34 +211,36 @@ def _set_datatype(proto_type_obj, datatype_instance, array_datatype=Model_pb2.Ar # Now set the protobuf from this interface. 
if isinstance(datatype_instance, Int64): - proto_type_obj.int64Type.MergeFromString(b'') + proto_type_obj.int64Type.MergeFromString(b"") elif isinstance(datatype_instance, Double): - proto_type_obj.doubleType.MergeFromString(b'') + proto_type_obj.doubleType.MergeFromString(b"") elif isinstance(datatype_instance, String): - proto_type_obj.stringType.MergeFromString(b'') + proto_type_obj.stringType.MergeFromString(b"") elif isinstance(datatype_instance, Array): - proto_type_obj.multiArrayType.MergeFromString(b'') + proto_type_obj.multiArrayType.MergeFromString(b"") proto_type_obj.multiArrayType.dataType = array_datatype for n in datatype_instance.dimensions: proto_type_obj.multiArrayType.shape.append(n) elif isinstance(datatype_instance, Dictionary): - proto_type_obj.dictionaryType.MergeFromString(b'') + proto_type_obj.dictionaryType.MergeFromString(b"") kt = datatype_instance.key_type if isinstance(kt, Int64): - proto_type_obj.dictionaryType.int64KeyType.MergeFromString(b'') + proto_type_obj.dictionaryType.int64KeyType.MergeFromString(b"") elif isinstance(kt, String): - proto_type_obj.dictionaryType.stringKeyType.MergeFromString(b'') + proto_type_obj.dictionaryType.stringKeyType.MergeFromString(b"") else: raise ValueError("Dictionary key type must be either string or int.") else: - raise TypeError("Datatype parameter not recognized; must be an instance " - "of datatypes.{Double, Int64, String, Dictionary, Array}, or " - "python int, float, or str types.") + raise TypeError( + "Datatype parameter not recognized; must be an instance " + "of datatypes.{Double, Int64, String, Dictionary, Array}, or " + "python int, float, or str types." + ) diff --git a/coremltools/models/feature_vectorizer.py b/coremltools/models/feature_vectorizer.py index e70c87a52..19e449a52 100644 --- a/coremltools/models/feature_vectorizer.py +++ b/coremltools/models/feature_vectorizer.py @@ -13,38 +13,37 @@ from ._feature_management import is_valid_feature_list, process_or_validate_features -def create_feature_vectorizer(input_features, output_feature_name, - known_size_map = {}): +def create_feature_vectorizer(input_features, output_feature_name, known_size_map={}): """ - Creates a feature vectorizer from input features, return the spec for + Creates a feature vectorizer from input features, return the spec for a feature vectorizer that puts everything into a single array of length - equal to the total size of all the input features. Returns a 2-tuple + equal to the total size of all the input features. Returns a 2-tuple `(spec, num_dimension)` Parameters ---------- input_features: [list of 2-tuples] Name(s) of the input features, given as a list of `('name', datatype)` - tuples. The datatypes entry is one of the data types defined in the - :ref:`datatypes` module. Allowed datatypes are :ref:`datatype.Int64`, - :ref:`datatype.Double`, :ref:`datatypes.Dictionary`, - or :ref:`datatype.Array`. - - If the feature is a dictionary type, then the dictionary must have integer - keys, and the number of dimensions to expand it into must be given by - `known_size_map`. - - Feature indices in the final array are counted sequentially from the - from 0 through the total number of features. + tuples. The datatypes entry is one of the data types defined in the + :ref:`datatypes` module. Allowed datatypes are :ref:`datatype.Int64`, + :ref:`datatype.Double`, :ref:`datatypes.Dictionary`, + or :ref:`datatype.Array`. 
+ + If the feature is a dictionary type, then the dictionary must have integer + keys, and the number of dimensions to expand it into must be given by + `known_size_map`. + + Feature indices in the final array are counted sequentially + from 0 through the total number of features. output_feature_name: str - The name of the output feature. The type is an Array - List of output feature of the network. + The name of the output feature. The type is an Array; it is the + output feature of the network. - known_size_map: - A dictionary mapping the feature name to the expanded size in the final - array. This is most useful for specifying the size of sparse vectors + known_size_map: + A dictionary mapping the feature name to the expanded size in the final + array. This is most useful for specifying the size of sparse vectors given as dictionaries of index to value. """ @@ -57,18 +56,24 @@ def create_feature_vectorizer(input_features, output_feature_name, feature_vectorizer = spec.featureVectorizer num_output_dimensions = 0 - + for n, ft in input_features: if n in known_size_map: dim = known_size_map[n] if ft.num_elements is not None: if dim != ft.num_elements: - raise ValueError("In feature {}, override size {} not compatible with inherent " - "value size {}.".format(n, dim, ft.num_elements)) + raise ValueError( + "In feature {}, override size {} not compatible with inherent " + "value size {}.".format(n, dim, ft.num_elements) + ) else: if ft.num_elements is None: - raise ValueError("In feature {}, inherent size unknown so must be manually supplied.".format(n)) + raise ValueError( + "In feature {}, inherent size unknown so must be manually supplied.".format( + n + ) + ) dim = ft.num_elements num_output_dimensions += dim @@ -78,18 +83,20 @@ def create_feature_vectorizer(input_features, output_feature_name, new_feature.inputDimensions = dim if not isinstance(output_feature_name, _string_types): - if (is_valid_feature_list(output_feature_name) - and len(output_feature_name) == 1 - and output_feature_name[0][1] == datatypes.Array(num_output_dimensions)): + if ( + is_valid_feature_list(output_feature_name) + and len(output_feature_name) == 1 + and output_feature_name[0][1] == datatypes.Array(num_output_dimensions) + ): output_feature_name = output_feature_name[0][0] else: - raise TypeError("Output feature must be specified as a feature name or correct output feature list.") + raise TypeError( + "Output feature must be specified as a feature name or correct output feature list."
+ ) output_features = [(output_feature_name, datatypes.Array(num_output_dimensions))] set_transform_interface_params(spec, input_features, output_features) return spec, num_output_dimensions - - diff --git a/coremltools/models/model.py b/coremltools/models/model.py index f0c84e457..1dbbe8f7f 100644 --- a/coremltools/models/model.py +++ b/coremltools/models/model.py @@ -7,48 +7,54 @@ import tempfile as _tempfile import warnings as _warnings from copy import deepcopy as _deepcopy +from six import string_types as _string_types -from ._graph_visualization import \ - _neural_network_nodes_and_edges, \ - _pipeline_nodes_and_edges, _start_server -from .utils import has_custom_layer as _has_custom_layer +from .utils import _has_custom_layer as _has_custom_layer from .utils import load_spec as _load_spec -from .utils import macos_version as _macos_version +from .utils import _macos_version as _macos_version from .utils import save_spec as _save_spec from ..proto import Model_pb2 as _Model_pb2 -from coremltools.models._deprecation import deprecated -_MLMODEL_FULL_PRECISION = 'float32' -_MLMODEL_HALF_PRECISION = 'float16' -_MLMODEL_QUANTIZED = 'quantized_model' +_MLMODEL_FULL_PRECISION = "float32" +_MLMODEL_HALF_PRECISION = "float16" +_MLMODEL_QUANTIZED = "quantized_model" -_VALID_MLMODEL_PRECISION_TYPES = [_MLMODEL_FULL_PRECISION, - _MLMODEL_HALF_PRECISION, - _MLMODEL_QUANTIZED] +_VALID_MLMODEL_PRECISION_TYPES = [ + _MLMODEL_FULL_PRECISION, + _MLMODEL_HALF_PRECISION, + _MLMODEL_QUANTIZED, +] # Linear quantization -_QUANTIZATION_MODE_LINEAR_QUANTIZATION = '_linear_quantization' +_QUANTIZATION_MODE_LINEAR_QUANTIZATION = "_linear_quantization" # Linear quantization represented as a lookup table -_QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR = '_lookup_table_quantization_linear' +_QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR = "_lookup_table_quantization_linear" # Lookup table quantization generated by K-Means -_QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS = '_lookup_table_quantization_kmeans' +_QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS = "_lookup_table_quantization_kmeans" # Custom lookup table quantization -_QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE = '_lookup_table_quantization_custom' +_QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE = "_lookup_table_quantization_custom" # Dequantization -_QUANTIZATION_MODE_DEQUANTIZE = '_dequantize_network' # used for testing +_QUANTIZATION_MODE_DEQUANTIZE = "_dequantize_network" # used for testing # Symmetric linear quantization -_QUANTIZATION_MODE_LINEAR_SYMMETRIC = '_linear_quantization_symmetric' +_QUANTIZATION_MODE_LINEAR_SYMMETRIC = "_linear_quantization_symmetric" -_SUPPORTED_QUANTIZATION_MODES = [_QUANTIZATION_MODE_LINEAR_QUANTIZATION, - _QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR, - _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS, - _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE, - _QUANTIZATION_MODE_DEQUANTIZE, - _QUANTIZATION_MODE_LINEAR_SYMMETRIC] +_SUPPORTED_QUANTIZATION_MODES = [ + _QUANTIZATION_MODE_LINEAR_QUANTIZATION, + _QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR, + _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS, + _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE, + _QUANTIZATION_MODE_DEQUANTIZE, + _QUANTIZATION_MODE_LINEAR_SYMMETRIC, +] -_LUT_BASED_QUANTIZATION = [_QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR, - _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS, - _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE] +_LUT_BASED_QUANTIZATION = [ + _QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR, + _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS, + _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE, +] + +_METADATA_VERSION = "com.github.apple.coremltools.version" +_METADATA_SOURCE = 
"com.github.apple.coremltools.source" class _FeatureDescription(object): @@ -56,7 +62,7 @@ def __init__(self, fd_spec): self._fd_spec = fd_spec def __repr__(self): - return "Features(%s)" % ','.join(map(lambda x: x.name, self._fd_spec)) + return "Features(%s)" % ",".join(map(lambda x: x.name, self._fd_spec)) def __len__(self): return len(self._fd_spec) @@ -101,47 +107,20 @@ def _get_proxy_and_spec(filename, use_cpu_only=False): if specification.specificationVersion > engine_version: # in this case the specification is a newer kind of .mlmodel than this # version of the engine can support so we'll not try to have a proxy object - return None, specification + return (None, specification, None) try: - return _MLModelProxy(filename, use_cpu_only), specification + return (_MLModelProxy(filename, use_cpu_only), specification, None) except RuntimeError as e: _warnings.warn( - "You will not be able to run predict() on this Core ML model." + - " Underlying exception message was: " + str(e), - RuntimeWarning) - return None, specification - - return None, specification - -@deprecated -class NeuralNetworkShaper(object): - """ - This class computes the intermediate tensor shapes for a neural network model. - """ - - def __init__(self, model, useInputAndOutputShapes=True): - - from ..libcoremlpython import _NeuralNetworkShaperProxy - - path = '' - if isinstance(model, str): - self._spec = _load_spec(model) - path = model - elif isinstance(model, _Model_pb2.Model): - self._spec = model - filename = _tempfile.mktemp(suffix='.mlmodel') - _save_spec(model, filename) - path = filename - else: - raise TypeError("Expected argument to be a path to a .mlmodel file or a Model_pb2.Model object") - - self._shaper = _NeuralNetworkShaperProxy(path, useInputAndOutputShapes) + "You will not be able to run predict() on this Core ML model." + + " Underlying exception message was: " + + str(e), + RuntimeWarning, + ) + return (None, specification, e) - def shape(self, name): - strname = str(name) - shape_dict = self._shaper.shape(strname) - return shape_dict + return (None, specification, None) class MLModel(object): @@ -170,7 +149,7 @@ class MLModel(object): >>> model.short_description = 'Predicts the price of a house in the Seattle area.' 
# Get the interface to the model - >>> model.input_descriptions + >>> model.input_description >>> model.output_description # Set feature descriptions manually @@ -212,18 +191,24 @@ def __init__(self, model, useCPUOnly=False): >>> loaded_model = MLModel('my_model_file.mlmodel') """ - if isinstance(model, str): - self.__proxy__, self._spec = _get_proxy_and_spec(model, useCPUOnly) + if isinstance(model, _string_types): + self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec( + model, useCPUOnly + ) elif isinstance(model, _Model_pb2.Model): - filename = _tempfile.mktemp(suffix='.mlmodel') + filename = _tempfile.mktemp(suffix=".mlmodel") _save_spec(model, filename) - self.__proxy__, self._spec = _get_proxy_and_spec(filename, useCPUOnly) + self.__proxy__, self._spec, self._framework_error = _get_proxy_and_spec( + filename, useCPUOnly + ) try: _os.remove(filename) except OSError: pass else: - raise TypeError("Expected model to be a .mlmodel file or a Model_pb2 object") + raise TypeError( + "Expected model to be a .mlmodel file or a Model_pb2 object" + ) self._input_description = _FeatureDescription(self._spec.description.input) self._output_description = _FeatureDescription(self._spec.description.output) @@ -264,6 +249,14 @@ def license(self): def license(self, license): self._spec.description.metadata.license = license + @property + def version(self): + return self._spec.description.metadata.versionString + + @version.setter + def version(self, version_string): + self._spec.description.metadata.versionString = version_string + def __repr__(self): return self._spec.description.__repr__() @@ -336,7 +329,9 @@ def predict(self, data, useCPUOnly=False, **kwargs): return self.__proxy__.predict(data, useCPUOnly) else: if _macos_version() < (10, 13): - raise Exception('Model prediction is only supported on macOS version 10.13 or later.') + raise Exception( + "Model prediction is only supported on macOS version 10.13 or later." + ) try: from ..libcoremlpython import _MLModelProxy @@ -348,150 +343,29 @@ def predict(self, data, useCPUOnly=False, **kwargs): _MLModelProxy = None if not _MLModelProxy: - raise Exception('Unable to load CoreML.framework. Cannot make predictions.') - elif _MLModelProxy.maximum_supported_specification_version() < self._spec.specificationVersion: + raise Exception( + "Unable to load CoreML.framework. Cannot make predictions." + ) + elif ( + _MLModelProxy.maximum_supported_specification_version() + < self._spec.specificationVersion + ): engineVersion = _MLModelProxy.maximum_supported_specification_version() - raise Exception('The specification has version ' + str(self._spec.specificationVersion) - + ' but the Core ML framework version installed only supports Core ML model specification version ' - + str(engineVersion) + ' or older.') + raise Exception( + "The specification has version " + + str(self._spec.specificationVersion) + + " but the Core ML framework version installed only supports Core ML model specification version " + + str(engineVersion) + + " or older." + ) elif _has_custom_layer(self._spec): - raise Exception('This model contains a custom neural network layer, so predict is not supported.') + raise Exception( + "This model contains a custom neural network layer, so predict is not supported." + ) else: - raise Exception('Unable to load CoreML.framework. Cannot make predictions.') - - @deprecated - def visualize_spec(self, port=None, input_shape_dict=None, title='CoreML Graph Visualization'): - """ - Visualize the model. 
- - Parameters - ---------- - port: int - if server is to be hosted on specific localhost port - - input_shape_dict: dict - The shapes are calculated assuming the batch and sequence - are 1 i.e. (1, 1, C, H, W). If either is not 1, then provide - full input shape - - title: str - Title for the visualized model - - Returns - ------- - None - - Examples - -------- - >>> model = coreml.models.MLModel('HousePricer.mlmodel') - >>> model.visualize_spec() - """ - - spec = self._spec - model_type = spec.WhichOneof('Type') - model_description = spec.description - input_spec = model_description.input - output_spec = model_description.output - - spec_inputs = [] - for model_input in input_spec: - spec_inputs.append((model_input.name, str(model_input.type))) - - spec_outputs = [] - for model_output in output_spec: - spec_outputs.append((model_output.name, str(model_output.type))) - - cy_nodes = [] - cy_edges = [] - - cy_nodes.append({ - 'data': { - 'id': 'input_node', - 'name': '', - 'info': { - 'type': 'input node' - }, - 'classes': 'input', - - } - }) - - for model_input, input_type in spec_inputs: - cy_nodes.append({ - 'data': { - 'id': str(model_input), - 'name': str(model_input), - 'info': { - 'type': "\n".join(str(input_type).split("\n")), - 'inputs': str([]), - 'outputs': str([model_input]) - }, - 'parent': 'input_node' - }, - 'classes': 'input' - }) - - if model_type == 'pipeline': - pipeline_spec = spec.pipeline - cy_data = _pipeline_nodes_and_edges(cy_nodes, - cy_edges, - pipeline_spec, - spec_outputs - ) - elif model_type == 'pipelineRegressor': - pipeline_spec = spec.pipelineRegressor.pipeline - cy_data = _pipeline_nodes_and_edges(cy_nodes, - cy_edges, - pipeline_spec, - spec_outputs - ) - elif model_type == 'pipelineClassifier': - pipeline_spec = spec.pipelineClassifier.pipeline - cy_data = _pipeline_nodes_and_edges(cy_nodes, - cy_edges, - pipeline_spec, - spec_outputs - ) - elif model_type == 'neuralNetwork': - nn_spec = spec.neuralNetwork - cy_data = _neural_network_nodes_and_edges(nn_spec, - cy_nodes, - cy_edges, - spec_outputs, - input_spec, - input_shape_dict=input_shape_dict - ) - elif model_type == 'neuralNetworkClassifier': - nn_spec = spec.neuralNetworkClassifier - cy_data = _neural_network_nodes_and_edges(nn_spec, - cy_nodes, - cy_edges, - spec_outputs, - input_spec, - input_shape_dict=input_shape_dict - ) - elif model_type == 'neuralNetworkRegressor': - nn_spec = spec.neuralNetworkRegressor - cy_data = _neural_network_nodes_and_edges(nn_spec, - cy_nodes, - cy_edges, - spec_outputs, - input_spec, - input_shape_dict=input_shape_dict - ) - else: - print("Model is not of type Pipeline or Neural Network " - "and cannot be visualized") - return - - import coremltools - web_dir = _os.path.join(_os.path.dirname(coremltools.__file__), - 'graph_visualization') - with open('{}/model.json'.format(web_dir), 'w') as file: - model_data = { - 'title': title, - 'cy_data': cy_data, - } - _json.dump(model_data, file) - - _start_server(port, web_dir) + if self._framework_error: + raise self._framework_error + else: + raise Exception( + "Unable to load CoreML.framework. Cannot make predictions." 
+ ) diff --git a/coremltools/models/nearest_neighbors/__init__.py b/coremltools/models/nearest_neighbors/__init__.py index 08b95e649..0746089ad 100644 --- a/coremltools/models/nearest_neighbors/__init__.py +++ b/coremltools/models/nearest_neighbors/__init__.py @@ -3,4 +3,4 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from .builder import * \ No newline at end of file +from .builder import * diff --git a/coremltools/models/nearest_neighbors/builder.py b/coremltools/models/nearest_neighbors/builder.py index 5a7ed2964..cd7e6cdd8 100644 --- a/coremltools/models/nearest_neighbors/builder.py +++ b/coremltools/models/nearest_neighbors/builder.py @@ -3,16 +3,15 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from ... import SPECIFICATION_VERSION - from ...proto import FeatureTypes_pb2 from .. import datatypes import coremltools -import numpy as np +import numpy as _np import six as _six + class KNearestNeighborsClassifierBuilder(object): """ KNearestNeighborsClassifierBuilder class to construct a CoreML KNearestNeighborsClassifier specification. @@ -41,27 +40,34 @@ class KNearestNeighborsClassifierBuilder(object): MLModel, save_spec """ - _VALID_INDEX_TYPES = ['linear', 'kd_tree'] + _VALID_INDEX_TYPES = ["linear", "kd_tree"] - _VALID_WEIGHTING_SCHEMES = ['uniform', 'inverse_distance'] + _VALID_WEIGHTING_SCHEMES = ["uniform", "inverse_distance"] - _VALID_DISTANCE_METRICS = ['squared_euclidean'] + _VALID_DISTANCE_METRICS = ["squared_euclidean"] # Optional parameter keys for constructor - _PARAMETER_KEY_NUMBER_OF_NEIGHBORS = 'number_of_neighbors' - _PARAMETER_KEY_WEIGHTING_SCHEME = 'weighting_scheme' - _PARAMETER_KEY_INDEX_TYPE = 'index_type' - _PARAMETER_KEY_LEAF_SIZE = 'leaf_size' - _PARAMETER_KEY_INPUT_TYPE = 'input_type' + _PARAMETER_KEY_NUMBER_OF_NEIGHBORS = "number_of_neighbors" + _PARAMETER_KEY_WEIGHTING_SCHEME = "weighting_scheme" + _PARAMETER_KEY_INDEX_TYPE = "index_type" + _PARAMETER_KEY_LEAF_SIZE = "leaf_size" + _PARAMETER_KEY_INPUT_TYPE = "input_type" # Optional parameter default values _PARAMETER_DEFAULT_NUMBER_OF_NEIGHBORS = 5 - _PARAMETER_DEFAULT_WEIGHTING_SCHEME = 'uniform' - _PARAMETER_DEFAULT_INDEX_TYPE = 'linear' + _PARAMETER_DEFAULT_WEIGHTING_SCHEME = "uniform" + _PARAMETER_DEFAULT_INDEX_TYPE = "linear" _PARAMETER_DEFAULT_LEAF_SIZE = 30 - _PARAMETER_DEFAULT_INPUT_TYPE = 'NotSpecified' + _PARAMETER_DEFAULT_INPUT_TYPE = "NotSpecified" - def __init__(self, input_name, output_name, number_of_dimensions, default_class_label, **kwargs): + def __init__( + self, + input_name, + output_name, + number_of_dimensions, + default_class_label, + **kwargs + ): """ Create a KNearestNeighborsClassifierBuilder object. 
:param input_name: Name of the model input @@ -76,16 +82,22 @@ def __init__(self, input_name, output_name, number_of_dimensions, default_class_ super(KNearestNeighborsClassifierBuilder, self).__init__() self.spec = coremltools.proto.Model_pb2.Model() - self.spec.specificationVersion = coremltools._MINIMUM_NEAREST_NEIGHBORS_SPEC_VERSION + self.spec.specificationVersion = ( + coremltools._MINIMUM_NEAREST_NEIGHBORS_SPEC_VERSION + ) # the model is initially empty - assume it's updatable self.is_updatable = True if number_of_dimensions <= 0: - raise ValueError('number_of_dimensions must be >= 0') - self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions = number_of_dimensions - - input_type = kwargs.get(self._PARAMETER_KEY_INPUT_TYPE, self._PARAMETER_DEFAULT_INPUT_TYPE) + raise ValueError("number_of_dimensions must be >= 0") + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions = ( + number_of_dimensions + ) + + input_type = kwargs.get( + self._PARAMETER_KEY_INPUT_TYPE, self._PARAMETER_DEFAULT_INPUT_TYPE + ) input_feature_type = FeatureTypes_pb2.ArrayFeatureType.FLOAT32 if input_type == datatypes.Double: input_feature_type = FeatureTypes_pb2.ArrayFeatureType.DOUBLE @@ -103,40 +115,61 @@ def __init__(self, input_name, output_name, number_of_dimensions, default_class_ output_label = self.spec.description.output.add() output_label.name = output_name output_label_probs = self.spec.description.output.add() - output_label_probs.name = output_name + 'Probs' + output_label_probs.name = output_name + "Probs" training_features = self.spec.description.trainingInput.add() training_features.name = output_name if self._is_valid_text_type(default_class_label): - output_label.type.stringType.MergeFromString(b'') - training_features.type.stringType.MergeFromString(b'') - output_label_probs.type.dictionaryType.stringKeyType.MergeFromString(b'') - self.spec.kNearestNeighborsClassifier.stringClassLabels.MergeFromString(b'') - self.spec.kNearestNeighborsClassifier.defaultStringLabel = default_class_label + output_label.type.stringType.MergeFromString(b"") + training_features.type.stringType.MergeFromString(b"") + output_label_probs.type.dictionaryType.stringKeyType.MergeFromString(b"") + self.spec.kNearestNeighborsClassifier.stringClassLabels.MergeFromString(b"") + self.spec.kNearestNeighborsClassifier.defaultStringLabel = ( + default_class_label + ) elif self._is_valid_number_type(default_class_label): - output_label.type.int64Type.MergeFromString(b'') - training_features.type.int64Type.MergeFromString(b'') - output_label_probs.type.dictionaryType.int64KeyType.MergeFromString(b'') - self.spec.kNearestNeighborsClassifier.int64ClassLabels.MergeFromString(b'') - self.spec.kNearestNeighborsClassifier.defaultInt64Label = default_class_label + output_label.type.int64Type.MergeFromString(b"") + training_features.type.int64Type.MergeFromString(b"") + output_label_probs.type.dictionaryType.int64KeyType.MergeFromString(b"") + self.spec.kNearestNeighborsClassifier.int64ClassLabels.MergeFromString(b"") + self.spec.kNearestNeighborsClassifier.defaultInt64Label = ( + default_class_label + ) else: - raise TypeError('default_class_label type ({}) is invalid. Must be either string or int64'.format(type(default_class_label))) + raise TypeError( + "default_class_label type ({}) is invalid. 
Must be either string or int64".format( + type(default_class_label) + ) + ) self.spec.description.predictedFeatureName = output_label.name self.spec.description.predictedProbabilitiesName = output_label_probs.name - number_of_neighbors = kwargs.get(self._PARAMETER_KEY_NUMBER_OF_NEIGHBORS, - self._PARAMETER_DEFAULT_NUMBER_OF_NEIGHBORS) - self.set_number_of_neighbors_with_bounds(number_of_neighbors, allowed_range=(1, 1000)) # Can we think of a more sensible default value? - - self.weighting_scheme = kwargs.get(self._PARAMETER_KEY_WEIGHTING_SCHEME, self._PARAMETER_DEFAULT_WEIGHTING_SCHEME) - - index_type = kwargs.get(self._PARAMETER_KEY_INDEX_TYPE, self._PARAMETER_DEFAULT_INDEX_TYPE) - leaf_size = kwargs.get(self._PARAMETER_KEY_LEAF_SIZE, self._PARAMETER_DEFAULT_LEAF_SIZE) + number_of_neighbors = kwargs.get( + self._PARAMETER_KEY_NUMBER_OF_NEIGHBORS, + self._PARAMETER_DEFAULT_NUMBER_OF_NEIGHBORS, + ) + self.set_number_of_neighbors_with_bounds( + number_of_neighbors, allowed_range=(1, 1000) + ) # Can we think of a more sensible default value? + + self.weighting_scheme = kwargs.get( + self._PARAMETER_KEY_WEIGHTING_SCHEME, + self._PARAMETER_DEFAULT_WEIGHTING_SCHEME, + ) + + index_type = kwargs.get( + self._PARAMETER_KEY_INDEX_TYPE, self._PARAMETER_DEFAULT_INDEX_TYPE + ) + leaf_size = kwargs.get( + self._PARAMETER_KEY_LEAF_SIZE, self._PARAMETER_DEFAULT_LEAF_SIZE + ) self.set_index_type(index_type, leaf_size) # SED is currently the only supported distance metric - self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.squaredEuclideanDistance.MergeFromString(b'') + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.squaredEuclideanDistance.MergeFromString( + b"" + ) @property def author(self): @@ -224,12 +257,14 @@ def weighting_scheme(self, weighting_scheme): """ weighting_scheme = weighting_scheme.lower() if weighting_scheme not in self._VALID_WEIGHTING_SCHEMES: - raise TypeError('Invalid weighting scheme') + raise TypeError("Invalid weighting scheme") - if weighting_scheme == 'inverse_distance': - self.spec.kNearestNeighborsClassifier.inverseDistanceWeighting.MergeFromString(b'') + if weighting_scheme == "inverse_distance": + self.spec.kNearestNeighborsClassifier.inverseDistanceWeighting.MergeFromString( + b"" + ) else: - self.spec.kNearestNeighborsClassifier.uniformWeighting.MergeFromString(b'') + self.spec.kNearestNeighborsClassifier.uniformWeighting.MergeFromString(b"") # storing this in the object is just a convenience self._weighting_scheme = weighting_scheme @@ -251,14 +286,18 @@ def set_index_type(self, index_type, leaf_size=30): """ index_type = index_type.lower() if not index_type in self._VALID_INDEX_TYPES: - raise TypeError('Invalid index type') + raise TypeError("Invalid index type") - if index_type == 'kd_tree': + if index_type == "kd_tree": if leaf_size <= 0: - raise TypeError('leaf_size must be > 0') - self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize = leaf_size + raise TypeError("leaf_size must be > 0") + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize = ( + leaf_size + ) else: - self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.linearIndex.MergeFromString(b'') + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.linearIndex.MergeFromString( + b"" + ) # storing this in the object is just a convenience self._index_type = index_type @@ -269,7 +308,9 @@ def leaf_size(self): Get the leaf size for the KNearestNeighborsClassifier :return: the leaf size """ - return 
self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize + return ( + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize + ) @leaf_size.setter def leaf_size(self, leaf_size): @@ -279,8 +320,10 @@ def leaf_size(self, leaf_size): :return: """ if leaf_size <= 0: - raise ValueError('leaf_size must be > 0') - self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize = leaf_size + raise ValueError("leaf_size must be > 0") + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize = ( + leaf_size + ) @property def number_of_dimensions(self): @@ -288,7 +331,9 @@ def number_of_dimensions(self): Get the number of dimensions of the input data for the KNearestNeighborsClassifier model :return: number of dimensions """ - return self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions + return ( + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions + ) @property def number_of_neighbors(self): @@ -298,7 +343,9 @@ def number_of_neighbors(self): """ return self.spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue - def set_number_of_neighbors_with_bounds(self, number_of_neighbors, allowed_range=None, allowed_set=None): + def set_number_of_neighbors_with_bounds( + self, number_of_neighbors, allowed_range=None, allowed_set=None + ): """ Set the numberOfNeighbors parameter for the KNearestNeighborsClassifier model. :param allowed_range: tuple of (min_value, max_value) defining the range of allowed values @@ -306,51 +353,69 @@ def set_number_of_neighbors_with_bounds(self, number_of_neighbors, allowed_range :return: None """ if number_of_neighbors <= 0: - raise ValueError('number_of_neighbors must be > 0') + raise ValueError("number_of_neighbors must be > 0") if allowed_range is None and allowed_set is None: - raise ValueError('Exactly one of allowed_range or allowed_values must be provided') + raise ValueError( + "Exactly one of allowed_range or allowed_values must be provided" + ) if allowed_range is not None and allowed_set is not None: - raise ValueError('Exactly one of allowed_range or allowed_values must be provided') + raise ValueError( + "Exactly one of allowed_range or allowed_values must be provided" + ) if allowed_range is not None: if not isinstance(allowed_range, tuple): - raise TypeError('allowed_range expects a tuple of (min_value, max_value)') + raise TypeError( + "allowed_range expects a tuple of (min_value, max_value)" + ) if len(allowed_range) != 2: - raise TypeError('allowed_range expects a tuple of (min_value, max_value)') + raise TypeError( + "allowed_range expects a tuple of (min_value, max_value)" + ) (min_value, max_value) = allowed_range if min_value <= 0: - raise ValueError('allowed_range minimum must be > 0') + raise ValueError("allowed_range minimum must be > 0") if max_value < min_value: - raise ValueError('allowed_range max_value must be >= min_value') + raise ValueError("allowed_range max_value must be >= min_value") if number_of_neighbors < min_value or number_of_neighbors > max_value: - raise ValueError('number_of_neighbors is not within allowed range') - - self.spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue = number_of_neighbors - self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue = min_value - self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue = max_value + raise ValueError("number_of_neighbors is not within allowed range") + + 
self.spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue = ( + number_of_neighbors + ) + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue = ( + min_value + ) + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue = ( + max_value + ) elif allowed_set is not None: if not isinstance(allowed_set, set): - raise TypeError('allowed_values expects \'set\' type') + raise TypeError("allowed_values expects 'set' type") if len(allowed_set) == 0: - raise TypeError('allowed_values cannot be empty') + raise TypeError("allowed_values cannot be empty") found_match = False for v in allowed_set: if not self._is_valid_number_type(v): - raise TypeError('allowed_values must contain only integer types') + raise TypeError("allowed_values must contain only integer types") if v <= 0: - raise TypeError('allowed_values must only contain values > 0') + raise TypeError("allowed_values must only contain values > 0") if number_of_neighbors == v: found_match = True if found_match: - self.spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue = number_of_neighbors + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue = ( + number_of_neighbors + ) for v in allowed_set: - self.spec.kNearestNeighborsClassifier.numberOfNeighbors.set.values.append(v) + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.set.values.append( + v + ) else: - raise ValueError('number_of_neighbors is not a valid value') + raise ValueError("number_of_neighbors is not a valid value") def number_of_neighbors_allowed_range(self): """ @@ -358,8 +423,10 @@ def number_of_neighbors_allowed_range(self): :return: tuple of (min_value, max_value) or None if the range hasn't been set """ if self.spec.kNearestNeighborsClassifier.numberOfNeighbors.HasField("range"): - return (self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue, - self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue) + return ( + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue, + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue, + ) return None def number_of_neighbors_allowed_set(self): @@ -368,7 +435,9 @@ def number_of_neighbors_allowed_set(self): :return: set of allowed values or None if the set of allowed values hasn't been populated """ if self.spec.kNearestNeighborsClassifier.numberOfNeighbors.HasField("set"): - spec_values = self.spec.kNearestNeighborsClassifier.numberOfNeighbors.set.values + spec_values = ( + self.spec.kNearestNeighborsClassifier.numberOfNeighbors.set.values + ) allowed_values = set() for v in spec_values: allowed_values.add(v) @@ -383,32 +452,40 @@ def add_samples(self, data_points, labels): :return: None """ if len(data_points) == 0: - raise TypeError('data_points is empty') + raise TypeError("data_points is empty") if len(labels) == 0: - raise TypeError('labels is empty') + raise TypeError("labels is empty") if len(data_points[0]) != self.number_of_dimensions: - raise TypeError('dimensionality of data_points != expected number of dimensions') + raise TypeError( + "dimensionality of data_points != expected number of dimensions" + ) if len(data_points) != len(labels): - raise TypeError('len(data_points) != len(labels)') + raise TypeError("len(data_points) != len(labels)") # Validate the types of the labels before adding any points. 
self._validate_label_types(labels) for data_point in data_points: - sample = self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples.add() + sample = ( + self.spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples.add() + ) for feature in data_point: sample.vector.append(feature) if self.spec.kNearestNeighborsClassifier.HasField("int64ClassLabels"): for label in labels: - self.spec.kNearestNeighborsClassifier.int64ClassLabels.vector.append(label) + self.spec.kNearestNeighborsClassifier.int64ClassLabels.vector.append( + label + ) else: # string labels for label in labels: - self.spec.kNearestNeighborsClassifier.stringClassLabels.vector.append(label) + self.spec.kNearestNeighborsClassifier.stringClassLabels.vector.append( + label + ) def _validate_label_types(self, labels): """ @@ -423,7 +500,7 @@ def _validate_label_types(self, labels): check_is_valid = KNearestNeighborsClassifierBuilder._is_valid_text_type for label in labels: if not check_is_valid(label): - raise TypeError('Invalid type for label: {}'.format(type(label))) + raise TypeError("Invalid type for label: {}".format(type(label))) @staticmethod def _is_valid_text_type(obj): @@ -441,4 +518,4 @@ def _is_valid_number_type(obj): :param obj: the object to check :return: True if a valid number type, False otherwise """ - return isinstance(obj, (_six.integer_types, np.integer)) + return isinstance(obj, (_six.integer_types, _np.integer)) diff --git a/coremltools/models/neural_network/__init__.py b/coremltools/models/neural_network/__init__.py index 015931b76..ac7868bea 100644 --- a/coremltools/models/neural_network/__init__.py +++ b/coremltools/models/neural_network/__init__.py @@ -3,11 +3,11 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from .builder import * - -from . import optimization_utils -from . import quantization_utils from . import flexible_shape_utils +from . import optimization_utils from . import printer +from . import quantization_utils from . import spec_inspection_utils from . import update_optimizer_utils +from . import utils +from .builder import * diff --git a/coremltools/models/neural_network/builder.py b/coremltools/models/neural_network/builder.py index e79a03599..73b18d147 100644 --- a/coremltools/models/neural_network/builder.py +++ b/coremltools/models/neural_network/builder.py @@ -7,98 +7,138 @@ Neural network builder class to construct Core ML models. """ -from ... import SPECIFICATION_VERSION +from ... import SPECIFICATION_VERSION as _SPECIFICATION_VERSION from ... import _MINIMUM_NDARRAY_SPEC_VERSION + from ... import _MINIMUM_UPDATABLE_SPEC_VERSION +from ... import _SPECIFICATION_VERSION_IOS_14 from ...proto import Model_pb2 as _Model_pb2 from ...proto import NeuralNetwork_pb2 as _NeuralNetwork_pb2 from ...proto import FeatureTypes_pb2 as _FeatureTypes_pb2 -from .._interface_management import set_transform_interface_params, set_training_features +from .._interface_management import ( + set_transform_interface_params, + set_training_features, +) from .. 
import datatypes -import numpy as np -from .quantization_utils import unpack_to_bytes, _convert_array_to_nbit_quantized_bytes +import numpy as _np +from .quantization_utils import _unpack_to_bytes, _convert_array_to_nbit_quantized_bytes from .spec_inspection_utils import * from .update_optimizer_utils import AdamParams, SgdParams +from six import string_types as _string_types +from math import floor as _math_floor -_SUPPORTED_UPDATABLE_LAYERS = ['innerProduct', 'convolution'] +_SUPPORTED_UPDATABLE_LAYERS = ["innerProduct", "convolution"] def _set_recurrent_activation(param, activation): - if isinstance(activation, bytes): activation = activation.decode("utf8") - activation = activation.upper() if isinstance(activation, str) else activation - if activation == 'SIGMOID': - param.sigmoid.MergeFromString(b'') - elif activation == 'TANH': - param.tanh.MergeFromString(b'') - elif activation == 'LINEAR': - param.linear.MergeFromString(b'') - elif activation == 'SIGMOID_HARD': - param.sigmoidHard.MergeFromString(b'') - elif activation == 'SCALED_TANH': - param.scaledTanh.MergeFromString(b'') - elif activation == 'RELU': - param.ReLU.MergeFromString(b'') + activation = ( + activation.upper() if isinstance(activation, _string_types) else activation + ) + + if activation == "SIGMOID": + param.sigmoid.MergeFromString(b"") + elif activation == "TANH": + param.tanh.MergeFromString(b"") + elif activation == "LINEAR": + param.linear.MergeFromString(b"") + elif activation == "SIGMOID_HARD": + param.sigmoidHard.MergeFromString(b"") + elif activation == "SCALED_TANH": + param.scaledTanh.MergeFromString(b"") + elif activation == "RELU": + param.ReLU.MergeFromString(b"") else: - raise TypeError("Unsupported activation type with Recurrent layer: %s." % activation) + raise TypeError( + "Unsupported activation type with Recurrent layer: %s." 
% activation + ) + +def _verify_quantization_arguments(weight=bytes(), output_channels=1, **kwargs): + quantization_type = kwargs.get("quantization_type", "").lower() + nbits = kwargs.get("nbits", 8) + quant_scale = kwargs.get("quant_scale", None) + quant_bias = kwargs.get("quant_bias", None) + quant_lut = kwargs.get("quant_lut", None) + int_8_dynamic_quantize = kwargs.get("int_8_dynamic_quantize", False) -def _verify_quantization_arguments(weight=bytes(), - output_channels=1, **kwargs): - quantization_type = kwargs.get('quantization_type', '').lower() - nbits = kwargs.get('nbits', 8) - quant_scale = kwargs.get('quant_scale', None) - quant_bias = kwargs.get('quant_bias', None) - quant_lut = kwargs.get('quant_lut', None) + if int_8_dynamic_quantize and nbits != 8: + raise ValueError("nbits must be 8 when 'int_8_dynamic_quantize' is true ") + + if int_8_dynamic_quantize and quant_bias is not None: + raise ValueError( + "quant_bias must be empty when 'int_8_dynamic_quantize' is true " + ) + + if int_8_dynamic_quantize and quant_scale.size != 1: + raise ValueError( + "quant_scale must be of size 1 when 'int_8_dynamic_quantize' is true " + ) if not isinstance(weight, bytes): - raise ValueError('Weight must be of type bytes() for quantization') + raise ValueError("Weight must be of type bytes() for quantization") - if quantization_type == 'linear': - if quant_scale is None or quant_bias is None: - raise ValueError("quant_scale and quant_bias parameters must be provided for linear quantization type") + if quantization_type == "linear": + if not int_8_dynamic_quantize: + if quant_scale is None or quant_bias is None: + raise ValueError( + "quant_scale and quant_bias parameters must be provided for linear quantization type" + ) if len(quant_scale) != 1 and len(quant_scale) != output_channels: - raise ValueError("quant_scale should be of type float or an array of length outputChannels") - if len(quant_bias) != 1 and len(quant_bias) != output_channels: - raise ValueError("quant_bias should be of type float or an array of length outputChannels") - elif quantization_type == 'lut': + raise ValueError( + "quant_scale should be of type float or an array of length outputChannels" + ) + if not int_8_dynamic_quantize: + if len(quant_bias) != 1 and len(quant_bias) != output_channels: + raise ValueError( + "quant_bias should be of type float or an array of length outputChannels" + ) + elif quantization_type == "lut": if quant_lut is None: - raise ValueError("quant_lut must be provided for look up table quantization type") + raise ValueError( + "quant_lut must be provided for look up table quantization type" + ) if len(quant_lut) != 2 ** nbits: raise ValueError("quant_lut must be an array of length 2^nbits") else: raise ValueError("quantization_type must be either linear or lut") - if quantization_type == 'linear' or 'lut': + if quantization_type == "linear" or "lut": if nbits > 8 or nbits < 1: - raise ValueError('nbits must be between 1 and 8') + raise ValueError("nbits must be between 1 and 8") -def _fill_quantized_weights(weights_message=None, - W=bytes(), **kwargs): - weights_message.rawValue = bytes() - weights_message.rawValue += W - nbits = kwargs.get('nbits', 8) +def _fill_quantized_weights(weights_message=None, W=bytes(), use_int_8=False, **kwargs): + if use_int_8: + weights_message.int8RawValue = bytes() + weights_message.int8RawValue += W + else: + weights_message.rawValue = bytes() + weights_message.rawValue += W + nbits = kwargs.get("nbits", 8) weights_message.quantization.numberOfBits = nbits - 
quantization_type = kwargs.get('quantization_type', '').lower() - if quantization_type == 'linear': - quant_scale = kwargs.get('quant_scale', [1.0]) - quant_bias = kwargs.get('quant_bias', [0.0]) - weights_message.quantization.linearQuantization.scale.extend(map(float, quant_scale)) - weights_message.quantization.linearQuantization.bias.extend(map(float, quant_bias)) + quantization_type = kwargs.get("quantization_type", "").lower() + if quantization_type == "linear": + quant_scale = kwargs.get("quant_scale", [1.0]) + quant_bias = kwargs.get("quant_bias", [0.0]) + weights_message.quantization.linearQuantization.scale.extend(quant_scale) + if not use_int_8: + weights_message.quantization.linearQuantization.bias.extend(quant_bias) else: - quant_lut = kwargs.get('quant_lut', [0.0, 1.0]) - weights_message.quantization.lookupTableQuantization.floatValue.extend(map(float, quant_lut)) + quant_lut = kwargs.get("quant_lut", [0.0, 1.0]) + weights_message.quantization.lookupTableQuantization.floatValue.extend( + quant_lut + ) def _get_nn_spec(spec): - if spec.HasField('neuralNetworkClassifier'): + if spec.HasField("neuralNetworkClassifier"): return spec.neuralNetworkClassifier - elif spec.HasField('neuralNetworkRegressor'): + elif spec.HasField("neuralNetworkRegressor"): return spec.neuralNetworkRegressor - elif spec.HasField('neuralNetwork'): + elif spec.HasField("neuralNetwork"): return spec.neuralNetwork else: return None @@ -124,7 +164,8 @@ def _get_lstm_weight_fields(lstm_wp): lstm_wp.outputGateBiasVector, lstm_wp.inputGatePeepholeVector, lstm_wp.forgetGatePeepholeVector, - lstm_wp.outputGatePeepholeVector] + lstm_wp.outputGatePeepholeVector, + ] def _fill_tensor_fields(tensor_field, ranks=None, shapes=None): @@ -135,26 +176,43 @@ def _fill_tensor_fields(tensor_field, ranks=None, shapes=None): """ if ranks is None and shapes is None: return + if ranks is None and shapes is not None: ranks = [len(shape) for shape in shapes] + # Fill ranks only for rank in ranks: if rank is None: - raise ValueError('Rank of a tensor should not be None') - # if rank > 5: - # raise ValueError('Rank greater than 5 not supported') + continue + + if not _np.issubclass_(type(rank), (int, _np.integer)): + rank = -1 # Symbolic rank set to -1 + field = tensor_field.add() field.rank = rank + if ranks is not None and shapes is not None: - # Check validity if len(ranks) != len(shapes): - raise ValueError('Number of rank and shape of tensor field does not match') - for i, (r, s) in enumerate(zip(ranks, shapes)): - if s is None: + raise ValueError("Number of rank and shape of tensor field does not match.") + + for i in range(0, len(ranks)): + shape = shapes[i] + rank = ranks[i] + + # Ignore incomplete info + if shape is None or rank is None: continue - if r != len(s): - raise ValueError('Rank and shape does not match') - tensor_field[i].dimValue.extend(s) + + # Raise error on inconsistent input + if rank != len(shape): + raise ValueError("Rank and shape does not match") + + # Add the shape to the proto + is_symbolic = False + for s in shape: + if not _np.issubclass_(type(s), (int, _np.integer)): + s = -1 # Symbolic shape set to -1 + tensor_field[i].dimValue.append(s) class NeuralNetworkBuilder(object): @@ -199,15 +257,17 @@ class NeuralNetworkBuilder(object): MLModel, datatypes, save_spec """ - def __init__(self, - input_features=None, - output_features=None, - mode=None, - spec=None, - nn_spec=None, - disable_rank5_shape_mapping=False, - training_features=None, - use_float_arraytype=False): + def __init__( + self, + 
input_features=None, + output_features=None, + mode=None, + spec=None, + nn_spec=None, + disable_rank5_shape_mapping=False, + training_features=None, + use_float_arraytype=False, + ): """ Construct a NeuralNetworkBuilder object to build an MLModel specification with model interface or a NeuralNetwork protobuf message, either from scratch or an @@ -294,17 +354,21 @@ def __init__(self, self.layer_specs[layer_spec.name] = layer_spec else: # Both spec and nn_spec are not None - raise ValueError('Attempting to assign another NeuralNetwork Spec to an existing MLModel Spec') + raise ValueError( + "Attempting to assign another NeuralNetwork Spec to an existing MLModel Spec" + ) if input_features is None and output_features is None: return - if self.spec is None and self.nn_spec is not None: # Building nested Neural Network + if ( + self.spec is None and self.nn_spec is not None + ): # Building nested Neural Network return # Set the interface params. if self.spec is None: self.spec = _Model_pb2.Model() - self.spec.specificationVersion = SPECIFICATION_VERSION + self.spec.specificationVersion = _SPECIFICATION_VERSION if disable_rank5_shape_mapping: self.spec.specificationVersion = _MINIMUM_NDARRAY_SPEC_VERSION @@ -328,21 +392,27 @@ def __init__(self, else: array_datatype = _Model_pb2.ArrayFeatureType.DOUBLE - self.spec = set_transform_interface_params(self.spec, input_features, - out_features_with_shape, training_features=training_features, - array_datatype=array_datatype) + self.spec = set_transform_interface_params( + self.spec, + input_features, + out_features_with_shape, + training_features=training_features, + array_datatype=array_datatype, + ) for input in input_features: self.rank_dict[input[0]] = len(input[1].dimensions) for idx, output_feature in enumerate(output_features): if output_features[idx][1] is None: - self.spec.description.output[idx].type.multiArrayType.ClearField("shape") + self.spec.description.output[idx].type.multiArrayType.ClearField( + "shape" + ) if self.nn_spec is None: - if mode == 'classifier': + if mode == "classifier": nn_spec = self.spec.neuralNetworkClassifier - elif mode == 'regressor': + elif mode == "regressor": nn_spec = self.spec.neuralNetworkRegressor else: nn_spec = self.spec.neuralNetwork @@ -350,9 +420,11 @@ def __init__(self, if disable_rank5_shape_mapping and self.nn_spec: self.nn_spec.arrayInputShapeMapping = _NeuralNetwork_pb2.NeuralNetworkMultiArrayShapeMapping.Value( - 'EXACT_ARRAY_MAPPING') + "EXACT_ARRAY_MAPPING" + ) self.nn_spec.imageInputShapeMapping = _NeuralNetwork_pb2.NeuralNetworkImageShapeMapping.Value( - 'RANK4_IMAGE_MAPPING') + "RANK4_IMAGE_MAPPING" + ) def set_input(self, input_names, input_dims): """ @@ -381,11 +453,14 @@ def set_input(self, input_names, input_dims): """ if len(input_names) != len(input_dims): - raise ValueError('input_names and input_dims must be of the same sizes.') + raise ValueError("input_names and input_dims must be of the same sizes.") spec = self.spec for idx, dim in enumerate(input_dims): - if hasattr(self, '_disable_rank5_shape_mapping') and self._disable_rank5_shape_mapping: + if ( + hasattr(self, "_disable_rank5_shape_mapping") + and self._disable_rank5_shape_mapping + ): input_shape = dim else: if len(dim) == 3: @@ -395,15 +470,20 @@ def set_input(self, input_names, input_dims): elif len(dim) == 1: input_shape = tuple(dim) else: - raise RuntimeError("Attempting to add a neural network " + - "input with rank " + str(len(dim)) + - ". 
All networks should take inputs of rank 1 or 3.") + raise RuntimeError( + "Attempting to add a neural network " + + "input with rank " + + str(len(dim)) + + ". All networks should take inputs of rank 1 or 3." + ) spec.description.input[idx].type.multiArrayType.ClearField("shape") spec.description.input[idx].type.multiArrayType.shape.extend(input_shape) # TODO: if it's an embedding, this should be integer - spec.description.input[idx].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + spec.description.input[ + idx + ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE spec.description.input[idx].name = input_names[idx] @@ -434,14 +514,15 @@ def set_output(self, output_names, output_dims): """ if len(output_names) != len(output_dims): - raise ValueError('output_names and output_dims must be of the same sizes.') + raise ValueError("output_names and output_dims must be of the same sizes.") spec = self.spec for idx, dim in enumerate(output_dims): spec.description.output[idx].type.multiArrayType.ClearField("shape") spec.description.output[idx].type.multiArrayType.shape.extend(dim) - spec.description.output[idx].type.multiArrayType.dataType = \ - _Model_pb2.ArrayFeatureType.DOUBLE + spec.description.output[ + idx + ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE spec.description.output[idx].name = output_names[idx] @@ -465,7 +546,9 @@ def set_training_input(self, training_input): spec = self.spec set_training_features(spec, training_input) - def set_class_labels(self, class_labels, predicted_feature_name='classLabel', prediction_blob=''): + def set_class_labels( + self, class_labels, predicted_feature_name="classLabel", prediction_blob="" + ): """ Set class labels to the model spec to make it a neural network classifier. @@ -477,7 +560,7 @@ def set_class_labels(self, class_labels, predicted_feature_name='classLabel', pr predicted_feature_name: str Name of the output feature for the class labels exposed in the - Core ML neural network classifier, defaults: 'class_output'. + Core ML neural network classifier, defaults: 'classLabel'. prediction_blob: str If provided, then this is the name of the neural network blob which @@ -494,14 +577,17 @@ def set_class_labels(self, class_labels, predicted_feature_name='classLabel', pr if len(spec.description.output) == 0: raise ValueError( - "Model should have at least one output (the probabilities) to automatically make it a classifier.") + "Model should have at least one output (the probabilities) to automatically make it a classifier." + ) probOutput = spec.description.output[0] - probOutput.type.dictionaryType.MergeFromString(b'') + probOutput.type.dictionaryType.MergeFromString(b"") if len(class_labels) == 0: return class_type = type(class_labels[0]) - if class_type not in [int, str]: - raise TypeError("Class labels must be of type Integer or String. (not %s)" % class_type) + if not isinstance(class_labels[0], (int, _string_types)): + raise TypeError( + "Class labels must be of type Integer or String. 
(not %s)" % class_type + ) spec.description.predictedProbabilitiesName = probOutput.name spec.description.predictedFeatureName = predicted_feature_name @@ -509,19 +595,19 @@ classLabel = spec.description.output.add() classLabel.name = predicted_feature_name if class_type == int: - nn_spec.ClearField('int64ClassLabels') - probOutput.type.dictionaryType.int64KeyType.MergeFromString(b'') - classLabel.type.int64Type.MergeFromString(b'') + nn_spec.ClearField("int64ClassLabels") + probOutput.type.dictionaryType.int64KeyType.MergeFromString(b"") + classLabel.type.int64Type.MergeFromString(b"") for c in class_labels: nn_spec.int64ClassLabels.vector.append(c) else: - nn_spec.ClearField('stringClassLabels') - probOutput.type.dictionaryType.stringKeyType.MergeFromString(b'') - classLabel.type.stringType.MergeFromString(b'') + nn_spec.ClearField("stringClassLabels") + probOutput.type.dictionaryType.stringKeyType.MergeFromString(b"") + classLabel.type.stringType.MergeFromString(b"") for c in class_labels: nn_spec.stringClassLabels.vector.append(c) - if prediction_blob != '': + if prediction_blob != "": # correctness here will be checked in the validator -- i.e. to # make sure this string corresponds to a real blob nn_spec.labelProbabilityLayerName = prediction_blob @@ -529,6 +615,56 @@ # assume it's the last blob produced in the network nn_spec.labelProbabilityLayerName = nn_spec.layers[-1].output[0] + def set_optional_input(self, input_idx, value=None, format="float"): + """ + Marks the given input as optional. + Optionally sets a default value for the input if value is not None. + + Parameters + ---------- + input_idx: int + Index of the input to mark optional and, when value is given, fill with the default value + value: int/double/float/None + Value to set as the default value + format: str + Format of the default value. + Must be one of 'float', 'double' or 'int' + """ + if input_idx >= len(self.spec.description.input): + msg = ( + str(input_idx) + + " out of " + + str(len(self.spec.description.input)) + + " inputs!" + ) + raise ValueError("Setting invalid input as optional! {}".format(msg)) + self.spec.description.input[input_idx].type.isOptional = True + if value is None: + return + # Default value is supported from CoreML 4 onwards. + self.spec.specificationVersion = max( + self.spec.specificationVersion, _SPECIFICATION_VERSION_IOS_14 + ) + format = format.lower() + if format == "float": + self.spec.description.input[ + input_idx + ].type.multiArrayType.floatDefaultValue = value + elif format == "double": + self.spec.description.input[ + input_idx + ].type.multiArrayType.doubleDefaultValue = value + elif format == "int": + self.spec.description.input[ + input_idx + ].type.multiArrayType.intDefaultValue = value + else: + raise ValueError( + "Incorrect format for optional inputs! Expecting int/float/double, got {}!".format( + format + ) + ) + def add_optionals(self, optionals_in, optionals_out): """ Add optional inputs and outputs to the model spec.
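A minimal sketch of the `set_optional_input` method defined above (assuming an existing `NeuralNetworkBuilder` instance named `nn_builder`, which is hypothetical here):

```python
# Mark the first model input optional and supply a float default value.
# Per the implementation above, setting a default bumps the spec to
# _SPECIFICATION_VERSION_IOS_14 (Core ML 4).
nn_builder.set_optional_input(input_idx=0, value=0.0, format="float")
```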
@@ -550,7 +686,10 @@ def add_optionals(self, optionals_in, optionals_out): if (not optionals_in) and (not optionals_out): return - input_types = [datatypes.Array(dim) if isinstance(dim, int) else datatypes.Array(*dim) for (name, dim) in optionals_in] + input_types = [ + datatypes.Array(dim) if isinstance(dim, int) else datatypes.Array(*dim) + for (name, dim) in optionals_in + ] output_types = [] for name, dim in optionals_out: if not dim: @@ -574,9 +713,13 @@ # add types for any extra hidden inputs for idx in range(len_before_in, len(spec.description.input)): - spec.description.input[idx].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + spec.description.input[ + idx + ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE for idx in range(len_before_out, len(spec.description.output)): - spec.description.output[idx].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE + spec.description.output[ + idx + ].type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE def make_updatable(self, trainables): """ @@ -591,22 +734,28 @@ return self.spec.isUpdatable = True - if not self.spec.specificationVersion or self.spec.specificationVersion < _MINIMUM_UPDATABLE_SPEC_VERSION: + if ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _MINIMUM_UPDATABLE_SPEC_VERSION + ): self.spec.specificationVersion = _MINIMUM_UPDATABLE_SPEC_VERSION - self.nn_spec.updateParams.MergeFromString(b'') + self.nn_spec.updateParams.MergeFromString(b"") self.set_shuffle() for trainable in trainables: if trainable not in self.layer_specs: - raise ValueError('Layer %s does not exist.' % trainable) + raise ValueError("Layer %s does not exist." % trainable) spec_layer = self.layer_specs[trainable] - spec_layer_type = spec_layer.WhichOneof('layer') + spec_layer_type = spec_layer.WhichOneof("layer") if spec_layer_type not in _SUPPORTED_UPDATABLE_LAYERS: - raise ValueError('Layer %s is not supported to be marked as updatable. Only %s layers ' - 'are supported to be marked updatable.' % (trainable, _SUPPORTED_UPDATABLE_LAYERS)) + raise ValueError( + "Layer %s is not supported to be marked as updatable. Only %s layers " + "are supported to be marked updatable." + % (trainable, _SUPPORTED_UPDATABLE_LAYERS) + ) spec_layer.isUpdatable = True - typed_layer = getattr(spec_layer, spec_layer.WhichOneof('layer')) + typed_layer = getattr(spec_layer, spec_layer.WhichOneof("layer")) for fd in typed_layer.DESCRIPTOR.fields: field = getattr(typed_layer, fd.name) if type(field) == _NeuralNetwork_pb2.LSTMWeightParams: @@ -627,7 +776,8 @@ def set_categorical_cross_entropy_loss(self, name, input): name: The name of the loss layer input: The name of the input, which should be a vector of length N representing the distribution over N categories. This must be the output of a softmax. - .. math:: + Math + ---------- Loss_{CCE}(input, target) = -\sum_{i=1}^{N} (target == i) \log(input[i]) = -\log(input[target]) """ if self.spec is None: @@ -637,36 +787,46 @@ raise ValueError("Name %s is already used." % name) if input is None: - raise ValueError('Loss Layer input must be specified') + raise ValueError("Loss Layer input must be specified") - target = input + '_true' + target = input + "_true" if len(self.nn_spec.layers) < 1: - raise ValueError('Loss layer (%s) cannot be attached to an empty model.'
% name) + raise ValueError( + "Loss layer (%s) cannot be attached to an empty model." % name + ) # validate input # input must be a softmax layer output input_validated = False for _, layer in enumerate(self.nn_spec.layers[::-1]): layer_outputs = list(layer.output) - layer_type = layer.WhichOneof('layer') + layer_type = layer.WhichOneof("layer") - if input in layer_outputs and layer_type == 'softmax': + if input in layer_outputs and layer_type == "softmax": input_validated = True break if not input_validated: - raise ValueError('Categorical Cross Entropy loss layer input (%s) must be a softmax layer output.' % input) + raise ValueError( + "Categorical Cross Entropy loss layer input (%s) must be a softmax layer output." + % input + ) # validate target output_names = [x.name for x in self.spec.description.output] if target in output_names: - raise ValueError('Loss layer target (%s) must not be a model output.' % target) + raise ValueError( + "Loss layer target (%s) must not be a model output." % target + ) updating_classifier = False predicted_probabilities_name = self.spec.description.predictedProbabilitiesName predicted_feature_name = self.spec.description.predictedFeatureName - if self.spec.HasField('neuralNetworkClassifier') and input == predicted_probabilities_name: + if ( + self.spec.HasField("neuralNetworkClassifier") + and input == predicted_probabilities_name + ): updating_classifier = True loss_layer = self.nn_spec.updateParams.lossLayers.add() @@ -676,27 +836,35 @@ def set_categorical_cross_entropy_loss(self, name, input): loss_layer.categoricalCrossEntropyLossLayer.input = input loss_layer.categoricalCrossEntropyLossLayer.target = target - classifier_output = self.spec.description.predictedFeatureName - training_inputs = self.spec.description.trainingInput training_inputs.extend(self.spec.description.input) training_input = training_inputs.add() if updating_classifier: training_input.name = predicted_feature_name - classifier_output_type = [x.type for x in self.spec.description.output if x.name == predicted_feature_name] - - model_type = classifier_output_type[0].WhichOneof('Type') - if model_type == 'stringType': + classifier_output_type = [ + x.type + for x in self.spec.description.output + if x.name == predicted_feature_name + ] + + model_type = classifier_output_type[0].WhichOneof("Type") + if model_type == "stringType": datatypes._set_datatype(training_input.type, datatypes.String()) - elif model_type == 'int64Type': + elif model_type == "int64Type": datatypes._set_datatype(training_input.type, datatypes.Int64()) else: training_input.name = target datatypes._set_datatype(training_input.type, datatypes.Array(1)) - training_input.type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.INT32 + training_input.type.multiArrayType.dataType = ( + _Model_pb2.ArrayFeatureType.INT32 + ) - print('Now adding input {} as target for categorical cross-entropy loss layer.'.format(target)) + print( + "Now adding input {} as target for categorical cross-entropy loss layer.".format( + target + ) + ) def set_mean_squared_error_loss(self, name, input_feature=None): """ @@ -714,18 +882,24 @@ def set_mean_squared_error_loss(self, name, input_feature=None): raise ValueError("Name %s is already used." 
% name) if input_feature is None: - raise ValueError('Loss Layer input must be specified') + raise ValueError("Loss Layer input must be specified") if not isinstance(input_feature, tuple): - raise ValueError('Loss layer input must be a tuple of type (string, datatype)') + raise ValueError( + "Loss layer input must be a tuple of type (string, datatype)" + ) (fname, ftype) = input_feature - if not isinstance(fname, str): - raise ValueError('Loss layer input must be a tuple of type (string, datatype)') + if not isinstance(fname, _string_types): + raise ValueError( + "Loss layer input must be a tuple of type (string, datatype)" + ) if not isinstance(ftype, datatypes.Array): - raise ValueError('Loss layer input must be a tuple of type (string, datatype)') + raise ValueError( + "Loss layer input must be a tuple of type (string, datatype)" + ) - target = fname + '_true' + target = fname + "_true" loss_layer = self.nn_spec.updateParams.lossLayers.add() self.layers.append(name) @@ -734,7 +908,9 @@ def set_mean_squared_error_loss(self, name, input_feature=None): output_names = [x.name for x in self.spec.description.output] if target in output_names: - raise ValueError('Loss Layer target (%s) must not be a model output' % target) + raise ValueError( + "Loss Layer target (%s) must not be a model output" % target + ) loss_layer.meanSquaredErrorLossLayer.input = input_feature[0] loss_layer.meanSquaredErrorLossLayer.target = target @@ -746,14 +922,18 @@ def set_mean_squared_error_loss(self, name, input_feature=None): datatypes._set_datatype(training_input.type, input_feature[1]) training_input.type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.DOUBLE - print('Now adding input {} as target for mean squared error loss layer.'.format(target)) + print( + "Now adding input {} as target for mean squared error loss layer.".format( + target + ) + ) def set_sgd_optimizer(self, sgd_params): if self.spec is None: return if not isinstance(sgd_params, SgdParams): - raise Exception('sgd_params must be of instance SgdParams') + raise Exception("sgd_params must be of instance SgdParams") sgd_optimizer = self.nn_spec.updateParams.optimizer.sgdOptimizer @@ -776,7 +956,7 @@ def set_adam_optimizer(self, adam_params): return if not isinstance(adam_params, AdamParams): - raise Exception('adam_params must be of instance AdamParams') + raise Exception("adam_params must be of instance AdamParams") adam_optimizer = self.nn_spec.updateParams.optimizer.adamOptimizer @@ -822,22 +1002,32 @@ def set_shuffle(self, seed=None): # Validate that seed passed in is integer if seed is not None: if not isinstance(seed, int): - raise TypeError('Shuffle seed value must be integer') + raise TypeError("Shuffle seed value must be integer") self.nn_spec.updateParams.shuffle.defaultValue = True if seed is not None: self.nn_spec.updateParams.seed.defaultValue = seed - def _add_generic_layer(self, name, input_names, output_names, - input_ranks=None, input_shapes=None, - output_ranks=None, output_shapes=None): + def _add_generic_layer( + self, + name, + input_names, + output_names, + input_ranks=None, + input_shapes=None, + output_ranks=None, + output_shapes=None, + ): generic_layer = self.nn_spec.layers.add() generic_layer.name = name generic_layer.input.extend(input_names) generic_layer.output.extend(output_names) self.layers.append(name) if name in self.layer_specs: - raise ValueError('Layer with name \"%s\" has already been added. Please use a unique name.' % name) + raise ValueError( + 'Layer with name "%s" has already been added. 
Please use a unique name.' + % name + ) self.layer_specs[name] = generic_layer _fill_tensor_fields(generic_layer.inputTensor, input_ranks, input_shapes) _fill_tensor_fields(generic_layer.outputTensor, output_ranks, output_shapes) @@ -866,72 +1056,126 @@ def inspect_layers(self, last=-1, verbose=False): for i, alayer in enumerate(self.nn_spec.layers[::-1]): if i >= last: break - layer_type, name, in_blobs, out_blobs, params_info = _summarize_network_layer_info(alayer) - print('[Id: {}], Name: {} (Type: {})'.format(n_layers - i - 1, name, layer_type)) - print(' ' * 10 + 'Updatable: {}'.format(alayer.isUpdatable)) - print(' ' * 10 + 'Input blobs: {}'.format(in_blobs)) - print(' ' * 10 + 'Output blobs: {}'.format(out_blobs)) + ( + layer_type, + name, + in_blobs, + out_blobs, + params_info, + ) = _summarize_network_layer_info(alayer) + print( + "[Id: {}], Name: {} (Type: {})".format( + n_layers - i - 1, name, layer_type + ) + ) + print(" " * 10 + "Updatable: {}".format(alayer.isUpdatable)) + print(" " * 10 + "Input blobs: {}".format(in_blobs)) + print(" " * 10 + "Output blobs: {}".format(out_blobs)) if verbose and len(params_info) > 0: - print(' ' * 10 + 'Parameters: ') + print(" " * 10 + "Parameters: ") for param in params_info: - print(' ' * 14 + '{} = {}'.format(param[0], param[1])) + print(" " * 14 + "{} = {}".format(param[0], param[1])) def inspect_loss_layers(self): """ Prints the summary for the loss layer. """ n_loss_layers = len(self.nn_spec.updateParams.lossLayers) if n_loss_layers < 1: - print('no loss layer detected.') + print("no loss layer detected.") for i, loss_layer in enumerate(self.nn_spec.updateParams.lossLayers[::-1]): - loss_type = loss_layer.WhichOneof('LossLayerType') + loss_type = loss_layer.WhichOneof("LossLayerType") loss_name = loss_layer.name loss_input = None loss_target = None - if loss_type == 'categoricalCrossEntropyLossLayer': + if loss_type == "categoricalCrossEntropyLossLayer": loss_input = loss_layer.categoricalCrossEntropyLossLayer.input loss_target = loss_layer.categoricalCrossEntropyLossLayer.target - elif loss_type == 'meanSquaredErrorLossLayer': + elif loss_type == "meanSquaredErrorLossLayer": loss_input = loss_layer.meanSquaredErrorLossLayer.input loss_target = loss_layer.meanSquaredErrorLossLayer.target - print('[Id: {}], Name: {} (Type: {})'.format(n_loss_layers - i - 1, loss_name, loss_type)) - print(' ' * 10 + 'Loss Input: {}'.format(loss_input)) - print(' ' * 10 + 'Loss Target: {}'.format(loss_target)) + print( + "[Id: {}], Name: {} (Type: {})".format( + n_loss_layers - i - 1, loss_name, loss_type + ) + ) + print(" " * 10 + "Loss Input: {}".format(loss_input)) + print(" " * 10 + "Loss Target: {}".format(loss_target)) def inspect_optimizer(self): """ Prints the summary for the optimizer. 
""" optimizer = self.nn_spec.updateParams.optimizer - optimizer_type = optimizer.WhichOneof('OptimizerType') - print('Optimizer Type: {}'.format(optimizer_type)) - if optimizer_type == 'sgdOptimizer': + optimizer_type = optimizer.WhichOneof("OptimizerType") + print("Optimizer Type: {}".format(optimizer_type)) + if optimizer_type == "sgdOptimizer": lr = optimizer.sgdOptimizer.learningRate batch = optimizer.sgdOptimizer.miniBatchSize momentum = optimizer.sgdOptimizer.momentum - print('lr: {}, min: {}, max: {}'.format(lr.defaultValue, lr.range.minValue, lr.range.maxValue)) - print('batch: {}, allowed_set: {}'.format(batch.defaultValue, batch.set.values)) - print('momentum: {}, min: {}, max: {}'.format(momentum.defaultValue, momentum.range.minValue, momentum.range.maxValue)) - elif optimizer_type == 'adamOptimizer': + print( + "lr: {}, min: {}, max: {}".format( + lr.defaultValue, lr.range.minValue, lr.range.maxValue + ) + ) + print( + "batch: {}, allowed_set: {}".format( + batch.defaultValue, batch.set.values + ) + ) + print( + "momentum: {}, min: {}, max: {}".format( + momentum.defaultValue, + momentum.range.minValue, + momentum.range.maxValue, + ) + ) + elif optimizer_type == "adamOptimizer": lr = optimizer.adamOptimizer.learningRate batch = optimizer.adamOptimizer.miniBatchSize beta1 = optimizer.adamOptimizer.beta1 beta2 = optimizer.adamOptimizer.beta2 eps = optimizer.adamOptimizer.eps - print('lr: {}, min: {}, max: {}'.format(lr.defaultValue, lr.range.minValue, lr.range.maxValue)) - print('batch: {}, allowed_set: {}'.format(batch.defaultValue, batch.set.values)) - print('beta1: {}, min: {}, max: {}'.format(beta1.defaultValue, beta1.range.minValue, beta1.range.maxValue)) - print('beta2: {}, min: {}, max: {}'.format(beta2.defaultValue, beta2.range.minValue, beta2.range.maxValue)) - print('epsilon: {}, min: {}, max: {}'.format(eps.defaultValue, eps.range.minValue, eps.range.maxValue)) + print( + "lr: {}, min: {}, max: {}".format( + lr.defaultValue, lr.range.minValue, lr.range.maxValue + ) + ) + print( + "batch: {}, allowed_set: {}".format( + batch.defaultValue, batch.set.values + ) + ) + print( + "beta1: {}, min: {}, max: {}".format( + beta1.defaultValue, beta1.range.minValue, beta1.range.maxValue + ) + ) + print( + "beta2: {}, min: {}, max: {}".format( + beta2.defaultValue, beta2.range.minValue, beta2.range.maxValue + ) + ) + print( + "epsilon: {}, min: {}, max: {}".format( + eps.defaultValue, eps.range.minValue, eps.range.maxValue + ) + ) def inspect_updatable_layers(self): """ Prints all updatable layers with their inputs and outputs. """ for _, layer in enumerate(self.nn_spec.layers[::-1]): if layer.isUpdatable: - layer_type, name, in_blobs, out_blobs, _ = _summarize_network_layer_info(layer) - print('Name: {} (Type: {})'.format(name, layer_type)) - print(' ' * 10 + 'Input blobs: {}'.format(in_blobs)) - print(' ' * 10 + 'Output blobs: {}'.format(out_blobs)) + ( + layer_type, + name, + in_blobs, + out_blobs, + _, + ) = _summarize_network_layer_info(layer) + print("Name: {} (Type: {})".format(name, layer_type)) + print(" " * 10 + "Input blobs: {}".format(in_blobs)) + print(" " * 10 + "Output blobs: {}".format(out_blobs)) def inspect_input_features(self): """ Prints the name and type of input features. 
@@ -941,8 +1185,10 @@ def inspect_input_features(self): if n_input_features < 1: return for i, input_feature in enumerate(input_features[::-1]): - print('[Id: {}] Name: {}'.format(n_input_features - i - 1, input_feature.name)) - print(' ' * 10 + 'Type: {}'.format(input_feature.type)) + print( + "[Id: {}] Name: {}".format(n_input_features - i - 1, input_feature.name) + ) + print(" " * 10 + "Type: {}".format(input_feature.type)) def inspect_output_features(self): """ Prints the name and type of output features. @@ -952,8 +1198,12 @@ def inspect_output_features(self): if n_output_features < 1: return for i, output_feature in enumerate(output_features[::-1]): - print('[Id: {}] Name: {}'.format(n_output_features - i - 1, output_feature.name)) - print(' ' * 10 + 'Type: {}'.format(output_feature.type)) + print( + "[Id: {}] Name: {}".format( + n_output_features - i - 1, output_feature.name + ) + ) + print(" " * 10 + "Type: {}".format(output_feature.type)) def inspect_conv_channels(self, layer_name): """ Prints the output and kernel channels of a convolution layer. @@ -961,16 +1211,16 @@ def inspect_conv_channels(self, layer_name): if self.spec is None: return if layer_name not in self.layer_specs: - raise ValueError('Layer %s does not exist.' % (layer_name)) + raise ValueError("Layer %s does not exist." % (layer_name)) spec_layer = self.layer_specs[layer_name] - if spec_layer.WhichOneof('layer') != 'convolution': - raise ValueError('Layer %s is not a convolution layer.' % (layer_name)) + if spec_layer.WhichOneof("layer") != "convolution": + raise ValueError("Layer %s is not a convolution layer." % (layer_name)) output_channels = spec_layer.convolution.outputChannels kernel_channels = spec_layer.convolution.kernelChannels - print('outputChannels: {}'.format(output_channels)) - print('kernelChannels: {}'.format(kernel_channels)) + print("outputChannels: {}".format(output_channels)) + print("kernelChannels: {}".format(kernel_channels)) def inspect_innerproduct_channels(self, layer_name): """ Prints the output and kernel channels of an innerProduct layer. @@ -978,23 +1228,23 @@ def inspect_innerproduct_channels(self, layer_name): if self.spec is None: return if layer_name not in self.layer_specs: - raise ValueError('Layer %s does not exist.' % (layer_name)) + raise ValueError("Layer %s does not exist." % (layer_name)) spec_layer = self.layer_specs[layer_name] - if spec_layer.WhichOneof('layer') != 'innerProduct': - raise ValueError('Layer %s is not an innerProduct layer.' % (layer_name)) + if spec_layer.WhichOneof("layer") != "innerProduct": + raise ValueError("Layer %s is not an innerProduct layer." 
% (layer_name)) input_channels = spec_layer.innerProduct.inputChannels output_channels = spec_layer.innerProduct.outputChannels - print('inputChannels: {}'.format(input_channels)) - print('outputChannels: {}'.format(output_channels)) + print("inputChannels: {}".format(input_channels)) + print("outputChannels: {}".format(output_channels)) def _get_rank(self, name): return self.rank_dict[name] if name in self.rank_dict else -1 def _set_max_input_rank(self, input_names, output_name): if len(input_names) == 0: - raise ValueError('Input name list empty for collecting rank information') + raise ValueError("Input name list empty for collecting rank information") self.rank_dict[output_name] = -1 for i in range(0, len(input_names)): input_rank = self._get_rank(input_names[i]) @@ -1003,20 +1253,40 @@ def _set_max_input_rank(self, input_names, output_name): return self.rank_dict[output_name] = max(self._get_rank(output_name), input_rank) - def _set_rank_for_reduce_op(self, input_name, output_name, axes, keepdims, reduce_all): + def _set_rank_for_reduce_op( + self, input_name, output_name, axes, keepdims, reduce_all + ): if keepdims: self.rank_dict[output_name] = self._get_rank(input_name) else: if reduce_all or self._get_rank(input_name) == 1: self.rank_dict[output_name] = 1 - elif axes and len(axes) > 0: + elif axes is not None and len(axes) > 0: rank = self._get_rank(input_name) - len(axes) self.rank_dict[output_name] = rank if rank != 0 else 1 else: - raise ValueError('Reduce Ops must provide axes to reduce on if reduce_all is False') - - def add_inner_product(self, name, W, b, input_channels, output_channels, has_bias, - input_name, output_name, **kwargs): + raise ValueError( + "Reduce Ops must provide axes to reduce on if reduce_all is False" + ) + + def add_inner_product( + self, + name, + W, + b, + input_channels, + output_channels, + has_bias, + input_name, + output_name, + int_8_dynamic_quantize=False, + is_quantized_weight=False, + quantization_type="linear", + nbits=8, + quant_scale=None, + quant_bias=None, + quant_lut=None, + ): """ Add an inner product layer to the model. Refer to the **InnerProductLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -1039,13 +1309,19 @@ def add_inner_product(self, name, W, b, input_channels, output_channels, has_bia - If True, the bias vector of this layer is not ignored. - If False, the bias vector is ignored. - input_name: str The input blob name of this layer. output_name: str The output blob name of this layer. - Quantization arguments expected in kwargs, when W is of type bytes(): + Quantization arguments, used when W is of type bytes(): + + int_8_dynamic_quantize: boolean + Whether to quantize and dequantize before and after inner product, respectively. + Expects byte weights, representing int8 values, if True. See NeuralNetwork.proto for other validation conditions. + + is_quantized_weight: bool, optional + Set it to true when W is of type bytes(), representing quantized weights, default: false. quantization_type: str When weights are quantized (i.e. W is of type bytes()), this should be either "linear" or "lut". 
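A usage sketch for `add_inner_product` as reworked above (not part of the patch); `builder`, the blob names, and the assumed `(output_channels, input_channels)` weight layout are illustrative:

```python
import numpy as np

# A 8 -> 4 fully connected layer with non-quantized float weights.
W = np.random.rand(4, 8).astype(np.float32)  # assumed (output_channels, input_channels)
b = np.zeros(4, dtype=np.float32)
builder.add_inner_product(
    name="ip1",
    W=W,
    b=b,
    input_channels=8,
    output_channels=4,
    has_bias=True,
    input_name="features",
    output_name="ip1_out",
)
```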
@@ -1065,7 +1341,7 @@ def add_inner_product(self, name, W, b, input_channels, output_channels, has_bia See Also -------- - add_embedding, add_convolution + add_embedding, add_convolution, add_batched_mat_mul """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) @@ -1075,27 +1351,58 @@ def add_inner_product(self, name, W, b, input_channels, output_channels, has_bia spec_layer_params.inputChannels = input_channels spec_layer_params.outputChannels = output_channels spec_layer_params.hasBias = has_bias + spec_layer_params.int8DynamicQuantize = int_8_dynamic_quantize weights = spec_layer_params.weights - if len(kwargs) == 0: - weights.floatValue.extend(map(float, W.flatten())) + if not is_quantized_weight and isinstance(W, _np.ndarray): + weights.floatValue.extend(W.flatten()) else: - _verify_quantization_arguments(weight=W, output_channels=output_channels, **kwargs) - _fill_quantized_weights(weights_message=weights, W=W, **kwargs) + + _verify_quantization_arguments( + weight=W, + output_channels=output_channels, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + int_8_dynamic_quantize=int_8_dynamic_quantize, + ) + + _fill_quantized_weights( + weights_message=weights, + W=W, + use_int_8=int_8_dynamic_quantize, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) if has_bias: bias = spec_layer_params.bias - bias.floatValue.extend(map(float, b.flatten())) - return spec_layer - - def add_embedding(self, name, W, b, input_dim, output_channels, has_bias, - input_name, output_name, - is_quantized_weight=False, - quantization_type='linear', - nbits=8, - quant_scale=None, - quant_bias=None, - quant_lut=None): + bias.floatValue.extend(b.flatten()) + + return spec_layer + + def add_embedding( + self, + name, + W, + b, + input_dim, + output_channels, + has_bias, + input_name, + output_name, + is_quantized_weight=False, + quantization_type="linear", + nbits=8, + quant_scale=None, + quant_bias=None, + quant_lut=None, + ): """ Add an embedding layer to the model. Refer to the **EmbeddingLayerParams** message in specification (NeuralNetwork.proto) for more details. 
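An illustrative sketch for `add_embedding` (not part of the patch); `builder`, the blob names, and the assumed `(output_channels, input_dim)` weight layout are hypothetical:

```python
import numpy as np

# Embed a 100-entry vocabulary into 16 channels, no bias.
W = np.random.rand(16, 100).astype(np.float32)  # assumed (output_channels, input_dim)
builder.add_embedding(
    name="embed",
    W=W,
    b=None,            # no bias vector since has_bias=False
    input_dim=100,
    output_channels=16,
    has_bias=False,
    input_name="token_id",
    output_name="embedded",
)
```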
@@ -1161,19 +1468,31 @@ def add_embedding(self, name, W, b, input_dim, output_channels, has_bias, weights = spec_layer_params.weights if not is_quantized_weight: - weights.floatValue.extend(map(float, W.flatten())) + weights.floatValue.extend(W.flatten()) else: - _verify_quantization_arguments(weight=W, output_channels=output_channels, - quantization_type=quantization_type, nbits=nbits, - quant_scale=quant_scale, quant_bias=quant_bias, quant_lut=quant_lut) - - _fill_quantized_weights(weights_message=weights, W=W, - quantization_type=quantization_type, nbits=nbits, - quant_scale=quant_scale, quant_bias=quant_bias, quant_lut=quant_lut) + _verify_quantization_arguments( + weight=W, + output_channels=output_channels, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) + + _fill_quantized_weights( + weights_message=weights, + W=W, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) if has_bias: bias = spec_layer_params.bias - bias.floatValue.extend(map(float, b.flatten())) + bias.floatValue.extend(b.flatten()) return spec_layer @@ -1196,11 +1515,21 @@ def add_softmax(self, name, input_name, output_name): add_activation, add_inner_product, add_convolution """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.softmax.MergeFromString(b'') - return spec_layer - - def add_activation(self, name, non_linearity, input_name, output_name, - params=None): + spec_layer.softmax.MergeFromString(b"") + return spec_layer + + def add_activation( + self, + name, + non_linearity, + input_name, + output_name, + params=None, + input_rank=None, + input_shape=None, + output_rank=None, + output_shape=None, + ): """ Add an activation layer to the model. Refer to the specification (NeuralNetwork.proto) for more details. 
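A sketch of the `add_activation` / `add_softmax` calls touched above (editor's illustration, not from the patch); `builder` and the blob names are assumed, and the leaky-ReLU slope is passed through `params` exactly as the hunk below reads it:

```python
# LEAKYRELU reads its slope from params[0] per the dispatch code above.
builder.add_activation(
    name="act1",
    non_linearity="LEAKYRELU",
    input_name="ip1_out",
    output_name="act1_out",
    params=[0.3],
)
builder.add_softmax(name="probs_layer", input_name="act1_out", output_name="probs")
```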
@@ -1283,23 +1612,40 @@ def add_activation(self, name, non_linearity, input_name, output_name, -------- add_convolution, add_softmax """ - - spec_layer = self._add_generic_layer(name, [input_name], [output_name]) + input_rank = ( + len(input_shape) if (input_shape and not input_rank) else input_rank + ) + output_rank = ( + len(output_shape) if (output_shape and not output_rank) else output_rank + ) + spec_layer = self._add_generic_layer( + name, + [input_name], + [output_name], + [input_rank] if input_rank else None, + [input_shape] if input_shape else None, + [output_rank] if output_rank else None, + [output_shape] if output_shape else None, + ) spec_layer_params = spec_layer.activation # Fill in the parameters - non_linearity = non_linearity.upper() if isinstance(non_linearity, str) else non_linearity - if non_linearity == 'RELU': - spec_layer_params.ReLU.MergeFromString(b'') - - elif non_linearity == 'SIGMOID': - spec_layer_params.sigmoid.MergeFromString(b'') - - elif non_linearity == 'TANH': - spec_layer_params.tanh.MergeFromString(b'') - - elif non_linearity == 'SCALED_TANH': - spec_layer_params.scaledTanh.MergeFromString(b'') + non_linearity = ( + non_linearity.upper() + if isinstance(non_linearity, _string_types) + else non_linearity + ) + if non_linearity == "RELU": + spec_layer_params.ReLU.MergeFromString(b"") + + elif non_linearity == "SIGMOID": + spec_layer_params.sigmoid.MergeFromString(b"") + + elif non_linearity == "TANH": + spec_layer_params.tanh.MergeFromString(b"") + + elif non_linearity == "SCALED_TANH": + spec_layer_params.scaledTanh.MergeFromString(b"") if params is None: alpha, beta = (0.0, 0.0) else: @@ -1307,13 +1653,13 @@ def add_activation(self, name, non_linearity, input_name, output_name, spec_layer_params.scaledTanh.alpha = alpha spec_layer_params.scaledTanh.beta = beta - elif non_linearity == 'SOFTPLUS': - spec_layer_params.softplus.MergeFromString(b'') + elif non_linearity == "SOFTPLUS": + spec_layer_params.softplus.MergeFromString(b"") - elif non_linearity == 'SOFTSIGN': - spec_layer_params.softsign.MergeFromString(b'') + elif non_linearity == "SOFTSIGN": + spec_layer_params.softsign.MergeFromString(b"") - elif non_linearity == 'SIGMOID_HARD': + elif non_linearity == "SIGMOID_HARD": if params is None: alpha, beta = (0.2, 0.5) else: @@ -1321,36 +1667,38 @@ def add_activation(self, name, non_linearity, input_name, output_name, spec_layer_params.sigmoidHard.alpha = alpha spec_layer_params.sigmoidHard.beta = beta - elif non_linearity == 'LEAKYRELU': + elif non_linearity == "LEAKYRELU": if params is None: alpha = 0.3 else: alpha = params[0] spec_layer_params.leakyReLU.alpha = float(alpha) - elif non_linearity == 'PRELU': + elif non_linearity == "PRELU": # PReLU must provide an np array in params[0] - spec_layer_params.PReLU.alpha.floatValue.extend(map(float, params.flatten())) + spec_layer_params.PReLU.alpha.floatValue.extend(params.flatten()) - elif non_linearity == 'ELU': + elif non_linearity == "ELU": # ELU must provide an alpha in params[0] spec_layer_params.ELU.alpha = float(params) - elif non_linearity == 'PARAMETRICSOFTPLUS': + elif non_linearity == "PARAMETRICSOFTPLUS": # Parametric softplus must provide two np arrays for alpha and beta alphas, betas = (params[0], params[1]) # Weight alignment: Keras [H,W,C,F] - spec_layer_params.parametricSoftplus.alpha.floatValue.extend(map(float, alphas.flatten())) - spec_layer_params.parametricSoftplus.beta.floatValue.extend(map(float, betas.flatten())) + spec_layer_params.parametricSoftplus.alpha.floatValue.extend( + 
alphas.flatten() + ) + spec_layer_params.parametricSoftplus.beta.floatValue.extend(betas.flatten()) - elif non_linearity == 'THRESHOLDEDRELU': + elif non_linearity == "THRESHOLDEDRELU": if params is None: theta = 1.0 else: theta = params spec_layer_params.thresholdedReLU.alpha = float(theta) - elif non_linearity == 'LINEAR': + elif non_linearity == "LINEAR": if params is None: alpha, beta = (1.0, 0.0) else: @@ -1400,34 +1748,43 @@ def add_elementwise(self, name, input_names, output_name, mode, alpha=None): spec_layer = self._add_generic_layer(name, input_names, [output_name]) # add one of the following layers - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'CONCAT': + mode = mode.upper() if isinstance(mode, _string_types) else mode + if mode == "CONCAT": spec_layer.concat.sequenceConcat = False - elif mode == 'SEQUENCE_CONCAT': + elif mode == "SEQUENCE_CONCAT": spec_layer.concat.sequenceConcat = True - elif mode == 'ADD': - spec_layer.add.MergeFromString(b'') + elif mode == "ADD": + spec_layer.add.MergeFromString(b"") if alpha: spec_layer.add.alpha = alpha - elif mode == 'MULTIPLY': - spec_layer.multiply.MergeFromString(b'') + elif mode == "MULTIPLY": + spec_layer.multiply.MergeFromString(b"") if alpha: spec_layer.multiply.alpha = alpha - elif mode == 'COS': + elif mode == "COS": spec_layer.dot.cosineSimilarity = True - elif mode == 'DOT': + elif mode == "DOT": spec_layer.dot.cosineSimilarity = False - elif mode == 'MAX': - spec_layer.max.MergeFromString(b'') - elif mode == 'MIN': - spec_layer.min.MergeFromString(b'') - elif mode == 'AVE': - spec_layer.average.MergeFromString(b'') + elif mode == "MAX": + spec_layer.max.MergeFromString(b"") + elif mode == "MIN": + spec_layer.min.MergeFromString(b"") + elif mode == "AVE": + spec_layer.average.MergeFromString(b"") else: - raise ValueError('Unsupported elementwise mode %s' % mode) + raise ValueError("Unsupported elementwise mode %s" % mode) return spec_layer - def add_upsample(self, name, scaling_factor_h, scaling_factor_w, input_name, output_name, mode='NN'): + def add_upsample( + self, + name, + scaling_factor_h, + scaling_factor_w, + input_name, + output_name, + mode="NN", + linear_upsample_mode="DEFAULT", + ): """ Add an upsample layer to the model. Refer to the **UpsampleLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -1436,37 +1793,105 @@ def add_upsample(self, name, scaling_factor_h, scaling_factor_w, input_name, out ---------- name: str The name of this layer. - scaling_factor_h: int - Scaling factor on the vertical direction. - scaling_factor_w: int - Scaling factor on the horizontal direction. + scaling_factor_h: int or float + Scaling factor on the vertical direction. Float values only supported with BILINEAR and ALIGN_CORNERS_* + scaling_factor_w: int or float + Scaling factor on the horizontal direction. Float values only supported with BILINEAR and ALIGN_CORNERS_* input_name: str The input blob name of this layer. output_name: str The output blob name of this layer. mode: str - Following values are supported: + Overall interpolation mode. The following values are supported: 'NN': nearest neighbour 'BILINEAR': bilinear interpolation + linear_upsample_mode: str + Specifies the behavior for linear upsampling. Only valid when Interpolation Mode is BILINEAR. 
+ If input grid is [0, Xin-1] (corresponding to an input size of Xin), and if the output size is Xout, + then the grid points are sampled in the following manner: + 'DEFAULT': + spacing = (Xin-Xin/Xout) / (Xout-1) + grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0,1,2,..,Xout-1 + 'ALIGN_CORNERS_TRUE': + spacing = (Xin-1) / (Xout-1) + grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0,1,2,..,Xout-1 + 'ALIGN_CORNERS_FALSE': + spacing = Xin / Xout + grid_point[i] = min(Xin-1, max(0, i * spacing + 0.5 * spacing - 0.5)), for i = 0,1,2,..,Xout-1 See Also -------- - add_sequence_repeat, add_elementwise - """ + add_resize_bilinear + + """ + + mode = mode.upper() if isinstance(mode, _string_types) else mode + linear_upsample_mode = ( + linear_upsample_mode.upper() + if isinstance(linear_upsample_mode, _string_types) + else linear_upsample_mode + ) + if not mode in ["NN", "BILINEAR"]: + raise ValueError("Unsupported upsampling mode %s" % mode) + if not linear_upsample_mode in [ + "DEFAULT", + "ALIGN_CORNERS_TRUE", + "ALIGN_CORNERS_FALSE", + ]: + raise ValueError( + "Unsupported linear upsampling mode %s" % linear_upsample_mode + ) + + # Default linear upsample mode is backwards compatible, else set spec to iOS14 + if ( + linear_upsample_mode != "DEFAULT" + and self.spec + and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ) + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.upsample - spec_layer_params.scalingFactor.append(scaling_factor_h) - spec_layer_params.scalingFactor.append(scaling_factor_w) - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'NN': - spec_layer_params.mode = _NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value('NN') - elif mode == 'BILINEAR': - spec_layer_params.mode = _NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value('BILINEAR') + if ( + scaling_factor_h - _math_floor(scaling_factor_h) > 0.001 + or scaling_factor_w - _math_floor(scaling_factor_w) > 0.001 + ): + if mode != "BILINEAR" or linear_upsample_mode not in [ + "ALIGN_CORNERS_TRUE", + "ALIGN_CORNERS_FALSE", + ]: + raise ValueError( + "Fractional upsampling only compatible with BILINEAR and ALIGN_CORNERS_TRUE or ALIGN_CORNERS_FALSE" + ) + spec_layer_params.fractionalScalingFactor.append(float(scaling_factor_h)) + spec_layer_params.fractionalScalingFactor.append(float(scaling_factor_w)) else: - raise ValueError('Unsupported upsampling mode %s' % mode) - return spec_layer - - def add_scale(self, name, W, b, has_bias, input_name, output_name, shape_scale=None, shape_bias=None): + spec_layer_params.scalingFactor.append(int(scaling_factor_h)) + spec_layer_params.scalingFactor.append(int(scaling_factor_w)) + + spec_layer_params.mode = _NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value( + mode + ) + spec_layer_params.linearUpsampleMode = _NeuralNetwork_pb2.UpsampleLayerParams.LinearUpsampleMode.Value( + linear_upsample_mode + ) + + return spec_layer + + def add_scale( + self, + name, + W, + b, + has_bias, + input_name, + output_name, + shape_scale=None, + shape_bias=None, + ): """ Add a scale layer to the model. Refer to the **ScaleLayerParams** message in specification (NeuralNetwork.proto) for more details. 
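A sketch for the `add_upsample` behavior added above (not part of the patch; `builder` and blob names are assumed): fractional scaling factors are only accepted with `BILINEAR` mode plus an `ALIGN_CORNERS_*` linear upsample mode, and per the hunk they force the spec up to the iOS 14 specification version:

```python
# 1.5x fractional bilinear upsampling with align-corners sampling.
builder.add_upsample(
    name="up1",
    scaling_factor_h=1.5,
    scaling_factor_w=1.5,
    input_name="feat",
    output_name="feat_up",
    mode="BILINEAR",
    linear_upsample_mode="ALIGN_CORNERS_TRUE",
)
```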
@@ -1512,9 +1937,11 @@ def add_scale(self, name, W, b, has_bias, input_name, output_name, shape_scale=N
 if isinstance(W, int):
 scale.floatValue.append(float(W))
 else:
- scale.floatValue.extend(map(float, W.flatten()))
- if len(scale.floatValue) != np.prod(shape_scale):
- raise ValueError("Dimensions of 'shape_scale' do not match the size of the provided 'scale' parameter")
+ scale.floatValue.extend(W.flatten())
+ if len(scale.floatValue) != _np.prod(shape_scale):
+ raise ValueError(
+ "Dimensions of 'shape_scale' do not match the size of the provided 'scale' parameter"
+ )
 # add bias and its shape
 if has_bias:
@@ -1523,9 +1950,11 @@ def add_scale(self, name, W, b, has_bias, input_name, output_name, shape_scale=N
 if isinstance(b, int):
 bias.floatValue.append(float(b))
 else:
- bias.floatValue.extend(map(float, b.flatten()))
- if len(bias.floatValue) != np.prod(shape_bias):
- raise ValueError("Dimensions of 'shape_bias' do not match the size of the provided 'b' parameter")
+ bias.floatValue.extend(b.flatten())
+ if len(bias.floatValue) != _np.prod(shape_bias):
+ raise ValueError(
+ "Dimensions of 'shape_bias' do not match the size of the provided 'b' parameter"
+ )
 return spec_layer
 def add_bias(self, name, b, input_name, output_name, shape_bias=None):
@@ -1560,16 +1989,18 @@ def add_bias(self, name, b, input_name, output_name, shape_bias=None):
 # add bias and its shape
 bias = spec_layer_params.bias
 if len(shape_bias) != 1 and len(shape_bias) != 3:
- raise ValueError('Shape of bias layer must have length 1 or 3.')
+ raise ValueError("Shape of bias layer must have length 1 or 3.")
 spec_layer_params.shape.extend(shape_bias)
 if isinstance(b, int):
 bias.floatValue.append(float(b))
 else:
- bias.floatValue.extend(map(float, b.flatten()))
- if len(bias.floatValue) != np.prod(shape_bias):
- raise ValueError("Dimensions of 'shape_bias' do not match the size"
- "of the provided 'b' parameter")
+ bias.floatValue.extend(b.flatten())
+ if len(bias.floatValue) != _np.prod(shape_bias):
+ raise ValueError(
+ "Dimensions of 'shape_bias' do not match the size "
+ "of the provided 'b' parameter"
+ )
 return spec_layer
 def add_sequence_repeat(self, name, nrep, input_name, output_name):
@@ -1597,14 +2028,32 @@ def add_sequence_repeat(self, name, nrep, input_name, output_name):
 spec_layer_params.nRepetitions = nrep
 return spec_layer
- def add_convolution(self, name, kernel_channels, output_channels, height,
- width, stride_height, stride_width, border_mode, groups, W, b, has_bias,
- is_deconv=False, output_shape=None,
- input_name='data', output_name='out',
- dilation_factors=[1, 1],
- padding_top=0, padding_bottom=0, padding_left=0, padding_right=0,
- same_padding_asymmetry_mode='BOTTOM_RIGHT_HEAVY',
- **kwargs):
+ def add_convolution(
+ self,
+ name,
+ kernel_channels,
+ output_channels,
+ height,
+ width,
+ stride_height,
+ stride_width,
+ border_mode,
+ groups,
+ W,
+ b,
+ has_bias,
+ is_deconv=False,
+ output_shape=None,
+ input_name="data",
+ output_name="out",
+ dilation_factors=[1, 1],
+ padding_top=0,
+ padding_bottom=0,
+ padding_left=0,
+ padding_right=0,
+ same_padding_asymmetry_mode="BOTTOM_RIGHT_HEAVY",
+ **kwargs
+ ):
 """
 Add a convolution layer to the network.
 Refer to the **ConvolutionLayerParams** message in specification (NeuralNetwork.proto) for more details.
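A sketch for `add_scale` and `add_bias` as cleaned up above (not part of the patch); `builder`, the blob names, and the 3-channel shapes are illustrative. Note the size checks in the hunk require the flattened parameter length to match the product of the given shape:

```python
import numpy as np

# Per-channel scale and bias over a 3-channel blob.
builder.add_scale(
    name="scale1",
    W=np.array([0.5, 1.0, 2.0]),
    b=np.array([0.0, 0.1, -0.1]),
    has_bias=True,
    input_name="feat_up",
    output_name="scaled",
    shape_scale=[3],
    shape_bias=[3],
)
builder.add_bias(
    name="bias1",
    b=np.array([0.05, -0.05, 0.0]),
    input_name="scaled",
    output_name="biased",
    shape_bias=[3],  # must have length 1 or 3 per the check above
)
```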
@@ -1702,7 +2151,7 @@ def add_convolution(self, name, kernel_channels, output_channels, height, See Also -------- - add_pooling, add_activation, add_batchnorm + add_convolution3d, add_pooling, add_activation, add_batchnorm """ @@ -1729,27 +2178,40 @@ def add_convolution(self, name, kernel_channels, output_channels, height, spec_layer_params.stride.append(stride_height) spec_layer_params.stride.append(stride_width) - border_mode = border_mode.lower() if isinstance(border_mode, str) else border_mode - same_padding_asymmetry_mode = same_padding_asymmetry_mode.upper() \ - if isinstance(same_padding_asymmetry_mode, str) else same_padding_asymmetry_mode - - if border_mode == 'valid': + border_mode = ( + border_mode.lower() + if isinstance(border_mode, _string_types) + else border_mode + ) + same_padding_asymmetry_mode = ( + same_padding_asymmetry_mode.upper() + if isinstance(same_padding_asymmetry_mode, _string_types) + else same_padding_asymmetry_mode + ) + + if border_mode == "valid": height_border = spec_layer_params.valid.paddingAmounts.borderAmounts.add() height_border.startEdgeSize = padding_top height_border.endEdgeSize = padding_bottom width_border = spec_layer_params.valid.paddingAmounts.borderAmounts.add() width_border.startEdgeSize = padding_left width_border.endEdgeSize = padding_right - elif border_mode == 'same': + elif border_mode == "same": if not ( - same_padding_asymmetry_mode == 'BOTTOM_RIGHT_HEAVY' or same_padding_asymmetry_mode == 'TOP_LEFT_HEAVY'): + same_padding_asymmetry_mode == "BOTTOM_RIGHT_HEAVY" + or same_padding_asymmetry_mode == "TOP_LEFT_HEAVY" + ): raise ValueError( - "Invalid value %d of same_padding_asymmetry_mode parameter" % same_padding_asymmetry_mode) + "Invalid value %d of same_padding_asymmetry_mode parameter" + % same_padding_asymmetry_mode + ) spec_layer_params.same.asymmetryMode = _NeuralNetwork_pb2.SamePadding.SamePaddingMode.Value( - same_padding_asymmetry_mode) + same_padding_asymmetry_mode + ) else: raise NotImplementedError( - 'Border mode %s is not implemented.' % border_mode) + "Border mode %s is not implemented." 
% border_mode + ) spec_layer_params.nGroups = groups spec_layer_params.hasBias = has_bias @@ -1764,20 +2226,24 @@ def add_convolution(self, name, kernel_channels, output_channels, height, # Weight assignments if len(kwargs) > 0: - _verify_quantization_arguments(weight=W, output_channels=output_channels, **kwargs) + _verify_quantization_arguments( + weight=W, output_channels=output_channels, **kwargs + ) - nbits = kwargs.get('nbits', 8) + nbits = kwargs.get("nbits", 8) num_weights = (output_channels * kernel_channels * height * width) / groups if nbits < 8: - byte_arr = np.frombuffer(W, dtype=np.uint8) - W = unpack_to_bytes(byte_arr, num_weights, nbits) + byte_arr = _np.frombuffer(W, dtype=_np.uint8) + W = _unpack_to_bytes(byte_arr, num_weights, nbits) else: - W = np.frombuffer(W, dtype=np.uint8) + W = _np.frombuffer(W, dtype=_np.uint8) if is_deconv: - W = np.reshape(W, (height, width, kernel_channels, output_channels / groups)) + W = _np.reshape( + W, (height, width, kernel_channels, output_channels / groups) + ) else: - W = np.reshape(W, (height, width, kernel_channels, output_channels)) + W = _np.reshape(W, (height, width, kernel_channels, output_channels)) # Weight alignment: MLModel Spec requires following weight arrangement: # is_deconv == False ==> (output_channels, kernel_channels, height, width), where kernel_channel = input_channels / groups @@ -1791,13 +2257,15 @@ def add_convolution(self, name, kernel_channels, output_channels, height, # Assign weights weights = spec_layer_params.weights if len(kwargs) == 0: # no quantization - weights.floatValue.extend(map(float, Wt.flatten())) + weights.floatValue.extend(Wt.flatten()) else: # there is quantization W_bytes = bytes() if nbits == 8: W_bytes += Wt.flatten().tobytes() else: - W_bytes += _convert_array_to_nbit_quantized_bytes(Wt.flatten(), nbits).tobytes() + W_bytes += _convert_array_to_nbit_quantized_bytes( + Wt.flatten(), nbits + ).tobytes() _fill_quantized_weights(weights_message=weights, W=W_bytes, **kwargs) # Assign biases @@ -1808,10 +2276,206 @@ def add_convolution(self, name, kernel_channels, output_channels, height, return spec_layer - def add_pooling(self, name, height, width, stride_height, stride_width, - layer_type, padding_type, input_name, output_name, exclude_pad_area=True, is_global=False, - padding_top=0, padding_bottom=0, padding_left=0, padding_right=0, - same_padding_asymmetry_mode='BOTTOM_RIGHT_HEAVY'): + def add_convolution3d( + self, + name, + input_channels, + output_channels, + depth, + height, + width, + W, + b, + has_bias, + groups=1, + stride_depth=1, + stride_height=1, + stride_width=1, + dilation_width=1, + dilation_height=1, + dilation_depth=1, + is_deconv=False, + output_shape=None, + padding_mode="valid", + padding_front=0, + padding_back=0, + padding_top=0, + padding_bottom=0, + padding_left=0, + padding_right=0, + input_name="data", + output_name="out", + ): + """ + Add a 3 dimensional convolution layer to the network. + Refer to the **Convolution3DLayerParams** message in specification (NeuralNetwork.proto) for + more details. + + Parameters + ---------- + name: str + The name of this layer. + input_channels: int + Number of input channels. + output_channels: int + Number of filter kernels. This is equal to the number of channels in the output blob. + depth: int + Depth of each kernel. + height: int + Height of each kernel. + width: int + Width of each kernel. + W: numpy.array or bytes() + Weight of the convolution kernels. 
+ - W should have shape:
+ - If deconv is False: (output_channels, kernel_channels, depth, height, width), where
+ kernel_channels = input_channels / groups
+ - If deconv is True: (output_channels / groups, kernel_channels, depth, height, width)
+ where kernel_channels = input_channels
+ b: numpy.array
+ Biases of the convolution kernels. b should have shape (outputChannels, ).
+ has_bias: boolean
+ Whether bias is ignored.
+ - If True, bias is not ignored.
+ - If False, bias is ignored.
+ groups: int
+ Number of kernel groups. Input is divided into groups along the channel axis. Each
+ kernel group shares the same weights. Defaults to 1.
+ stride_depth, stride_height, stride_width: int
+ Stride along the depth, height, and width directions, respectively. Must all be positive
+ integers. Defaults to 1.
+ dilation_depth, dilation_width, dilation_height: int
+ Dilation factors across depth, height, and width directions. Must all be positive
+ integers. Defaults to 1 in each dimension.
+ is_deconv: bool
+ True if this is Convolution Transpose, otherwise False.
+ output_shape: None or Tuple of int
+ Applicable only for Deconvolution layer.
+ None if Convolution.
+ Tuple of length 3 if Convolution Transpose.
+ padding_mode: str
+ Option for the padding type and output blob shape. Can be 'custom', 'valid', or 'same'.
+ Defaults to 'valid'. Case-insensitive.
+ padding_front, padding_back, padding_top, padding_bottom, padding_left, padding_right: int
+ Values of depth (front, back), height (top, bottom), and width (left, right) padding to
+ be used. Must all be non-negative integers. All default to 0.
+ input_name: str or list of str
+ The input blob name(s) of this layer.
+ output_name: str
+ The output blob name of this layer.
+
+ Depthwise convolution is a special case of convolution, where we have:
+ kernel_channels = 1 (== input_channels / groups)
+ output_channels = channel_multiplier * input_channels
+ groups = input_channels
+ W: [Kernel_depth, Kernel_height, Kernel_width, 1, channel_multiplier * input_channels]
+
+
+ See Also
+ --------
+ add_convolution, add_pooling, add_activation, add_batchnorm
+
+ """
+ # Update spec version if necessary
+ if self.spec and (
+ not self.spec.specificationVersion
+ or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14
+ ):
+ self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14
+
+ if isinstance(input_name, tuple):
+ input_names = list(input_name)
+ elif isinstance(input_name, list):
+ input_names = input_name
+ else:
+ input_names = [input_name]
+
+ # 3D convolution doesn't currently support two inputs
+ if len(input_names) > 1:
+ raise ValueError("3D convolution only supports 1 input.")
+
+ spec_layer = self._add_generic_layer(name, input_names, [output_name])
+
+ # Set the layer params
+ spec_layer_params = spec_layer.convolution3d
+ spec_layer_params.isDeconvolution = is_deconv
+ spec_layer_params.nGroups = groups
+
+ spec_layer_params.outputChannels = output_channels
+ spec_layer_params.inputChannels = input_channels
+
+ spec_layer_params.kernelDepth = depth
+ spec_layer_params.kernelHeight = height
+ spec_layer_params.kernelWidth = width
+
+ spec_layer_params.strideDepth = stride_depth
+ spec_layer_params.strideHeight = stride_height
+ spec_layer_params.strideWidth = stride_width
+
+ if is_deconv and output_shape:
+ spec_layer_params.outputShape.append(output_shape[0])
+ spec_layer_params.outputShape.append(output_shape[1])
+ spec_layer_params.outputShape.append(output_shape[2])
+
+ supported_padding_modes = {"CUSTOM", "VALID",
"SAME"} + if padding_mode.upper() not in supported_padding_modes: + raise ValueError( + "Unsupported padding mode: %s. Must be one of %s" + % (padding_mode, supported_padding_modes) + ) + if padding_mode.upper() == "CUSTOM": + spec_layer_params.customPaddingFront = padding_front + spec_layer_params.customPaddingBack = padding_back + spec_layer_params.customPaddingTop = padding_top + spec_layer_params.customPaddingBottom = padding_bottom + spec_layer_params.customPaddingLeft = padding_left + spec_layer_params.customPaddingRight = padding_right + spec_layer_params.paddingType = _NeuralNetwork_pb2.Convolution3DLayerParams.PaddingType.Value( + padding_mode.upper() + ) + + spec_layer_params.dilationDepth = dilation_depth + spec_layer_params.dilationHeight = dilation_height + spec_layer_params.dilationWidth = dilation_width + + # Weight alignment: MLModel Spec requires following weight arrangement: + # is_deconv == False ==> (output_channels, kernel_channels, depth, height, width), where kernel_channel = input_channels / groups + # is_deconv == True ==> (kernel_channels, output_channels / groups, height, width), where kernel_channel = input_channels + if is_deconv: + W = W.transpose((1, 0, 2, 3, 4)) + + # Assign weights + weights = spec_layer_params.weights + weights.floatValue.extend(W.flatten()) + + # Assign biases + spec_layer_params.hasBias = has_bias + if has_bias: + bias = spec_layer_params.bias + for f in range(output_channels): + bias.floatValue.append(float(b[f])) + + return spec_layer + + def add_pooling( + self, + name, + height, + width, + stride_height, + stride_width, + layer_type, + padding_type, + input_name, + output_name, + exclude_pad_area=True, + is_global=False, + padding_top=0, + padding_bottom=0, + padding_left=0, + padding_right=0, + same_padding_asymmetry_mode="BOTTOM_RIGHT_HEAVY", + ): """ Add a pooling layer to the model that performs spatial pooling. Refer to the **PoolingLayerParams** message in specification (NeuralNetwork.proto) for more details. 
@@ -1854,36 +2518,53 @@ def add_pooling(self, name, height, width, stride_height, stride_width, See Also -------- - add_convolution, add_activation + add_pooling3d, add_convolution, add_activation """ + + # Create spec layer spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.pooling # Set the parameters - spec_layer_params.type = \ - _NeuralNetwork_pb2.PoolingLayerParams.PoolingType.Value(layer_type.upper()) - - padding_type = padding_type.upper() if isinstance(padding_type, str) else padding_type - same_padding_asymmetry_mode = same_padding_asymmetry_mode.upper() \ - if isinstance(same_padding_asymmetry_mode, str) else same_padding_asymmetry_mode - - if padding_type == 'VALID': + spec_layer_params.type = _NeuralNetwork_pb2.PoolingLayerParams.PoolingType.Value( + layer_type.upper() + ) + + padding_type = ( + padding_type.upper() + if isinstance(padding_type, _string_types) + else padding_type + ) + same_padding_asymmetry_mode = ( + same_padding_asymmetry_mode.upper() + if isinstance(same_padding_asymmetry_mode, _string_types) + else same_padding_asymmetry_mode + ) + + if padding_type == "VALID": height_border = spec_layer_params.valid.paddingAmounts.borderAmounts.add() height_border.startEdgeSize = padding_top height_border.endEdgeSize = padding_bottom width_border = spec_layer_params.valid.paddingAmounts.borderAmounts.add() width_border.startEdgeSize = padding_left width_border.endEdgeSize = padding_right - elif padding_type == 'SAME': + elif padding_type == "SAME": if not ( - same_padding_asymmetry_mode == 'BOTTOM_RIGHT_HEAVY' or same_padding_asymmetry_mode == 'TOP_LEFT_HEAVY'): + same_padding_asymmetry_mode == "BOTTOM_RIGHT_HEAVY" + or same_padding_asymmetry_mode == "TOP_LEFT_HEAVY" + ): raise ValueError( - "Invalid value %d of same_padding_asymmetry_mode parameter" % same_padding_asymmetry_mode) + "Invalid value %d of same_padding_asymmetry_mode parameter" + % same_padding_asymmetry_mode + ) spec_layer_params.same.asymmetryMode = _NeuralNetwork_pb2.SamePadding.SamePaddingMode.Value( - same_padding_asymmetry_mode) - elif padding_type == 'INCLUDE_LAST_PIXEL': + same_padding_asymmetry_mode + ) + elif padding_type == "INCLUDE_LAST_PIXEL": if padding_top != padding_bottom or padding_left != padding_right: - raise ValueError("Only symmetric padding is supported with the INCLUDE_LAST_PIXEL padding type") + raise ValueError( + "Only symmetric padding is supported with the INCLUDE_LAST_PIXEL padding type" + ) spec_layer_params.includeLastPixel.paddingAmounts.append(padding_top) spec_layer_params.includeLastPixel.paddingAmounts.append(padding_left) else: @@ -1897,10 +2578,168 @@ def add_pooling(self, name, height, width, stride_height, stride_width, spec_layer_params.globalPooling = is_global return spec_layer - def add_padding(self, name, left=0, right=0, top=0, bottom=0, - value=0, input_name='data', output_name='out', - padding_type='constant'): + def add_pooling3d( + self, + name, + input_name, + output_name, + pooling_type, + kernel_depth, + kernel_height, + kernel_width, + stride_depth, + stride_height, + stride_width, + padding_mode="valid", + custom_padding_front=0, + custom_padding_back=0, + custom_padding_top=0, + custom_padding_bottom=0, + custom_padding_left=0, + custom_padding_right=0, + average_pooling_count_excludes_padding=False, + ): + """ + Add a pooling layer to the model that performs spatial pooling across three dimensions. 
+ Refer to the **Pooling3DLayerParams** message in specification (NeuralNetwork.proto) for more details. + + Parameters + ---------- + name: str + The name of this layer. + input_name: str + The input blob name of this layer. + output_name: str + The output blob name of this layer. + pooling_type: str + Type of pooling performed. Can either be 'MAX' OR 'AVERAGE'. + kernel_depth: int + Depth of the pooling region. + kernel_height: int + Height of pooling region. + kernel_width: int + Width of pooling region. + stride_depth: int + Stride along the depth direction + stride_height: int + Stride along the height direction. + stride_width: int + Stride along the width direction. + padding_mode: str + Option for the padding type and output blob shape. + Can be 'VALID', 'SAME', or 'CUSTOM'. + custom_padding_front: int + Padding before the input in the depth direction. + custom_padding_back: int + Padding after the input in the depth direction. + custom_padding_top: int + Padding before the input in the height direction. + custom_padding_bottom: int + Padding after the input in the height direction. + custom_padding_left: int + Padding before the input in the width direction. + custom_padding_right: int + Padding after the input in the width direction. + average_pooling_count_excludes_padding: boolean + If true, exclude zeros from padding in average pooling. Can only be true for AVERAGE padding. + + See Also + -------- + add_pooling, add_global_pooling3d + """ + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + spec_layer = self._add_generic_layer(name, [input_name], [output_name]) + spec_layer_params = spec_layer.pooling3d + + spec_layer_params.type = _NeuralNetwork_pb2.Pooling3DLayerParams.PoolingType3D.Value( + pooling_type.upper() + ) + + spec_layer_params.kernelDepth = kernel_depth + spec_layer_params.kernelHeight = kernel_height + spec_layer_params.kernelWidth = kernel_width + + spec_layer_params.strideDepth = stride_depth + spec_layer_params.strideHeight = stride_height + spec_layer_params.strideWidth = stride_width + + supported_padding_modes = {"CUSTOM", "VALID", "SAME"} + if padding_mode.upper() not in supported_padding_modes: + raise ValueError( + "Unsupported padding mode: %s. Must be one of %s" + % (padding_mode, supported_padding_modes) + ) + if padding_mode.upper() == "CUSTOM": + spec_layer_params.customPaddingFront = custom_padding_front + spec_layer_params.customPaddingBack = custom_padding_back + spec_layer_params.customPaddingTop = custom_padding_top + spec_layer_params.customPaddingBottom = custom_padding_bottom + spec_layer_params.customPaddingLeft = custom_padding_left + spec_layer_params.customPaddingRight = custom_padding_right + spec_layer_params.paddingType = _NeuralNetwork_pb2.Pooling3DLayerParams.Pooling3DPaddingType.Value( + padding_mode.upper() + ) + + spec_layer_params.countExcludePadding = average_pooling_count_excludes_padding + + return spec_layer + + def add_global_pooling3d(self, name, input_name, output_name, pooling_type): + """ + Add a layer to pool three spatial dimensions down to one value. + This behaves like a special case of Pooling3DLayerParams in which + the Kernel is the size of the input and there is no padding. + Refer to the **GlobalPooling3DLayerParams** message in specification (NeuralNetwork.proto) for more details. + + Parameters + ---------- + name: str + The name of this layer. 
+ input_name: str + The input blob name of this layer. + output_name: str + The output blob name of this layer. + pooling_type: str + Type of pooling performed. Can either be 'MAX' OR 'AVERAGE'. + + See Also + -------- + add_pooling, add_pooling3d + """ + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + + spec_layer = self._add_generic_layer(name, [input_name], [output_name]) + spec_layer_params = spec_layer.globalPooling3d + + spec_layer_params.type = _NeuralNetwork_pb2.GlobalPooling3DLayerParams.GlobalPoolingType3D.Value( + pooling_type.upper() + ) + + return spec_layer + + def add_padding( + self, + name, + left=0, + right=0, + top=0, + bottom=0, + value=0, + input_name="data", + output_name="out", + padding_type="constant", + ): """ + + Add a padding layer to the model that performs padding along spatial dimensions. Refer to the **PaddingLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -1933,13 +2772,17 @@ def add_padding(self, name, left=0, right=0, top=0, bottom=0, spec_layer_params = spec_layer.padding # Set the parameters - padding_type = padding_type.lower() if isinstance(padding_type, str) else padding_type - if padding_type == 'constant': + padding_type = ( + padding_type.lower() + if isinstance(padding_type, _string_types) + else padding_type + ) + if padding_type == "constant": spec_layer_params.constant.value = value - elif padding_type == 'reflection': - spec_layer_params.reflection.MergeFromString(b'') - elif padding_type == 'replication': - spec_layer_params.replication.MergeFromString(b'') + elif padding_type == "reflection": + spec_layer_params.reflection.MergeFromString(b"") + elif padding_type == "replication": + spec_layer_params.replication.MergeFromString(b"") else: raise ValueError("Unknown padding_type %s" % padding_type) @@ -1951,8 +2794,9 @@ def add_padding(self, name, left=0, right=0, top=0, bottom=0, width_border.endEdgeSize = right return spec_layer - def add_crop(self, name, left, right, top, bottom, offset, input_names, - output_name): + def add_crop( + self, name, left, right, top, bottom, offset, input_names, output_name + ): """ Add a cropping layer to the model. The cropping layer have two functional modes: @@ -2007,8 +2851,20 @@ def add_crop(self, name, left, right, top, bottom, offset, input_names, width_border.endEdgeSize = right return spec_layer - def add_simple_rnn(self, name, W_h, W_x, b, hidden_size, input_size, activation, input_names, output_names, - output_all=False, reverse_input=False): + def add_simple_rnn( + self, + name, + W_h, + W_x, + b, + hidden_size, + input_size, + activation, + input_names, + output_names, + output_all=False, + reverse_input=False, + ): """ Add a simple recurrent layer to the model. Refer to the **SimpleRecurrentLayerParams** message in specification (NeuralNetwork.proto) for more details. 
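A sketch tying together `add_padding` and the new `add_global_pooling3d` above (not part of the patch); `builder` and the blob names are illustrative:

```python
# Reflection-pad one pixel on each spatial side, then reduce the three
# spatial dimensions to a single value with global 3D max pooling.
builder.add_padding(
    name="pad1",
    left=1, right=1, top=1, bottom=1,
    input_name="conv3d_out",
    output_name="padded",
    padding_type="reflection",
)
builder.add_global_pooling3d(
    name="gpool",
    input_name="padded",
    output_name="pooled",
    pooling_type="MAX",
)
```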
@@ -2065,16 +2921,28 @@ def add_simple_rnn(self, name, W_h, W_x, b, hidden_size, input_size, activation, _set_recurrent_activation(activation_f, activation) # Write the weights - spec_layer_params.weightMatrix.floatValue.extend(map(float, W_x.flatten())) - spec_layer_params.recursionMatrix.floatValue.extend(map(float, W_h.flatten())) + spec_layer_params.weightMatrix.floatValue.extend(W_x.flatten()) + spec_layer_params.recursionMatrix.floatValue.extend(W_h.flatten()) if b is not None: - spec_layer_params.biasVector.floatValue.extend(map(float, b.flatten())) - return spec_layer - - def add_gru(self, name, W_h, W_x, b, hidden_size, input_size, - input_names, output_names, activation='TANH', inner_activation='SIGMOID_HARD', - output_all=False, reverse_input=False): + spec_layer_params.biasVector.floatValue.extend(b.flatten()) + return spec_layer + + def add_gru( + self, + name, + W_h, + W_x, + b, + hidden_size, + input_size, + input_names, + output_names, + activation="TANH", + inner_activation="SIGMOID_HARD", + output_all=False, + reverse_input=False, + ): """ Add a Gated-Recurrent Unit (GRU) layer to the model. Refer to the **GRULayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -2148,29 +3016,41 @@ def add_gru(self, name, W_h, W_x, b, hidden_size, input_size, R_z, R_r, R_o = W_h W_z, W_r, W_o = W_x - spec_layer_params.updateGateWeightMatrix.floatValue.extend(map(float, W_z.flatten())) - spec_layer_params.resetGateWeightMatrix.floatValue.extend(map(float, W_r.flatten())) - spec_layer_params.outputGateWeightMatrix.floatValue.extend(map(float, W_o.flatten())) + spec_layer_params.updateGateWeightMatrix.floatValue.extend(W_z.flatten()) + spec_layer_params.resetGateWeightMatrix.floatValue.extend(W_r.flatten()) + spec_layer_params.outputGateWeightMatrix.floatValue.extend(W_o.flatten()) - spec_layer_params.updateGateRecursionMatrix.floatValue.extend(map(float, R_z.flatten())) - spec_layer_params.resetGateRecursionMatrix.floatValue.extend(map(float, R_r.flatten())) - spec_layer_params.outputGateRecursionMatrix.floatValue.extend(map(float, R_o.flatten())) + spec_layer_params.updateGateRecursionMatrix.floatValue.extend(R_z.flatten()) + spec_layer_params.resetGateRecursionMatrix.floatValue.extend(R_r.flatten()) + spec_layer_params.outputGateRecursionMatrix.floatValue.extend(R_o.flatten()) if b is not None: b_z, b_r, b_o = b - spec_layer_params.updateGateBiasVector.floatValue.extend(map(float, b_z.flatten())) - spec_layer_params.resetGateBiasVector.floatValue.extend(map(float, b_r.flatten())) - spec_layer_params.outputGateBiasVector.floatValue.extend(map(float, b_o.flatten())) - return spec_layer - - def add_unilstm(self, name, W_h, W_x, b, hidden_size, input_size, input_names, output_names, - inner_activation='SIGMOID', - cell_state_update_activation='TANH', - output_activation='TANH', - peep=None, - output_all=False, - forget_bias=False, coupled_input_forget_gate=False, - cell_clip_threshold=50000.0, reverse_input=False): + spec_layer_params.updateGateBiasVector.floatValue.extend(b_z.flatten()) + spec_layer_params.resetGateBiasVector.floatValue.extend(b_r.flatten()) + spec_layer_params.outputGateBiasVector.floatValue.extend(b_o.flatten()) + return spec_layer + + def add_unilstm( + self, + name, + W_h, + W_x, + b, + hidden_size, + input_size, + input_names, + output_names, + inner_activation="SIGMOID", + cell_state_update_activation="TANH", + output_activation="TANH", + peep=None, + output_all=False, + forget_bias=False, + coupled_input_forget_gate=False, + 
cell_clip_threshold=50000.0, + reverse_input=False, + ): """ Add a Uni-directional LSTM layer to the model. Refer to the **UniDirectionalLSTMLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -2266,39 +3146,54 @@ def add_unilstm(self, name, W_h, W_x, b, hidden_size, input_size, input_names, o R_i, R_f, R_o, R_z = W_h W_i, W_f, W_o, W_z = W_x - weight_params.inputGateWeightMatrix.floatValue.extend(map(float, W_i.flatten())) - weight_params.forgetGateWeightMatrix.floatValue.extend(map(float, W_f.flatten())) - weight_params.outputGateWeightMatrix.floatValue.extend(map(float, W_o.flatten())) - weight_params.blockInputWeightMatrix.floatValue.extend(map(float, W_z.flatten())) + weight_params.inputGateWeightMatrix.floatValue.extend(W_i.flatten()) + weight_params.forgetGateWeightMatrix.floatValue.extend(W_f.flatten()) + weight_params.outputGateWeightMatrix.floatValue.extend(W_o.flatten()) + weight_params.blockInputWeightMatrix.floatValue.extend(W_z.flatten()) - weight_params.inputGateRecursionMatrix.floatValue.extend(map(float, R_i.flatten())) - weight_params.forgetGateRecursionMatrix.floatValue.extend(map(float, R_f.flatten())) - weight_params.outputGateRecursionMatrix.floatValue.extend(map(float, R_o.flatten())) - weight_params.blockInputRecursionMatrix.floatValue.extend(map(float, R_z.flatten())) + weight_params.inputGateRecursionMatrix.floatValue.extend(R_i.flatten()) + weight_params.forgetGateRecursionMatrix.floatValue.extend(R_f.flatten()) + weight_params.outputGateRecursionMatrix.floatValue.extend(R_o.flatten()) + weight_params.blockInputRecursionMatrix.floatValue.extend(R_z.flatten()) if b is not None: b_i, b_f, b_o, b_z = b - weight_params.inputGateBiasVector.floatValue.extend(map(float, b_i.flatten())) - weight_params.forgetGateBiasVector.floatValue.extend(map(float, b_f.flatten())) - weight_params.outputGateBiasVector.floatValue.extend(map(float, b_o.flatten())) - weight_params.blockInputBiasVector.floatValue.extend(map(float, b_z.flatten())) + weight_params.inputGateBiasVector.floatValue.extend(b_i.flatten()) + weight_params.forgetGateBiasVector.floatValue.extend(b_f.flatten()) + weight_params.outputGateBiasVector.floatValue.extend(b_o.flatten()) + weight_params.blockInputBiasVector.floatValue.extend(b_z.flatten()) if peep is not None: p_i, p_f, p_o = peep - weight_params.inputGatePeepholeVector.floatValue.extend(map(float, p_i.flatten())) - weight_params.forgetGatePeepholeVector.floatValue.extend(map(float, p_f.flatten())) - weight_params.outputGatePeepholeVector.floatValue.extend(map(float, p_o.flatten())) - - return spec_layer - - def add_bidirlstm(self, name, W_h, W_x, b, W_h_back, W_x_back, b_back, hidden_size, input_size, - input_names, output_names, - inner_activation='SIGMOID', - cell_state_update_activation='TANH', - output_activation='TANH', - peep=None, peep_back=None, - output_all=False, - forget_bias=False, coupled_input_forget_gate=False, cell_clip_threshold=50000.0): + weight_params.inputGatePeepholeVector.floatValue.extend(p_i.flatten()) + weight_params.forgetGatePeepholeVector.floatValue.extend(p_f.flatten()) + weight_params.outputGatePeepholeVector.floatValue.extend(p_o.flatten()) + + return spec_layer + + def add_bidirlstm( + self, + name, + W_h, + W_x, + b, + W_h_back, + W_x_back, + b_back, + hidden_size, + input_size, + input_names, + output_names, + inner_activation="SIGMOID", + cell_state_update_activation="TANH", + output_activation="TANH", + peep=None, + peep_back=None, + output_all=False, + forget_bias=False, + 
coupled_input_forget_gate=False, + cell_clip_threshold=50000.0, + ): """ Add a Bi-directional LSTM layer to the model. @@ -2416,55 +3311,55 @@ def add_bidirlstm(self, name, W_h, W_x, b, W_h_back, W_x_back, b_back, hidden_si R_i, R_f, R_o, R_z = W_h W_i, W_f, W_o, W_z = W_x - weight_params.inputGateWeightMatrix.floatValue.extend(map(float, W_i.flatten())) - weight_params.forgetGateWeightMatrix.floatValue.extend(map(float, W_f.flatten())) - weight_params.outputGateWeightMatrix.floatValue.extend(map(float, W_o.flatten())) - weight_params.blockInputWeightMatrix.floatValue.extend(map(float, W_z.flatten())) + weight_params.inputGateWeightMatrix.floatValue.extend(W_i.flatten()) + weight_params.forgetGateWeightMatrix.floatValue.extend(W_f.flatten()) + weight_params.outputGateWeightMatrix.floatValue.extend(W_o.flatten()) + weight_params.blockInputWeightMatrix.floatValue.extend(W_z.flatten()) - weight_params.inputGateRecursionMatrix.floatValue.extend(map(float, R_i.flatten())) - weight_params.forgetGateRecursionMatrix.floatValue.extend(map(float, R_f.flatten())) - weight_params.outputGateRecursionMatrix.floatValue.extend(map(float, R_o.flatten())) - weight_params.blockInputRecursionMatrix.floatValue.extend(map(float, R_z.flatten())) + weight_params.inputGateRecursionMatrix.floatValue.extend(R_i.flatten()) + weight_params.forgetGateRecursionMatrix.floatValue.extend(R_f.flatten()) + weight_params.outputGateRecursionMatrix.floatValue.extend(R_o.flatten()) + weight_params.blockInputRecursionMatrix.floatValue.extend(R_z.flatten()) if b is not None: b_i, b_f, b_o, b_z = b - weight_params.inputGateBiasVector.floatValue.extend(map(float, b_i.flatten())) - weight_params.forgetGateBiasVector.floatValue.extend(map(float, b_f.flatten())) - weight_params.outputGateBiasVector.floatValue.extend(map(float, b_o.flatten())) - weight_params.blockInputBiasVector.floatValue.extend(map(float, b_z.flatten())) + weight_params.inputGateBiasVector.floatValue.extend(b_i.flatten()) + weight_params.forgetGateBiasVector.floatValue.extend(b_f.flatten()) + weight_params.outputGateBiasVector.floatValue.extend(b_o.flatten()) + weight_params.blockInputBiasVector.floatValue.extend(b_z.flatten()) if peep is not None: p_i, p_f, p_o = peep - weight_params.inputGatePeepholeVector.floatValue.extend(map(float, p_i.flatten())) - weight_params.forgetGatePeepholeVector.floatValue.extend(map(float, p_f.flatten())) - weight_params.outputGatePeepholeVector.floatValue.extend(map(float, p_o.flatten())) + weight_params.inputGatePeepholeVector.floatValue.extend(p_i.flatten()) + weight_params.forgetGatePeepholeVector.floatValue.extend(p_f.flatten()) + weight_params.outputGatePeepholeVector.floatValue.extend(p_o.flatten()) # Write the backward lstm weights R_i, R_f, R_o, R_z = W_h_back W_i, W_f, W_o, W_z = W_x_back - weight_params_back.inputGateWeightMatrix.floatValue.extend(map(float, W_i.flatten())) - weight_params_back.forgetGateWeightMatrix.floatValue.extend(map(float, W_f.flatten())) - weight_params_back.outputGateWeightMatrix.floatValue.extend(map(float, W_o.flatten())) - weight_params_back.blockInputWeightMatrix.floatValue.extend(map(float, W_z.flatten())) + weight_params_back.inputGateWeightMatrix.floatValue.extend(W_i.flatten()) + weight_params_back.forgetGateWeightMatrix.floatValue.extend(W_f.flatten()) + weight_params_back.outputGateWeightMatrix.floatValue.extend(W_o.flatten()) + weight_params_back.blockInputWeightMatrix.floatValue.extend(W_z.flatten()) - weight_params_back.inputGateRecursionMatrix.floatValue.extend(map(float, 
R_i.flatten())) - weight_params_back.forgetGateRecursionMatrix.floatValue.extend(map(float, R_f.flatten())) - weight_params_back.outputGateRecursionMatrix.floatValue.extend(map(float, R_o.flatten())) - weight_params_back.blockInputRecursionMatrix.floatValue.extend(map(float, R_z.flatten())) + weight_params_back.inputGateRecursionMatrix.floatValue.extend(R_i.flatten()) + weight_params_back.forgetGateRecursionMatrix.floatValue.extend(R_f.flatten()) + weight_params_back.outputGateRecursionMatrix.floatValue.extend(R_o.flatten()) + weight_params_back.blockInputRecursionMatrix.floatValue.extend(R_z.flatten()) if b_back is not None: b_i, b_f, b_o, b_z = b_back - weight_params_back.inputGateBiasVector.floatValue.extend(map(float, b_i.flatten())) - weight_params_back.forgetGateBiasVector.floatValue.extend(map(float, b_f.flatten())) - weight_params_back.outputGateBiasVector.floatValue.extend(map(float, b_o.flatten())) - weight_params_back.blockInputBiasVector.floatValue.extend(map(float, b_z.flatten())) + weight_params_back.inputGateBiasVector.floatValue.extend(b_i.flatten()) + weight_params_back.forgetGateBiasVector.floatValue.extend(b_f.flatten()) + weight_params_back.outputGateBiasVector.floatValue.extend(b_o.flatten()) + weight_params_back.blockInputBiasVector.floatValue.extend(b_z.flatten()) if peep_back is not None: p_i, p_f, p_o = peep_back - weight_params_back.inputGatePeepholeVector.floatValue.extend(map(float, p_i.flatten())) - weight_params_back.forgetGatePeepholeVector.floatValue.extend(map(float, p_f.flatten())) - weight_params_back.outputGatePeepholeVector.floatValue.extend(map(float, p_o.flatten())) + weight_params_back.inputGatePeepholeVector.floatValue.extend(p_i.flatten()) + weight_params_back.forgetGatePeepholeVector.floatValue.extend(p_f.flatten()) + weight_params_back.outputGatePeepholeVector.floatValue.extend(p_o.flatten()) return spec_layer def add_flatten(self, name, mode, input_name, output_name): @@ -2495,17 +3390,21 @@ def add_flatten(self, name, mode, input_name, output_name): # Set the parameters if mode == 0: - spec_layer_params.mode = \ - _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value('CHANNEL_FIRST') + spec_layer_params.mode = _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value( + "CHANNEL_FIRST" + ) elif mode == 1: - spec_layer_params.mode = \ - _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value('CHANNEL_LAST') + spec_layer_params.mode = _NeuralNetwork_pb2.FlattenLayerParams.FlattenOrder.Value( + "CHANNEL_LAST" + ) else: - raise NotImplementedError('Unknown flatten mode %d ' % mode) + raise NotImplementedError("Unknown flatten mode %d " % mode) return spec_layer - def add_slice(self, name, input_name, output_name, axis, start_index=0, end_index=-1, stride=1): + def add_slice( + self, name, input_name, output_name, axis, start_index=0, end_index=-1, stride=1 + ): """ Add a slice layer. Equivalent to the numpy slice [start_index:end_index:stride], start_index is included, while end_index is exclusive. @@ -2539,7 +3438,9 @@ def add_slice(self, name, input_name, output_name, axis, start_index=0, end_inde # Set the parameters if start_index < 0: - raise ValueError("Invalid start_index value %d. Must be non-negative." % start_index) + raise ValueError( + "Invalid start_index value %d. Must be non-negative." % start_index + ) if stride < 1: raise ValueError("Invalid stride value %d. Must be positive."
% stride) @@ -2547,21 +3448,65 @@ def add_slice(self, name, input_name, output_name, axis, start_index=0, end_inde spec_layer_params.endIndex = end_index spec_layer_params.stride = stride - axis = axis.lower() if isinstance(axis, str) else axis - if axis == 'channel': - spec_layer_params.axis = \ - _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value('CHANNEL_AXIS') - elif axis == 'height': - spec_layer_params.axis = \ - _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value('HEIGHT_AXIS') - elif axis == 'width': - spec_layer_params.axis = \ - _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value('WIDTH_AXIS') + axis = axis.lower() if isinstance(axis, _string_types) else axis + if axis == "channel": + spec_layer_params.axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( + "CHANNEL_AXIS" + ) + elif axis == "height": + spec_layer_params.axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( + "HEIGHT_AXIS" + ) + elif axis == "width": + spec_layer_params.axis = _NeuralNetwork_pb2.SliceLayerParams.SliceAxis.Value( + "WIDTH_AXIS" + ) else: - raise NotImplementedError('Unsupported Slice axis %s ' % axis) + raise NotImplementedError("Unsupported Slice axis %s " % axis) return spec_layer - def add_reorganize_data(self, name, input_name, output_name, mode='SPACE_TO_DEPTH', block_size=2): + def add_slice_by_size(self, name, input_names, output_name, axis, size): + """ + Add a slice layer. Equivalent to the numpy slice [start_index: start_index+size]. + The input is a list of str: [input_tensor, begin_id]. + + Assume input_tensor has shape (2, 3, 4), and axis=1, size=2. + This would produce input_tensor[:, begin_id:begin_id+2, :]. + + Refer to the **SliceBySizeLayerParams** message in specification (NeuralNetwork.proto) for more details. + + Parameters + ---------- + name: str + The name of this layer. + + input_names: list of str + The input blob names of this layer. + output_name: str + The output blob name of this layer. + axis: int + Axis along which the input is sliced. + size: int + The size of the slice taken along the axis. + + See Also + -------- + add_slice, add_slice_static, add_slice_dynamic + """ + spec_layer = self._add_generic_layer(name, input_names, [output_name]) + spec_layer_params = spec_layer.sliceBySize + + if size < 1: + raise ValueError("Invalid size value %d. Must be positive." % size) + + spec_layer_params.axis = axis + spec_layer_params.size = size + + return spec_layer + + def add_reorganize_data( + self, name, input_name, output_name, mode="SPACE_TO_DEPTH", block_size=2 + ): """ Add a data reorganization layer of type "SPACE_TO_DEPTH" or "DEPTH_TO_SPACE". Refer to the specification (NeuralNetwork.proto) for more details. @@ -2587,9 +3532,13 @@ def add_reorganize_data(self, name, input_name, output_name, mode='SPACE_TO_DEPT Reverse of the operation 'SPACE_TO_DEPTH'. Output CHW dimensions are: [C/(block_size * block_size), H * block_size, W * block_size]. + - If mode == 'PIXEL_SHUFFLE': data is moved from the channel to the spatial dimension, + as in 'DEPTH_TO_SPACE', but with a different ordering of the channel dimension. + Output CHW dimensions are: [C/(block_size * block_size), H * block_size, W * block_size]. + block_size: int Must be greater than 1. Must divide H and W, when mode is 'SPACE_TO_DEPTH'. (block_size * block_size) - must divide C when mode is 'DEPTH_TO_SPACE'. + must divide C when mode is 'DEPTH_TO_SPACE' or 'PIXEL_SHUFFLE'.
See Also -------- @@ -2600,25 +3549,47 @@ def add_reorganize_data(self, name, input_name, output_name, mode='SPACE_TO_DEPT # Set the parameters if block_size < 2: - raise ValueError("Invalid block_size value %d. Must be greater than 1." % block_size) + raise ValueError( + "Invalid block_size value %d. Must be greater than 1." % block_size + ) spec_layer_params.blockSize = block_size - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'SPACE_TO_DEPTH': - spec_layer_params.mode = \ - _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value('SPACE_TO_DEPTH') - elif mode == 'DEPTH_TO_SPACE': - spec_layer_params.mode = \ - _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value('DEPTH_TO_SPACE') + mode = mode.upper() if isinstance(mode, _string_types) else mode + if mode == "SPACE_TO_DEPTH": + spec_layer_params.mode = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( + "SPACE_TO_DEPTH" + ) + elif mode == "DEPTH_TO_SPACE": + spec_layer_params.mode = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( + "DEPTH_TO_SPACE" + ) + elif mode == "PIXEL_SHUFFLE": + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + spec_layer_params.mode = _NeuralNetwork_pb2.ReorganizeDataLayerParams.ReorganizationType.Value( + "PIXEL_SHUFFLE" + ) else: - raise NotImplementedError('Unknown reorganization mode %s.' % mode) - return spec_layer - - def add_batchnorm(self, name, channels, gamma, beta, - mean=None, variance=None, - input_name='data', output_name='out', - compute_mean_var=False, - instance_normalization=False, epsilon=1e-5): + raise NotImplementedError("Unknown reorganization mode %s." % mode) + return spec_layer + + def add_batchnorm( + self, + name, + channels, + gamma, + beta, + mean=None, + variance=None, + input_name="data", + output_name="out", + compute_mean_var=False, + instance_normalization=False, + epsilon=1e-5, + ): """ Add a batch normalization layer. 
Batch normalization operation is defined as: @@ -2662,19 +3633,21 @@ def add_batchnorm(self, name, channels, gamma, beta, # Set the parameters spec_layer_params.channels = channels - spec_layer_params.gamma.floatValue.extend(map(float, gamma.flatten())) - spec_layer_params.beta.floatValue.extend(map(float, beta.flatten())) + spec_layer_params.gamma.floatValue.extend(gamma.flatten()) + spec_layer_params.beta.floatValue.extend(beta.flatten()) spec_layer_params.epsilon = epsilon spec_layer_params.computeMeanVar = compute_mean_var spec_layer_params.instanceNormalization = instance_normalization if compute_mean_var: if not instance_normalization: - raise NotImplementedError('Batch-instance norm is currently not supported') + raise NotImplementedError( + "Batch-instance norm is currently not supported" + ) if not compute_mean_var: - spec_layer_params.mean.floatValue.extend(map(float, mean.flatten())) - spec_layer_params.variance.floatValue.extend(map(float, variance.flatten())) + spec_layer_params.mean.floatValue.extend(mean.flatten()) + spec_layer_params.variance.floatValue.extend(variance.flatten()) return spec_layer @@ -2757,14 +3730,18 @@ def add_reshape(self, name, input_name, output_name, target_shape, mode): spec_layer_params = spec_layer.reshape spec_layer_params.targetShape.extend(target_shape) if mode == 0: - spec_layer_params.mode = \ - _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value('CHANNEL_FIRST') + spec_layer_params.mode = _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value( + "CHANNEL_FIRST" + ) else: - spec_layer_params.mode = \ - _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value('CHANNEL_LAST') + spec_layer_params.mode = _NeuralNetwork_pb2.ReshapeLayerParams.ReshapeOrder.Value( + "CHANNEL_LAST" + ) if len(target_shape) != 4 and len(target_shape) != 3: - raise ValueError("Length of the 'target-shape' parameter must be equal to 3 or 4") + raise ValueError( + "Length of the 'target-shape' parameter must be equal to 3 or 4" + ) self.rank_dict[output_name] = len(target_shape) return spec_layer @@ -2805,43 +3782,73 @@ def add_reduce(self, name, input_name, output_name, axis, mode, epsilon=1e-6): spec_layer_params = spec_layer.reduce spec_layer_params.epsilon = epsilon - mode = mode.lower() if isinstance(mode, str) else mode - if mode == 'sum': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('SUM') - elif mode == 'avg': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('AVG') - elif mode == 'prod': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('PROD') - elif mode == 'logsum': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('LOGSUM') - elif mode == 'sumsquare': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('SUMSQUARE') - elif mode == 'l1': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('L1') - elif mode == 'l2': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('L2') - elif mode == 'max': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('MAX') - elif mode == 'min': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('MIN') - elif mode == 'argmax': - spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value('ARGMAX') + mode = mode.lower() if isinstance(mode, _string_types) else mode + if mode == "sum": + 
spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "SUM" + ) + elif mode == "avg": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "AVG" + ) + elif mode == "prod": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "PROD" + ) + elif mode == "logsum": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "LOGSUM" + ) + elif mode == "sumsquare": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "SUMSQUARE" + ) + elif mode == "l1": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "L1" + ) + elif mode == "l2": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "L2" + ) + elif mode == "max": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "MAX" + ) + elif mode == "min": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "MIN" + ) + elif mode == "argmax": + spec_layer_params.mode = _NeuralNetwork_pb2.ReduceLayerParams.ReduceOperation.Value( + "ARGMAX" + ) else: - raise NotImplementedError('Unknown reduction operation %s.' % mode) - - axis = axis.upper() if isinstance(axis, str) else axis - if axis == 'CHW': - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value('CHW') - elif axis == 'HW': - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value('HW') - elif axis == 'C': - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value('C') - elif axis == 'H': - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value('H') - elif axis == 'W': - spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value('W') + raise NotImplementedError("Unknown reduction operation %s." % mode) + + axis = axis.upper() if isinstance(axis, _string_types) else axis + if axis == "CHW": + spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + "CHW" + ) + elif axis == "HW": + spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + "HW" + ) + elif axis == "C": + spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + "C" + ) + elif axis == "H": + spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + "H" + ) + elif axis == "W": + spec_layer_params.axis = _NeuralNetwork_pb2.ReduceLayerParams.ReduceAxis.Value( + "W" + ) else: - raise NotImplementedError('Unknown reduction axis %s.' % axis) + raise NotImplementedError("Unknown reduction axis %s." % axis) return spec_layer def add_lrn(self, name, input_name, output_name, alpha, beta, local_size, k=1.0): @@ -2879,7 +3886,15 @@ def add_lrn(self, name, input_name, output_name, alpha, beta, local_size, k=1.0) spec_layer_params.k = k return spec_layer - def add_mvn(self, name, input_name, output_name, across_channels=True, normalize_variance=True, epsilon=1e-5): + def add_mvn( + self, + name, + input_name, + output_name, + across_channels=True, + normalize_variance=True, + epsilon=1e-5, + ): """ Add an MVN (mean variance normalization) layer. Computes mean, variance and normalizes the input. Refer to the **MeanVarianceNormalizeLayerParams** message in specification (NeuralNetwork.proto) for more details. 
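For reference alongside the string-to-enum dispatch rewritten above: the builder lower-cases `mode` and upper-cases `axis` before the protobuf `Value()` lookup, so callers pass plain strings. A minimal usage sketch with hypothetical feature names and shapes:

```python
from coremltools.models import datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

# Reduce a (C, H, W) = (3, 4, 4) input over H and W with the L2 norm;
# "l2" and "HW" map onto the ReduceOperation / ReduceAxis enums above.
builder = NeuralNetworkBuilder(
    input_features=[("data", datatypes.Array(3, 4, 4))],
    output_features=[("out", None)],  # output shape left for Core ML to infer
)
builder.add_reduce(name="l2_hw", input_name="data", output_name="out",
                   axis="HW", mode="l2")
```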
@@ -2946,8 +3961,17 @@ def add_l2_normalize(self, name, input_name, output_name, epsilon=1e-5): spec_layer_params.epsilon = epsilon return spec_layer - def add_unary(self, name, input_name, output_name, mode, alpha=1.0, - shift=0, scale=1.0, epsilon=1e-6): + def add_unary( + self, + name, + input_name, + output_name, + mode, + alpha=1.0, + shift=0, + scale=1.0, + epsilon=1e-6, + ): """ Add a Unary layer. Applies the specified function (mode) to all the elements of the input. Prior to the application of the function the input can be scaled and shifted by using the 'scale', @@ -2988,25 +4012,41 @@ def add_unary(self, name, input_name, output_name, mode, alpha=1.0, spec_layer_params.shift = shift spec_layer_params.scale = scale - mode = mode.lower() if isinstance(mode, str) else mode - if mode == 'sqrt': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('SQRT') - elif mode == 'rsqrt': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('RSQRT') - elif mode == 'inverse': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('INVERSE') - elif mode == 'power': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('POWER') - elif mode == 'exp': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('EXP') - elif mode == 'log': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('LOG') - elif mode == 'abs': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('ABS') - elif mode == 'threshold': - spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value('THRESHOLD') + mode = mode.lower() if isinstance(mode, _string_types) else mode + if mode == "sqrt": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "SQRT" + ) + elif mode == "rsqrt": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "RSQRT" + ) + elif mode == "inverse": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "INVERSE" + ) + elif mode == "power": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "POWER" + ) + elif mode == "exp": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "EXP" + ) + elif mode == "log": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "LOG" + ) + elif mode == "abs": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "ABS" + ) + elif mode == "threshold": + spec_layer_params.type = _NeuralNetwork_pb2.UnaryFunctionLayerParams.Operation.Value( + "THRESHOLD" + ) else: - raise NotImplementedError('Unknown unary function %s ' % mode) + raise NotImplementedError("Unknown unary function %s " % mode) return spec_layer def add_split(self, name, input_name, output_names): @@ -3061,13 +4101,15 @@ def add_load_constant(self, name, output_name, constant_value, shape): spec_layer_params = spec_layer.loadConstant data = spec_layer_params.data - data.floatValue.extend(map(float, constant_value.flatten())) + data.floatValue.extend(constant_value.flatten()) spec_layer_params.shape.extend(shape) self.rank_dict[output_name] = 5 - if len(data.floatValue) != np.prod(shape): - raise ValueError("Dimensions of 'shape' do not match the size of the provided constant") + if len(data.floatValue) != 
_np.prod(shape): + raise ValueError( + "Dimensions of 'shape' do not match the size of the provided constant" + ) if not self._disable_rank5_shape_mapping: if len(shape) != 3: raise ValueError("'shape' must be of length 3") @@ -3094,18 +4136,28 @@ def add_custom(self, name, input_names, output_names, custom_proto_spec=None): """ # custom layers require a newer specification version from coremltools import _MINIMUM_CUSTOM_LAYER_SPEC_VERSION + if self.spec: - self.spec.specificationVersion = max(self.spec.specificationVersion, _MINIMUM_CUSTOM_LAYER_SPEC_VERSION) + self.spec.specificationVersion = max( + self.spec.specificationVersion, _MINIMUM_CUSTOM_LAYER_SPEC_VERSION + ) spec_layer = self._add_generic_layer(name, input_names, output_names) - spec_layer.custom.MergeFromString(b'') + spec_layer.custom.MergeFromString(b"") if custom_proto_spec: spec_layer.custom.CopyFrom(custom_proto_spec) return spec_layer - def add_resize_bilinear(self, name, input_name, output_name, target_height=1, target_width=1, - mode='ALIGN_ENDPOINTS_MODE'): + def add_resize_bilinear( + self, + name, + input_name, + output_name, + target_height=1, + target_width=1, + mode="ALIGN_ENDPOINTS_MODE", + ): """ Add a resize bilinear layer to the model. A layer that resizes the input to a given spatial size using bilinear interpolation. Refer to the **ResizeBilinearLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -3134,25 +4186,40 @@ def add_resize_bilinear(self, name, input_name, output_name, target_height=1, ta spec_layer_params = spec_layer.resizeBilinear spec_layer_params.targetSize.append(target_height) spec_layer_params.targetSize.append(target_width) - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'ALIGN_ENDPOINTS_MODE': - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value('ALIGN_ENDPOINTS_MODE') - elif mode == 'STRICT_ALIGN_ENDPOINTS_MODE': + mode = mode.upper() if isinstance(mode, _string_types) else mode + if mode == "ALIGN_ENDPOINTS_MODE": + spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( + "ALIGN_ENDPOINTS_MODE" + ) + elif mode == "STRICT_ALIGN_ENDPOINTS_MODE": + spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( + "STRICT_ALIGN_ENDPOINTS_MODE" + ) + elif mode == "UPSAMPLE_MODE": + spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( + "UPSAMPLE_MODE" + ) + elif mode == "ROI_ALIGN_MODE": spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - 'STRICT_ALIGN_ENDPOINTS_MODE') - elif mode == 'UPSAMPLE_MODE': - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value('UPSAMPLE_MODE') - elif mode == 'ROI_ALIGN_MODE': - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value('ROI_ALIGN_MODE') + "ROI_ALIGN_MODE" + ) else: raise ValueError("Unsupported resize bilinear mode %s" % mode) return spec_layer - def add_crop_resize(self, name, input_names, output_name, target_height=1, target_width=1, - mode='STRICT_ALIGN_ENDPOINTS_MODE', - normalized_roi=False, - box_indices_mode='CORNERS_HEIGHT_FIRST', - spatial_scale=1.0): + def add_crop_resize( + self, + name, + input_names, + output_name, + target_height=1, + target_width=1, + mode="STRICT_ALIGN_ENDPOINTS_MODE", + normalized_roi=False, + box_indices_mode="CORNERS_HEIGHT_FIRST", + spatial_scale=1.0, + ): """ Add a crop resize layer to the model.
A layer that extracts cropped spatial patches or RoIs (regions of interest) from the input and resizes them to a pre-specified size using bilinear interpolation. @@ -3212,40 +4279,65 @@ def add_crop_resize(self, name, input_names, output_name, target_height=1, targe spec_layer_params.normalizedCoordinates = normalized_roi spec_layer_params.spatialScale = spatial_scale - mode = mode.upper() if isinstance(mode, str) else mode - box_indices_mode = box_indices_mode.upper() if isinstance(box_indices_mode, str) else box_indices_mode + mode = mode.upper() if isinstance(mode, _string_types) else mode + box_indices_mode = ( + box_indices_mode.upper() + if isinstance(box_indices_mode, _string_types) + else box_indices_mode + ) - if mode == 'ALIGN_ENDPOINTS_MODE': - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value('ALIGN_ENDPOINTS_MODE') - elif mode == 'STRICT_ALIGN_ENDPOINTS_MODE': + if mode == "ALIGN_ENDPOINTS_MODE": spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( - 'STRICT_ALIGN_ENDPOINTS_MODE') - elif mode == 'UPSAMPLE_MODE': - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value('UPSAMPLE_MODE') - elif mode == 'ROI_ALIGN_MODE': - spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value('ROI_ALIGN_MODE') + "ALIGN_ENDPOINTS_MODE" + ) + elif mode == "STRICT_ALIGN_ENDPOINTS_MODE": + spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( + "STRICT_ALIGN_ENDPOINTS_MODE" + ) + elif mode == "UPSAMPLE_MODE": + spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( + "UPSAMPLE_MODE" + ) + elif mode == "ROI_ALIGN_MODE": + spec_layer_params.mode.samplingMethod = _NeuralNetwork_pb2.SamplingMode.Method.Value( + "ROI_ALIGN_MODE" + ) else: raise ValueError("Unsupported crop resize mode %s" % mode) - if box_indices_mode == 'CORNERS_HEIGHT_FIRST': + if box_indices_mode == "CORNERS_HEIGHT_FIRST": spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - 'CORNERS_HEIGHT_FIRST') - elif box_indices_mode == 'CORNERS_WIDTH_FIRST': + "CORNERS_HEIGHT_FIRST" + ) + elif box_indices_mode == "CORNERS_WIDTH_FIRST": spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - 'CORNERS_WIDTH_FIRST') - elif box_indices_mode == 'CENTER_SIZE_HEIGHT_FIRST': + "CORNERS_WIDTH_FIRST" + ) + elif box_indices_mode == "CENTER_SIZE_HEIGHT_FIRST": spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - 'CENTER_SIZE_HEIGHT_FIRST') - elif box_indices_mode == 'CENTER_SIZE_WIDTH_FIRST': + "CENTER_SIZE_HEIGHT_FIRST" + ) + elif box_indices_mode == "CENTER_SIZE_WIDTH_FIRST": spec_layer_params.boxIndicesMode.boxMode = _NeuralNetwork_pb2.BoxCoordinatesMode.Coordinates.Value( - 'CENTER_SIZE_WIDTH_FIRST') + "CENTER_SIZE_WIDTH_FIRST" + ) else: - raise ValueError("Unsupported crop resize box indices mode %s" % box_indices_mode) - return spec_layer - - def set_pre_processing_parameters(self, image_input_names=None, is_bgr=False, - red_bias=0.0, green_bias=0.0, blue_bias=0.0, gray_bias=0.0, image_scale=1.0, - image_format='NCHW'): + raise ValueError( + "Unsupported crop resize box indices mode %s" % box_indices_mode + ) + return spec_layer + + def set_pre_processing_parameters( + self, + image_input_names=None, + is_bgr=False, + red_bias=0.0, + green_bias=0.0, + blue_bias=0.0, + gray_bias=0.0, + image_scale=1.0, + image_format="NCHW", 
+ ): """ Add a pre-processing parameters layer to the neural network object. @@ -3284,9 +4376,17 @@ def set_pre_processing_parameters(self, image_input_names=None, is_bgr=False, if not image_input_names: return # nothing to do here - image_format = image_format.upper() if isinstance(image_format, str) else image_format - if image_format != 'NCHW' and image_format != 'NHWC': - raise ValueError("Input image format must be either 'NCHW' or 'NHWC'. Provided {}".format(image_format)) + image_format = ( + image_format.upper() + if isinstance(image_format, _string_types) + else image_format + ) + if image_format != "NCHW" and image_format != "NHWC": + raise ValueError( + "Input image format must be either 'NCHW' or 'NHWC'. Provided {}".format( + image_format + ) + ) if not isinstance(is_bgr, dict): is_bgr = dict.fromkeys(image_input_names, is_bgr) @@ -3306,76 +4406,131 @@ def set_pre_processing_parameters(self, image_input_names=None, is_bgr=False, def check_valid_preprocessing_keys(input, target, input_name): for key in input: if not key in target: - raise ValueError('Invalid key {} in {}.'.format(key, input_name)) + raise ValueError("Invalid key {} in {}.".format(key, input_name)) target = image_input_names - check_valid_preprocessing_keys(is_bgr, target, 'is_bgr') - check_valid_preprocessing_keys(red_bias, target, 'red_bias') - check_valid_preprocessing_keys(blue_bias, target, 'blue_bias') - check_valid_preprocessing_keys(green_bias, target, 'green_bias') - check_valid_preprocessing_keys(gray_bias, target, 'gray_bias') - check_valid_preprocessing_keys(image_scale, target, 'image_scale') + check_valid_preprocessing_keys(is_bgr, target, "is_bgr") + check_valid_preprocessing_keys(red_bias, target, "red_bias") + check_valid_preprocessing_keys(blue_bias, target, "blue_bias") + check_valid_preprocessing_keys(green_bias, target, "green_bias") + check_valid_preprocessing_keys(gray_bias, target, "gray_bias") + check_valid_preprocessing_keys(image_scale, target, "image_scale") spec = self.spec + # Add image inputs for input_ in spec.description.input: if input_.name in image_input_names: - if input_.type.WhichOneof('Type') == 'multiArrayType': + if input_.type.WhichOneof("Type") == "multiArrayType": array_shape = tuple(input_.type.multiArrayType.shape) if len(array_shape) == 4: - input_indices = [0, 1, 2, 3] if image_format == 'NCHW' else [0, 3, 1, 2] + input_indices = ( + [0, 1, 2, 3] if image_format == "NCHW" else [0, 3, 1, 2] + ) elif len(array_shape) == 3: # Adding dummy index for 'batch' for compatibility - input_indices = [0, 0, 1, 2] if image_format == 'NCHW' else [0, 2, 0, 1] + input_indices = ( + [0, 0, 1, 2] if image_format == "NCHW" else [0, 2, 0, 1] + ) else: - raise ValueError("Invalid input shape. Input of rank {}, but expecting input of either rank 3 or rank 4".format(len(array_shape))) + raise ValueError( + "Invalid input shape. Input of rank {}, but expecting input of either rank 3 or rank 4".format( + len(array_shape) + ) + ) # Extract image shape depending on input format _, channels, height, width = [array_shape[e] for e in input_indices] - if image_format == 'NHWC': + if image_format == "NHWC": # If input format is 'NHWC' for TF model, it will be # 'NCHW' for CoreML model. 
Therefore, add transpose to # NHWC after the input and replace all use of input - # with output of transpose - axes = [1, 2, 0] - if len(array_shape) == 4: - axes = [0, 2, 3, 1] - input_transpose = input_.name + '_to_nhwc' - transpose_layer = self.add_transpose( - name=input_transpose, - axes=axes, - input_name=input_.name, - output_name=input_transpose - ) layers = self.nn_spec.layers - layers.insert(0, layers.pop()) + complement_transpose = True + transpose_names = set() + transpose_outputs = [] for layer_ in layers: - for i in range(len(layer_.input)): - if layer_.name == input_transpose: - continue - if layer_.input[i] == input_.name: - layer_.input[i] = input_transpose + if ( + layer_.HasField("transpose") + and layer_.input[0] == input_.name + ): + transpose_order = list(layer_.transpose.axes) + if transpose_order == [ + 0, + 3, + 1, + 2, + ] or transpose_order == [2, 0, 1]: + transpose_names.add(layer_.name) + transpose_outputs += list(layer_.output) + else: + complement_transpose = False + break + else: + for i in layer_.input: + if i == input_.name: + complement_transpose = False + break + if complement_transpose: + for layer_ in layers: + for i in range(len(layer_.input)): + if layer_.input[i] in transpose_names: + layer_.input[i] = input_.name + for layer_ in layers: + if layer_.name == input_.name: + del layer_.output[:] + layer_.output.extend(transpose_outputs) + break + while len(transpose_names) > 0: + for idx, layer_ in enumerate(layers): + if layer_.name in transpose_names: + del layers[idx] + transpose_names.remove(layer_.name) + else: + axes = [1, 2, 0] + if len(array_shape) == 4: + axes = [0, 2, 3, 1] + input_transpose = input_.name + "_to_nhwc" + transpose_layer = self.add_transpose( + name=input_transpose, + axes=axes, + input_name=input_.name, + output_name=input_transpose, + ) + layers.insert(0, layers.pop()) + for layer_ in layers: + for i in range(len(layer_.input)): + if layer_.name == input_transpose: + continue + if layer_.input[i] == input_.name: + layer_.input[i] = input_transpose # TODO: If input is not rank 3 or 4, then accordingly handle # e.g. 
for rank-2 input, squeeze additional dimension in case of Gray scale image if channels == 1: input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - 'GRAYSCALE') + "GRAYSCALE" + ) elif channels == 3: if input_.name in is_bgr: if is_bgr[input_.name]: input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - 'BGR') + "BGR" + ) else: input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - 'RGB') + "RGB" + ) else: input_.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( - 'RGB') + "RGB" + ) else: - raise ValueError("Channel Value %d not supported for image inputs" % channels) + raise ValueError( + "Channel Value %d not supported for image inputs" % channels + ) input_.type.imageType.width = width input_.type.imageType.height = height @@ -3386,10 +4541,14 @@ def check_valid_preprocessing_keys(input, target, input_name): scaler.channelScale = image_scale[input_.name] else: scaler.channelScale = 1.0 - if input_.name in red_bias: scaler.redBias = red_bias[input_.name] - if input_.name in blue_bias: scaler.blueBias = blue_bias[input_.name] - if input_.name in green_bias: scaler.greenBias = green_bias[input_.name] - if input_.name in gray_bias: scaler.grayBias = gray_bias[input_.name] + if input_.name in red_bias: + scaler.redBias = red_bias[input_.name] + if input_.name in blue_bias: + scaler.blueBias = blue_bias[input_.name] + if input_.name in green_bias: + scaler.greenBias = green_bias[input_.name] + if input_.name in gray_bias: + scaler.grayBias = gray_bias[input_.name] def add_transpose(self, name, axes, input_name, output_name): """ @@ -3485,10 +4644,10 @@ def add_erf(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.erf.MergeFromString(b'') + spec_layer.erf.MergeFromString(b"") return spec_layer - def add_gelu(self, name, input_name, output_name, mode='EXACT'): + def add_gelu(self, name, input_name, output_name, mode="EXACT"): """ Add a GELU (gaussian error linear unit) activation layer, which is: ``0.5 * x * (1 + erf(x / sqrt(2)))``. 
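A usage sketch for the GELU layer whose mode dispatch is reformatted in the next hunk. The feature names and shape are hypothetical; `"EXACT"` (the default) and `"SIGMOID_APPROXIMATION"` are the other accepted modes:

```python
from coremltools.models import datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

# A one-layer network applying the tanh-approximated GELU.
builder = NeuralNetworkBuilder(
    input_features=[("x", datatypes.Array(8))],
    output_features=[("y", None)],
    disable_rank5_shape_mapping=True,
)
builder.add_gelu(name="gelu_1", input_name="x", output_name="y",
                 mode="TANH_APPROXIMATION")
```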
@@ -3509,12 +4668,18 @@ def add_gelu(self, name, input_name, output_name, mode='EXACT'): spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.gelu - if mode == 'EXACT': - spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value('EXACT') - elif mode == 'TANH_APPROXIMATION': - spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value('TANH_APPROXIMATION') - elif mode == 'SIGMOID_APPROXIMATION': - spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value('SIGMOID_APPROXIMATION') + if mode == "EXACT": + spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( + "EXACT" + ) + elif mode == "TANH_APPROXIMATION": + spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( + "TANH_APPROXIMATION" + ) + elif mode == "SIGMOID_APPROXIMATION": + spec_layer_params.mode = _NeuralNetwork_pb2.GeluLayerParams.GeluMode.Value( + "SIGMOID_APPROXIMATION" + ) else: raise ValueError("Unsupported Gelu mode %s" % mode) return spec_layer @@ -3540,7 +4705,7 @@ def add_sin(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.sin.MergeFromString(b'') + spec_layer.sin.MergeFromString(b"") return spec_layer def add_cos(self, name, input_name, output_name): @@ -3564,7 +4729,7 @@ def add_cos(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.cos.MergeFromString(b'') + spec_layer.cos.MergeFromString(b"") return spec_layer def add_tan(self, name, input_name, output_name): @@ -3588,7 +4753,7 @@ def add_tan(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.tan.MergeFromString(b'') + spec_layer.tan.MergeFromString(b"") return spec_layer def add_asin(self, name, input_name, output_name): @@ -3612,7 +4777,7 @@ def add_asin(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.asin.MergeFromString(b'') + spec_layer.asin.MergeFromString(b"") return spec_layer def add_acos(self, name, input_name, output_name): @@ -3636,7 +4801,7 @@ def add_acos(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.acos.MergeFromString(b'') + spec_layer.acos.MergeFromString(b"") return spec_layer def add_atan(self, name, input_name, output_name): @@ -3660,7 +4825,7 @@ def add_atan(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.atan.MergeFromString(b'') + spec_layer.atan.MergeFromString(b"") return spec_layer def add_sinh(self, name, input_name, output_name): @@ -3683,7 +4848,7 @@ def add_sinh(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.sinh.MergeFromString(b'') + spec_layer.sinh.MergeFromString(b"") return spec_layer def add_cosh(self, name, input_name, output_name): @@ -3707,7 +4872,7 @@ def add_cosh(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.cosh.MergeFromString(b'') + spec_layer.cosh.MergeFromString(b"") return spec_layer def add_tanh(self, name, input_name, output_name): @@ -3731,7 +4896,7 @@ def add_tanh(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], 
[output_name]) - spec_layer.tanh.MergeFromString(b'') + spec_layer.tanh.MergeFromString(b"") return spec_layer def add_asinh(self, name, input_name, output_name): @@ -3755,7 +4920,7 @@ def add_asinh(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.asinh.MergeFromString(b'') + spec_layer.asinh.MergeFromString(b"") return spec_layer def add_acosh(self, name, input_name, output_name): @@ -3779,7 +4944,7 @@ def add_acosh(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.acosh.MergeFromString(b'') + spec_layer.acosh.MergeFromString(b"") return spec_layer def add_atanh(self, name, input_name, output_name): @@ -3803,7 +4968,7 @@ def add_atanh(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.atanh.MergeFromString(b'') + spec_layer.atanh.MergeFromString(b"") return spec_layer def add_exp2(self, name, input_name, output_name): @@ -3822,7 +4987,7 @@ def add_exp2(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.exp2.MergeFromString(b'') + spec_layer.exp2.MergeFromString(b"") return spec_layer def add_add_broadcastable(self, name, input_names, output_name): @@ -3842,7 +5007,7 @@ def add_add_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.addBroadcastable.MergeFromString(b'') + spec_layer.addBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3863,7 +5028,7 @@ def add_multiply_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.multiplyBroadcastable.MergeFromString(b'') + spec_layer.multiplyBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3884,7 +5049,7 @@ def add_divide_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.divideBroadcastable.MergeFromString(b'') + spec_layer.divideBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3905,7 +5070,7 @@ def add_subtract_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.subtractBroadcastable.MergeFromString(b'') + spec_layer.subtractBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3926,7 +5091,7 @@ def add_max_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.maxBroadcastable.MergeFromString(b'') + spec_layer.maxBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3947,7 +5112,7 @@ def add_min_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.minBroadcastable.MergeFromString(b'') + spec_layer.minBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3972,7 +5137,7 @@ def add_floor_div_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, 
input_names, [output_name]) - spec_layer.floorDivBroadcastable.MergeFromString(b'') + spec_layer.floorDivBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -3993,7 +5158,7 @@ def add_mod_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.modBroadcastable.MergeFromString(b'') + spec_layer.modBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -4014,7 +5179,7 @@ def add_pow_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.powBroadcastable.MergeFromString(b'') + spec_layer.powBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer @@ -4063,7 +5228,7 @@ def add_ceil(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.ceil.MergeFromString(b'') + spec_layer.ceil.MergeFromString(b"") return spec_layer def add_floor(self, name, input_name, output_name): @@ -4088,7 +5253,7 @@ def add_floor(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.floor.MergeFromString(b'') + spec_layer.floor.MergeFromString(b"") return spec_layer def add_round(self, name, input_name, output_name): @@ -4108,7 +5273,7 @@ def add_round(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.round.MergeFromString(b'') + spec_layer.round.MergeFromString(b"") return spec_layer def add_sign(self, name, input_name, output_name): @@ -4128,10 +5293,10 @@ def add_sign(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.sign.MergeFromString(b'') + spec_layer.sign.MergeFromString(b"") return spec_layer - def add_clip(self, name, input_name, output_name, min_value=0., max_value=1.): + def add_clip(self, name, input_name, output_name, min_value=0.0, max_value=1.0): """ Add a clip layer to the model that performs element-wise clip operation. Clip the values in the input tensor to the range [min_value, max_value]. @@ -4156,7 +5321,7 @@ def add_clip(self, name, input_name, output_name, min_value=0., max_value=1.): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.clip.MergeFromString(b'') + spec_layer.clip.MergeFromString(b"") spec_params = spec_layer.clip spec_params.minVal = float(min_value) @@ -4164,7 +5329,9 @@ def add_clip(self, name, input_name, output_name, min_value=0., max_value=1.): return spec_layer - def add_split_nd(self, name, input_name, output_names, axis, num_splits=2, split_sizes=None): + def add_split_nd( + self, name, input_name, output_names, axis, num_splits=2, split_sizes=None + ): """ Add a split layer to the model that splits the input tensor into multiple output tensors. 
Either uniformly split the input tensor into ``num_splits`` @@ -4203,8 +5370,18 @@ def add_split_nd(self, name, input_name, output_names, axis, num_splits=2, split assert len(output_names) == spec_layer_params.numSplits return spec_layer - def add_slice_static(self, name, input_name, output_name, begin_ids, - end_ids, strides, begin_masks, end_masks): + def add_slice_static( + self, + name, + input_name, + output_name, + begin_ids, + end_ids, + strides, + begin_masks, + end_masks, + squeeze_masks=None, + ): """ Add a slice_static layer to the model that extracts a slice of size ``(end - begin) / stride`` from the given input tensor. @@ -4228,6 +5405,8 @@ def add_slice_static(self, name, input_name, output_name, begin_ids, Boolean masks for begin offsets. end_masks: list of bool Boolean masks for end offsets. + squeeze_masks: list of bool + Boolean masks for squeezing the corresponding axes. See Also -------- @@ -4239,6 +5418,7 @@ def add_slice_static(self, name, input_name, output_name, begin_ids, assert len(strides) == rank assert len(begin_masks) == rank assert len(end_masks) == rank + assert squeeze_masks is None or len(squeeze_masks) == rank spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.sliceStatic @@ -4249,10 +5429,29 @@ def add_slice_static(self, name, input_name, output_name, begin_ids, spec_layer_params.beginMasks.extend(begin_masks) spec_layer_params.endMasks.extend(end_masks) + if not (squeeze_masks and any(squeeze_masks)): + return spec_layer + + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + spec_layer_params.squeezeMasks.extend(squeeze_masks) + return spec_layer - def add_slice_dynamic(self, name, input_names, output_name, end_ids=None, - strides=None, begin_masks=None, end_masks=None): + def add_slice_dynamic( + self, + name, + input_names, + output_name, + end_ids=None, + strides=None, + begin_masks=None, + end_masks=None, + squeeze_masks=None, + ): """ Add a slice_dynamic layer to the model that extracts a slice of size ``(end - begin) / stride`` from the given input tensor. @@ -4274,6 +5473,8 @@ def add_slice_dynamic(self, name, input_names, output_name, end_ids=None, Boolean masks for begin offsets, default: [false]. end_masks: list of bool, optional Boolean masks for end offsets, default: [false]. + squeeze_masks: list of bool, optional + Boolean masks for squeezing the corresponding axes, default: [false].
        See Also
        --------
@@ -4288,6 +5489,8 @@ def add_slice_dynamic(self, name, input_names, output_name, end_ids=None,
             begin_masks = [False for _ in range(5)]
         if not end_masks:
             end_masks = [False for _ in range(5)]
+        if not squeeze_masks:
+            squeeze_masks = [False for _ in range(5)]

         spec_layer = self._add_generic_layer(name, input_names, [output_name])
         spec_layer_params = spec_layer.sliceDynamic
@@ -4296,10 +5499,19 @@ def add_slice_dynamic(self, name, input_names, output_name, end_ids=None,
         spec_layer_params.strides.extend(strides)
         spec_layer_params.beginMasks.extend(begin_masks)
         spec_layer_params.endMasks.extend(end_masks)
+        if not any(squeeze_masks):
+            return spec_layer
+
+        if self.spec and (
+            not self.spec.specificationVersion
+            or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14
+        ):
+            self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14
+        spec_layer_params.squeezeMasks.extend(squeeze_masks)

         return spec_layer

-    def add_tile(self, name, input_name, output_name, reps):
+    def add_tile(self, name, input_name, output_name, reps=[]):
         """
         Add a tile layer to the model that constructs a tensor by repeating the
         input tensor a given number of times.

@@ -4309,26 +5521,45 @@ def add_tile(self, name, input_name, output_name, reps):
         ----------
         name: str
             The name of this layer.
-        input_name: str
+        input_name: str or list[str]
             The input blob name of this layer.
+            If a second input is provided, the reps parameter is ignored.
         output_name: str
             The output blob name of this layer.
         reps: list of int or tuple of int
             Number of times to replicate.
+            If `input_name` provides two inputs, the second input is used as
+            reps and this parameter is ignored.

         See Also
         --------
         add_stack, add_concat_nd
         """
-
-        spec_layer = self._add_generic_layer(name, [input_name], [output_name])
+        if isinstance(input_name, tuple):
+            input_names = list(input_name)
+        elif isinstance(input_name, list):
+            input_names = input_name
+        else:
+            input_names = [input_name]
+        spec_layer = self._add_generic_layer(name, input_names, [output_name])
         spec_layer_params = spec_layer.tile
+        # If two inputs are provided,
+        # ignore reps attribute.
+        if len(input_names) == 2:
+            reps = []
+            if self.spec and (
+                not self.spec.specificationVersion
+                or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14
+            ):
+                self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14
         assert all([i > 0 for i in reps])
         spec_layer_params.reps.extend(reps)
         return spec_layer

-    def add_range_static(self, name, output_name, input_names=None, end=1, start=0, step=1):
+    def add_range_static(
+        self, name, output_name, input_names=None, end=1, start=0, step=1
+    ):
         """
         Add a range_static layer that returns a tensor that contains
         evenly spaced values.
         This layer has no input and three parameters.
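Not part of the patch, but for review context: a minimal sketch of how the new `squeeze_masks` argument can be driven through the public builder API. The blob names and shapes here are hypothetical; per the hunks above, passing any true squeeze mask bumps the spec to the iOS 14 version.

```python
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

# Hypothetical 3-D input: slice out index 0 along axis 0, then squeeze that axis.
builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(2, 3, 4))],
    [("y", None)],
    disable_rank5_shape_mapping=True,
)
builder.add_slice_static(
    "slice_squeeze", "x", "y",
    begin_ids=[0, 0, 0],
    end_ids=[1, 3, 4],
    strides=[1, 1, 1],
    begin_masks=[False, True, True],     # masked axes start at 0
    end_masks=[False, True, True],       # masked axes run to the end
    squeeze_masks=[True, False, False],  # drop the now-size-1 axis 0
)
assert builder.spec.specificationVersion >= 5  # iOS 14 corresponds to spec version 5
```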
@@ -4355,7 +5586,7 @@ def add_range_static(self, name, output_name, input_names=None, end=1, start=0, """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.rangeStatic.MergeFromString(b'') + spec_layer.rangeStatic.MergeFromString(b"") spec_params = spec_layer.rangeStatic spec_params.endValue = float(end) @@ -4393,10 +5624,10 @@ def add_range_dynamic(self, name, input_names, output_name, start=0, step=1): """ if len(input_names) < 1 or len(input_names) > 3: - raise ValueError('RangeDynamic layer must have either 1, 2 or 3 inputs.') + raise ValueError("RangeDynamic layer must have either 1, 2 or 3 inputs.") spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.rangeDynamic.MergeFromString(b'') + spec_layer.rangeDynamic.MergeFromString(b"") spec_params = spec_layer.rangeDynamic spec_params.startValue = float(start) @@ -4432,15 +5663,22 @@ def add_branch(self, name, input_name, if_branch=None, else_branch=None): if if_branch: branch.ifBranch = if_branch else: - branch.ifBranch.MergeFromString(b'') + branch.ifBranch.MergeFromString(b"") if else_branch: branch.elseBranch = else_branch else: - branch.elseBranch.MergeFromString(b'') + branch.elseBranch.MergeFromString(b"") return layer - def add_loop(self, name, body_network=None, input_name=None, condition=None, - condition_network=None, max_iterations=None): + def add_loop( + self, + name, + body_network=None, + input_name=None, + condition=None, + condition_network=None, + max_iterations=None, + ): """ Add a loop layer to the model that provides the functionality of a ``for`` loop, or a ``while`` loop. @@ -4470,17 +5708,19 @@ def add_loop(self, name, body_network=None, input_name=None, condition=None, spec_layer = self._add_generic_layer(name, input_names, []) loop = spec_layer.loop if condition_network is None: - loop.conditionNetwork.MergeFromString(b'') + loop.conditionNetwork.MergeFromString(b"") else: loop.conditionNetwork = condition_network if condition is not None: loop.conditionVar = str(condition) if max_iterations is not None: - loop.maxLoopIterations = max_iterations if max_iterations is not None else -1 + loop.maxLoopIterations = ( + max_iterations if max_iterations is not None else -1 + ) if body_network is None: - loop.bodyNetwork.MergeFromString(b'') + loop.bodyNetwork.MergeFromString(b"") else: loop.bodyNetwork = body_network return spec_layer @@ -4503,7 +5743,7 @@ def add_loop_break(self, name): spec_layer = self.nn_spec.layers.add() spec_layer.name = name - spec_layer.loopBreak.MergeFromString(b'') + spec_layer.loopBreak.MergeFromString(b"") return spec_layer def add_loop_continue(self, name): @@ -4525,7 +5765,7 @@ def add_loop_continue(self, name): spec_layer = self.nn_spec.layers.add() spec_layer.name = name - spec_layer.loopContinue.MergeFromString(b'') + spec_layer.loopContinue.MergeFromString(b"") return spec_layer def add_copy(self, name, input_name, output_name): @@ -4545,16 +5785,20 @@ def add_copy(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.copy.MergeFromString(b'') + spec_layer.copy.MergeFromString(b"") # If output name rank is different than earlier, # mark it as unknown - if output_name in self.rank_dict and self._get_rank(output_name) != self._get_rank(input_name): + if output_name in self.rank_dict and self._get_rank( + output_name + ) != self._get_rank(input_name): self.rank_dict[output_name] = -1 else: self.rank_dict[output_name] = self._get_rank(input_name) return 
spec_layer - def add_greater_than(self, name, input_names, output_name, use_greater_than_equal=False, alpha=0.): + def add_greater_than( + self, name, input_names, output_name, use_greater_than_equal=False, alpha=0.0 + ): """ Add a greater_than layer to the model that performs the element-wise greater-than (>) operation or greater-than-or-equal-to (>=) operation. @@ -4580,21 +5824,23 @@ def add_greater_than(self, name, input_names, output_name, use_greater_than_equa add_equal, add_not_equal, add_less_than """ - if isinstance(input_names, str): + if isinstance(input_names, _string_types): input_names = [input_names] spec_layer = self._add_generic_layer(name, input_names, [output_name]) if use_greater_than_equal: - spec_layer.greaterEqual.MergeFromString(b'') + spec_layer.greaterEqual.MergeFromString(b"") if len(input_names) == 1: spec_layer.greaterEqual.alpha = alpha else: - spec_layer.greaterThan.MergeFromString(b'') + spec_layer.greaterThan.MergeFromString(b"") if len(input_names) == 1: spec_layer.greaterThan.alpha = alpha return spec_layer - def add_less_than(self, name, input_names, output_name, use_less_than_equal=False, alpha=0.): + def add_less_than( + self, name, input_names, output_name, use_less_than_equal=False, alpha=0.0 + ): """ Add a less_than layer to the model that performs the element-wise less-than (<) operation or less-than-or-equal-to (<=) operation. @@ -4620,20 +5866,20 @@ def add_less_than(self, name, input_names, output_name, use_less_than_equal=Fals add_equal, add_not_equal, add_greater_than """ - if isinstance(input_names, str): + if isinstance(input_names, _string_types): input_names = [input_names] spec_layer = self._add_generic_layer(name, input_names, [output_name]) if use_less_than_equal: - spec_layer.lessEqual.MergeFromString(b'') + spec_layer.lessEqual.MergeFromString(b"") if len(input_names) == 1: spec_layer.lessEqual.alpha = alpha else: - spec_layer.lessThan.MergeFromString(b'') + spec_layer.lessThan.MergeFromString(b"") if len(input_names) == 1: spec_layer.lessThan.alpha = alpha return spec_layer - def add_equal(self, name, input_names, output_name, alpha=0.): + def add_equal(self, name, input_names, output_name, alpha=0.0): """ Add an equal layer to the model that performs the element-wise equal (=) operation. Broadcasting is supported. @@ -4655,15 +5901,15 @@ def add_equal(self, name, input_names, output_name, alpha=0.): add_not_equal, add_greater_than, add_less_than """ - if isinstance(input_names, str): + if isinstance(input_names, _string_types): input_names = [input_names] spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.equal.MergeFromString(b'') + spec_layer.equal.MergeFromString(b"") if len(input_names) == 1: spec_layer.equal.alpha = alpha return spec_layer - def add_not_equal(self, name, input_names, output_name, alpha=0.): + def add_not_equal(self, name, input_names, output_name, alpha=0.0): """ Add a not_equal layer to the model that performs the element-wise not equal (!=) operation. Broadcasting is supported. 
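Illustrative only: the `_string_types` change above lets a single input be passed as a bare string, in which case the comparison runs against the scalar `alpha` rather than a second tensor. A minimal sketch with hypothetical names:

```python
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(4))],
    [("mask", None)],
    disable_rank5_shape_mapping=True,
)
# One input: element-wise x > 0.5. Two inputs would compare tensors with broadcasting.
builder.add_greater_than("gt_half", input_names="x", output_name="mask", alpha=0.5)
```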
@@ -4685,10 +5931,10 @@ def add_not_equal(self, name, input_names, output_name, alpha=0.): add_equal, add_greater_than, add_less_than """ - if isinstance(input_names, str): + if isinstance(input_names, _string_types): input_names = [input_names] spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.notEqual.MergeFromString(b'') + spec_layer.notEqual.MergeFromString(b"") if len(input_names) == 1: spec_layer.notEqual.alpha = alpha return spec_layer @@ -4713,30 +5959,32 @@ def add_logical(self, name, input_names, output_name, mode): Logical operation mode in [AND | OR | XOR | NOT]. """ - if isinstance(input_names, str): + if isinstance(input_names, _string_types): input_names = [input_names] spec_layer = self._add_generic_layer(name, input_names, [output_name]) - if mode in ['AND', 'OR', 'XOR'] and len(input_names) != 2: + if mode in ["AND", "OR", "XOR"] and len(input_names) != 2: raise ValueError('Logical operation "%s" requires 2 inputs' % name) - if mode in ['NOT'] and len(input_names) != 1: + if mode in ["NOT"] and len(input_names) != 1: raise ValueError('Logical operation "%s" requires 1 input' % name) - if mode == 'AND': - spec_layer.logicalAnd.MergeFromString(b'') - elif mode == 'OR': - spec_layer.logicalOr.MergeFromString(b'') - elif mode == 'XOR': - spec_layer.logicalXor.MergeFromString(b'') - elif mode == 'NOT': - spec_layer.logicalNot.MergeFromString(b'') + if mode == "AND": + spec_layer.logicalAnd.MergeFromString(b"") + elif mode == "OR": + spec_layer.logicalOr.MergeFromString(b"") + elif mode == "XOR": + spec_layer.logicalXor.MergeFromString(b"") + elif mode == "NOT": + spec_layer.logicalNot.MergeFromString(b"") else: raise ValueError('Logical operation "%s" is not supported' % mode) return spec_layer - def add_sliding_windows(self, name, input_name, output_name, axis, window_size, step=1): + def add_sliding_windows( + self, name, input_name, output_name, axis, window_size, step=1 + ): """ Add a sliding_windows layer to the model that returns a tensor containing all windows of size ``window_size`` * separated by ``step`` along the dimension ``axis``. @@ -4802,8 +6050,9 @@ def add_reverse(self, name, input_name, output_name, reverse_dim=None): spec_layer_params.reverseDim.extend(map(bool, reverse_dim)) return spec_layer - def add_reverse_sequence(self, name, input_names, output_name, - batch_axis=0, seq_axis=-1): + def add_reverse_sequence( + self, name, input_names, output_name, batch_axis=0, seq_axis=-1 + ): """ Add a reverse sequence layer to the model that reverses variable length slices. Refer to the **ReverseSeqLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -4857,10 +6106,12 @@ def add_gather(self, name, input_names, output_name, axis=0): spec_layer = self._add_generic_layer(name, input_names, [output_name]) spec_layer.gather.axis = axis - self.rank_dict[output_name] = self._get_rank(input_names[0]) - 1 + self._get_rank(input_names[1]) + self.rank_dict[output_name] = ( + self._get_rank(input_names[0]) - 1 + self._get_rank(input_names[1]) + ) return spec_layer - def add_scatter(self, name, input_names, output_name, axis=0, mode='UPDATE'): + def add_scatter(self, name, input_names, output_name, axis=0, mode="UPDATE"): """ Add a scatter layer to the model that scatters data into a new tensor according to indices from the input. 
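For review context: the arity checks above mean "AND", "OR", and "XOR" require exactly two inputs while "NOT" requires exactly one. A sketch under assumed blob names (not part of the patch):

```python
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("a", datatypes.Array(4)), ("b", datatypes.Array(4))],
    [("both", None)],
    disable_rank5_shape_mapping=True,
)
builder.add_logical("and_ab", input_names=["a", "b"], output_name="both", mode="AND")
# mode="NOT" with two inputs (or "AND" with one) raises ValueError, per the hunk above.
```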
@@ -4888,21 +6139,23 @@ def add_scatter(self, name, input_names, output_name, axis=0, mode='UPDATE'): spec_layer_params = spec_layer.scatter spec_layer_params.axis = axis - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'UPDATE': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_UPDATE') - elif mode == 'ADD': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_ADD') - elif mode == 'SUB': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_SUB') - elif mode == 'MUL': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MUL') - elif mode == 'DIV': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_DIV') - elif mode == 'MAX': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MAX') - elif mode == 'MIN': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MIN') + mode = mode.upper() if isinstance(mode, _string_types) else mode + if mode == "UPDATE": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value( + "SCATTER_UPDATE" + ) + elif mode == "ADD": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") + elif mode == "SUB": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") + elif mode == "MUL": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") + elif mode == "DIV": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") + elif mode == "MAX": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") + elif mode == "MIN": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") else: raise ValueError("Unsupported Scatter mode %s" % mode) @@ -4936,7 +6189,9 @@ def add_gather_along_axis(self, name, input_names, output_name, axis=0): self.rank_dict[output_name] = self._get_rank(input_names[1]) return spec_layer - def add_scatter_along_axis(self, name, input_names, output_name, axis=0, mode='UPDATE'): + def add_scatter_along_axis( + self, name, input_names, output_name, axis=0, mode="UPDATE" + ): """ Add a scatter_along_axis layer to the model that scatters data into a new tensor according to indices from the input along the given axis into the @@ -4965,23 +6220,25 @@ def add_scatter_along_axis(self, name, input_names, output_name, axis=0, mode='U spec_layer_params = spec_layer.scatterAlongAxis spec_layer_params.axis = axis - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'UPDATE': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_UPDATE') - elif mode == 'ADD': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_ADD') - elif mode == 'SUB': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_SUB') - elif mode == 'MUL': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MUL') - elif mode == 'DIV': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_DIV') - elif mode == 'MAX': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MAX') - elif mode == 'MIN': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MIN') + mode = mode.upper() if isinstance(mode, _string_types) else mode + if mode == "UPDATE": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value( + "SCATTER_UPDATE" + ) + elif mode == "ADD": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") + elif mode == "SUB": + 
spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") + elif mode == "MUL": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") + elif mode == "DIV": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") + elif mode == "MAX": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") + elif mode == "MIN": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") else: - raise ValueError('Unsupported scatter_along_axis mode %s' % mode) + raise ValueError("Unsupported scatter_along_axis mode %s" % mode) return spec_layer @@ -5007,7 +6264,7 @@ def add_gather_nd(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.gatherND.MergeFromString(b'') + spec_layer.gatherND.MergeFromString(b"") # NOTE: ideally, following is formula for computing output rank # self.rank_dict[output_name] = self._get_rank(input_names[1]) - 1 + self._get_rank(input_names[0]) # + shape_dict[input_names[1]][-1] @@ -5016,7 +6273,7 @@ def add_gather_nd(self, name, input_names, output_name): self.rank_dict[output_name] = -1 return spec_layer - def add_scatter_nd(self, name, input_names, output_name, mode='UPDATE'): + def add_scatter_nd(self, name, input_names, output_name, mode="UPDATE"): """ Add a scatter layer to the model that scatters data into a new tensor according to indices from input. This is the reverse operation of the @@ -5042,27 +6299,31 @@ def add_scatter_nd(self, name, input_names, output_name, mode='UPDATE'): spec_layer = self._add_generic_layer(name, input_names, [output_name]) spec_layer_params = spec_layer.scatterND - mode = mode.upper() if isinstance(mode, str) else mode - if mode == 'UPDATE': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_UPDATE') - elif mode == 'ADD': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_ADD') - elif mode == 'SUB': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_SUB') - elif mode == 'MUL': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MUL') - elif mode == 'DIV': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_DIV') - elif mode == 'MAX': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MAX') - elif mode == 'MIN': - spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value('SCATTER_MIN') + mode = mode.upper() if isinstance(mode, _string_types) else mode + if mode == "UPDATE": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value( + "SCATTER_UPDATE" + ) + elif mode == "ADD": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_ADD") + elif mode == "SUB": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_SUB") + elif mode == "MUL": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MUL") + elif mode == "DIV": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_DIV") + elif mode == "MAX": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MAX") + elif mode == "MIN": + spec_layer_params.mode = _NeuralNetwork_pb2.ScatterMode.Value("SCATTER_MIN") else: - raise ValueError('Unsupported scatter mode %s' % mode) + raise ValueError("Unsupported scatter mode %s" % mode) return spec_layer - def add_topk(self, name, input_names, output_names, k=0, axis=0, use_bottom_k=False): + def add_topk( + self, name, input_names, output_names, k=0, 
axis=0, use_bottom_k=False + ): """ Add a topk layer to the model that returns top or bottom k values and the corresponding indices of the input tensor along a given axis. @@ -5173,8 +6434,15 @@ def add_argmin(self, name, input_name, output_name, axis, keepdims=True): self.rank_dict[output_name] = input_rank - 1 return spec_layer - def add_constant_pad(self, name, input_names, output_name, - value=0.0, pad_to_given_output_size_mode=False, pad_amounts=[]): + def add_constant_pad( + self, + name, + input_names, + output_name, + value=0.0, + pad_to_given_output_size_mode=False, + pad_amounts=[], + ): """ Add a constant pad layer. Refer to the **ConstantPaddingLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -5207,11 +6475,21 @@ def add_constant_pad(self, name, input_names, output_name, if len(pad_amounts) > 0: spec_layer_params.padAmounts.extend(map(int, pad_amounts)) if len(input_names) == 1 and len(pad_amounts) == 0: - raise ValueError("Constant_pad layer: pad_amounts must be provided when there is a single input") + raise ValueError( + "Constant_pad layer: pad_amounts must be provided when there is a single input" + ) return spec_layer - def add_nms(self, name, input_names, output_names, - iou_threshold=0.5, score_threshold=0.0, max_boxes=1, per_class_suppression=False): + def add_nms( + self, + name, + input_names, + output_names, + iou_threshold=0.5, + score_threshold=0.0, + max_boxes=1, + per_class_suppression=False, + ): """ Add a non maximum suppression layer. Refer to the **NonMaximumSuppressionLayerParams** message in specification (NeuralNetwork.proto) for more details. @@ -5251,15 +6529,22 @@ def add_nms(self, name, input_names, output_names, self.rank_dict[output_names[3]] = 1 return spec_layer - def add_embedding_nd(self, name, input_name, output_name, - vocab_size, embedding_size, - W, b=None, - is_quantized_weight=False, - quantization_type='linear', - nbits=8, - quant_scale=None, - quant_bias=None, - quant_lut=None): + def add_embedding_nd( + self, + name, + input_name, + output_name, + vocab_size, + embedding_size, + W, + b=None, + is_quantized_weight=False, + quantization_type="linear", + nbits=8, + quant_scale=None, + quant_bias=None, + quant_lut=None, + ): """ Add an embedding layer to the model that performs a matrix lookup and optionally adds a bias. 
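Illustrative only: `add_topk` emits two output blobs, the selected values and their indices. A hypothetical sketch:

```python
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("scores", datatypes.Array(10))],
    [("top_vals", None), ("top_idx", None)],
    disable_rank5_shape_mapping=True,
)
# Top 3 values and indices along axis 0; use_bottom_k=True would return the smallest instead.
builder.add_topk(
    "top3",
    input_names=["scores"],
    output_names=["top_vals", "top_idx"],
    k=3,
    axis=0,
)
```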
@@ -5314,31 +6599,52 @@ def add_embedding_nd(self, name, input_name, output_name, weights = spec_layer_params.weights if not is_quantized_weight: - weights.floatValue.extend(map(float, W.flatten())) + weights.floatValue.extend(W.flatten()) else: - _verify_quantization_arguments(weight=W, output_channels=embedding_size, - quantization_type=quantization_type, nbits=nbits, - quant_scale=quant_scale, quant_bias=quant_bias, quant_lut=quant_lut) - - _fill_quantized_weights(weights_message=weights, W=W, - quantization_type=quantization_type, nbits=nbits, - quant_scale=quant_scale, quant_bias=quant_bias, quant_lut=quant_lut) + _verify_quantization_arguments( + weight=W, + output_channels=embedding_size, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) + + _fill_quantized_weights( + weights_message=weights, + W=W, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) if b is not None: bias = spec_layer_params.bias - bias.floatValue.extend(map(float, b.flatten())) - return spec_layer - - def add_batched_mat_mul(self, name, input_names, output_name, - transpose_a=False, transpose_b=False, - weight_matrix_rows=0, weight_matrix_columns=0, - W=None, bias=None, - is_quantized_weight=False, - quantization_type='linear', - nbits=8, - quant_scale=None, - quant_bias=None, - quant_lut=None): + bias.floatValue.extend(b.flatten()) + return spec_layer + + def add_batched_mat_mul( + self, + name, + input_names, + output_name, + transpose_a=False, + transpose_b=False, + weight_matrix_rows=0, + weight_matrix_columns=0, + W=None, + bias=None, + int_8_dynamic_quantize=False, + is_quantized_weight=False, + quantization_type="linear", + nbits=8, + quant_scale=None, + quant_bias=None, + quant_lut=None, + ): """ Add a N-D Batched Matrix Multiplication layer with numpy like broadcasting. Refer to the **BatchedMatMulLayerParams** message in specification @@ -5375,7 +6681,7 @@ def add_batched_mat_mul(self, name, input_names, output_name, bias: float32 numpy.array, optional Bias vector of shape (weight_matrix_columns,). - Quantization arguments expected in kwargs, when W is of type bytes(): + Quantization arguments, used when W is of type bytes(): is_quantized_weight: bool, optional Set it to true when W is of type bytes(), representing quantized weights, default: false. @@ -5395,6 +6701,10 @@ def add_batched_mat_mul(self, name, input_names, output_name, quant_lut: numpy.array(dtype=numpy.float32), optional The LUT (look up table) to be used with LUT quantization. Must be of length 2^nbits, default: None. + int_8_dynamic_quantize: bool + Whether to quantize and dequantize before and after batched matmul, respectively. + Expects byte weights, representing int8 values, if True. See NeuralNetwork.proto for other validation conditions. 
+ See Also -------- add_inner_product @@ -5405,16 +6715,23 @@ def add_batched_mat_mul(self, name, input_names, output_name, spec_layer_params = spec_layer.batchedMatmul spec_layer_params.transposeA = transpose_a spec_layer_params.transposeB = transpose_b + spec_layer_params.int8DynamicQuantize = int_8_dynamic_quantize if ((W is not None) or (bias is not None)) and len(input_names) == 2: - raise ValueError("batched_mat_mul: Weight and/or bias are ignored when there are two inputs") + raise ValueError( + "batched_mat_mul: Weight and/or bias are ignored when there are two inputs" + ) if (W is None) and len(input_names) == 1: - raise ValueError("batched_mat_mul: Weight parameter must be provided when there is one input") + raise ValueError( + "batched_mat_mul: Weight parameter must be provided when there is one input" + ) self.rank_dict[output_name] = 2 for input_ in input_names: - self.rank_dict[output_name] = max(self._get_rank(output_name), self._get_rank(input_)) + self.rank_dict[output_name] = max( + self._get_rank(output_name), self._get_rank(input_) + ) if len(input_names) == 1: spec_layer_params.weightMatrixFirstDimension = weight_matrix_rows @@ -5424,35 +6741,53 @@ def add_batched_mat_mul(self, name, input_names, output_name, weights = spec_layer_params.weights if not is_quantized_weight: - weights.floatValue.extend(map(float, np.transpose(W).flatten())) + weights.floatValue.extend(_np.transpose(W).flatten()) else: - _verify_quantization_arguments(weight=W, output_channels=weight_matrix_columns, - quantization_type=quantization_type, nbits=nbits, - quant_scale=quant_scale, quant_bias=quant_bias, quant_lut=quant_lut) + _verify_quantization_arguments( + weight=W, + output_channels=weight_matrix_columns, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + int_8_dynamic_quantize=int_8_dynamic_quantize, + ) - num_weights = weight_matrix_rows * weight_matrix_columns if nbits < 8: - byte_arr = np.frombuffer(W, dtype=np.uint8) - W = unpack_to_bytes(byte_arr, num_weights, nbits) + num_weights = weight_matrix_rows * weight_matrix_columns + byte_arr = _np.frombuffer(W, dtype=_np.uint8) + W = _unpack_to_bytes(byte_arr, num_weights, nbits) + elif int_8_dynamic_quantize: + W = _np.frombuffer(W, dtype=_np.int8) else: - W = np.frombuffer(W, dtype=np.uint8) + W = _np.frombuffer(W, dtype=_np.uint8) - W = np.reshape(W, (weight_matrix_rows, weight_matrix_columns)) - W = np.transpose(W) + W = _np.reshape(W, (weight_matrix_rows, weight_matrix_columns)) + W = _np.transpose(W) W_bytes = bytes() if nbits == 8: W_bytes += W.flatten().tobytes() else: - W_bytes += _convert_array_to_nbit_quantized_bytes(W.flatten(), nbits).tobytes() - - _fill_quantized_weights(weights_message=weights, W=W_bytes, - quantization_type=quantization_type, nbits=nbits, - quant_scale=quant_scale, quant_bias=quant_bias, quant_lut=quant_lut) + W_bytes += _convert_array_to_nbit_quantized_bytes( + W.flatten(), nbits + ).tobytes() + + _fill_quantized_weights( + weights_message=weights, + W=W_bytes, + use_int_8=int_8_dynamic_quantize, + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) if bias is not None: bias_param = spec_layer_params.bias - bias_param.floatValue.extend(map(float, bias.flatten())) + bias_param.floatValue.extend(bias.flatten()) return spec_layer @@ -5477,7 +6812,7 @@ def add_get_shape(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, 
[input_name], [output_name])
-        spec_layer.getShape.MergeFromString(b'')
+        spec_layer.getShape.MergeFromString(b"")
         self.rank_dict[output_name] = 1
         return spec_layer

@@ -5508,17 +6843,19 @@ def add_load_constant_nd(self, name, output_name, constant_value, shape):

         spec_layer_params = spec_layer.loadConstantND
         data = spec_layer_params.data
-        data.floatValue.extend(map(float, constant_value.flatten()))
+        data.floatValue.extend(constant_value.flatten())
         spec_layer_params.shape.extend(shape)

         # Rank information
         self.rank_dict[output_name] = len(shape)

-        if len(data.floatValue) != np.prod(shape):
-            raise ValueError("Dimensions of 'shape' do not match the size of the provided constant")
+        if len(data.floatValue) != _np.prod(shape):
+            raise ValueError(
+                "Dimensions of 'shape' do not match the size of the provided constant"
+            )

         return spec_layer

-    def add_fill_like(self, name, input_name, output_name, value=0.):
+    def add_fill_like(self, name, input_name, output_name, value=0.0):
         """
         Add a fill_like layer to the model that outputs a tensor filled with
         a scalar value.

@@ -5546,7 +6883,7 @@ def add_fill_like(self, name, input_name, output_name, value=0.):

         spec_layer_params.value = value
         return spec_layer

-    def add_fill_static(self, name, output_name, output_shape, value=0.):
+    def add_fill_static(self, name, output_name, output_shape, value=0.0):
         """
         Add a fill_static layer to the model that outputs a tensor of the
         given shape filled with a scalar value.

@@ -5576,7 +6913,7 @@ def add_fill_static(self, name, output_name, output_shape, value=0.):

         self.rank_dict[output_name] = len(output_shape)
         return spec_layer

-    def add_fill_dynamic(self, name, input_name, output_name, value=0.):
+    def add_fill_dynamic(self, name, input_name, output_name, value=0.0):
         """
         Add a fill_dynamic layer to the model that outputs a tensor filled
         with a scalar value.
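For review context: the `add_load_constant_nd` hunk above now validates that `shape` multiplies out to the number of constant values supplied. A sketch with hypothetical names (not part of the patch):

```python
import numpy as np
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(2, 3))],
    [("y", None)],
    disable_rank5_shape_mapping=True,
)
const = np.arange(6, dtype=np.float32).reshape(2, 3)
# shape=[2, 3] matches the 6 constant values; a mismatch raises ValueError.
builder.add_load_constant_nd("c", output_name="c_out", constant_value=const, shape=[2, 3])
builder.add_add_broadcastable("add", input_names=["x", "c_out"], output_name="y")
```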
@@ -5627,7 +6964,7 @@ def add_broadcast_to_like(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.broadcastToLike.MergeFromString(b'') + spec_layer.broadcastToLike.MergeFromString(b"") if len(input_names) != 2: raise ValueError("BroadcastToLikeLayer must have two inputs") @@ -5687,7 +7024,7 @@ def add_broadcast_to_dynamic(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.broadcastToDynamic.MergeFromString(b'') + spec_layer.broadcastToDynamic.MergeFromString(b"") # Setting rank to -1 is a hint that Rank was not computed # converter can modify if it's a constant and known self.rank_dict[output_name] = -1 @@ -5812,7 +7149,7 @@ def add_reshape_like(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.reshapeLike.MergeFromString(b'') + spec_layer.reshapeLike.MergeFromString(b"") self.rank_dict[output_name] = self._get_rank(input_names[1]) return spec_layer @@ -5865,7 +7202,7 @@ def add_reshape_dynamic(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.reshapeDynamic.MergeFromString(b'') + spec_layer.reshapeDynamic.MergeFromString(b"") # Setting rank to -1 is a hint that Rank was not computed # converter can modify if it's a constant and known self.rank_dict[output_name] = -1 @@ -5896,17 +7233,23 @@ def add_rank_preserving_reshape(self, name, input_name, output_name, output_shap add_reshape, add_reshape_like, add_reshape_static, add_reshape_dynamic """ - spec_layer = self._add_generic_layer(name, [input_name], [output_name], - input_ranks=[len(output_shape)], - input_shapes=[[int(x) for x in output_shape]], - output_ranks=[len(output_shape)], - output_shapes=[[int(x) for x in output_shape]]) + spec_layer = self._add_generic_layer( + name, + [input_name], + [output_name], + input_ranks=[len(output_shape)], + input_shapes=[[int(x) for x in output_shape]], + output_ranks=[len(output_shape)], + output_shapes=[[int(x) for x in output_shape]], + ) spec_layer_params = spec_layer.rankPreservingReshape spec_layer_params.targetShape.extend(map(int, output_shape)) return spec_layer - def add_random_normal_like(self, name, input_name, output_name, mean=0., stddev=0., seed=-1): + def add_random_normal_like( + self, name, input_name, output_name, mean=0.0, stddev=0.0, seed=-1 + ): """ Add a random_normal_like layer to the model that fills the output tensor with random values from normal distribution. @@ -5942,7 +7285,9 @@ def add_random_normal_like(self, name, input_name, output_name, mean=0., stddev= return spec_layer - def add_random_normal_static(self, name, output_name, output_shape, mean=0., stddev=0., seed=-1): + def add_random_normal_static( + self, name, output_name, output_shape, mean=0.0, stddev=0.0, seed=-1 + ): """ Add a random_normal_static layer to the model that fills the output tensor with random values from normal distribution. 
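Illustrative only: `add_reshape_like` takes its target shape from a second tensor at runtime, and the hunk above records the output rank as the rank of that reference input. A hypothetical sketch:

```python
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(6)), ("ref", datatypes.Array(2, 3))],
    [("y", None)],
    disable_rank5_shape_mapping=True,
)
# Reshape "x" (6,) to the shape of "ref" (2, 3) without hard-coding the target.
builder.add_reshape_like("rs", input_names=["x", "ref"], output_name="y")
```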
@@ -5980,7 +7325,9 @@ def add_random_normal_static(self, name, output_name, output_shape, mean=0., std self.rank_dict[output_name] = len(output_shape) return spec_layer - def add_random_normal_dynamic(self, name, input_names, output_name, mean=0., stddev=0., seed=-1): + def add_random_normal_dynamic( + self, name, input_names, output_name, mean=0.0, stddev=0.0, seed=-1 + ): """ Add a random_normal_dynamic layer to the model that fills the output tensor with random values from normal distribution. @@ -6018,7 +7365,9 @@ def add_random_normal_dynamic(self, name, input_names, output_name, mean=0., std self.rank_dict[output_name] = -1 return spec_layer - def add_random_uniform_like(self, name, input_name, output_name, minval=0., maxval=1., seed=-1): + def add_random_uniform_like( + self, name, input_name, output_name, minval=0.0, maxval=1.0, seed=-1 + ): """ Add a random_uniform_like layer to the model that fills the output tensors with random values from uniform distribution. @@ -6054,7 +7403,9 @@ def add_random_uniform_like(self, name, input_name, output_name, minval=0., maxv return spec_layer - def add_random_uniform_static(self, name, output_name, output_shape, minval=0., maxval=1., seed=-1): + def add_random_uniform_static( + self, name, output_name, output_shape, minval=0.0, maxval=1.0, seed=-1 + ): """ Add a random_uniform_static layer to the model that fills the output tensors with random values from uniform distribution. @@ -6091,7 +7442,9 @@ def add_random_uniform_static(self, name, output_name, output_shape, minval=0., self.rank_dict[output_name] = len(output_shape) return spec_layer - def add_random_uniform_dynamic(self, name, input_names, output_name, minval=0., maxval=1., seed=-1): + def add_random_uniform_dynamic( + self, name, input_names, output_name, minval=0.0, maxval=1.0, seed=-1 + ): """ Add a random_uniform_dynamic layer to the model that fills the output tensors with random values from uniform distribution. @@ -6129,7 +7482,9 @@ def add_random_uniform_dynamic(self, name, input_names, output_name, minval=0., self.rank_dict[output_name] = -1 return spec_layer - def add_random_bernoulli_like(self, name, input_name, output_name, prob=0.5, seed=-1): + def add_random_bernoulli_like( + self, name, input_name, output_name, prob=0.5, seed=-1 + ): """ Add a random_bernoulli_like layer to the model that fills the output tensor with random values from Bernoulli distribution. @@ -6162,7 +7517,9 @@ def add_random_bernoulli_like(self, name, input_name, output_name, prob=0.5, see return spec_layer - def add_random_bernoulli_static(self, name, output_name, output_shape, prob=0.5, seed=-1): + def add_random_bernoulli_static( + self, name, output_name, output_shape, prob=0.5, seed=-1 + ): """ Add a random_bernoulli_static layer to the model that fills the output tensor with random values from Bernoulli distribution. @@ -6197,7 +7554,9 @@ def add_random_bernoulli_static(self, name, output_name, output_shape, prob=0.5, self.rank_dict[output_name] = len(output_shape) return spec_layer - def add_random_bernoulli_dynamic(self, name, input_names, output_name, prob=0.5, seed=-1): + def add_random_bernoulli_dynamic( + self, name, input_names, output_name, prob=0.5, seed=-1 + ): """ Add a random_bernoulli_dynamic layer to the model that fills the output tensor with random values from Bernoulli distribution. 
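Illustrative only: the `*_static` random layers take a fixed `output_shape` and an optional `seed` for reproducible draws. A fragment under assumed names (the network input is unused here):

```python
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(1))],
    [("r", None)],
    disable_rank5_shape_mapping=True,
)
# Uniform samples in [0, 1) with a fixed seed; the shape is known at build time.
builder.add_random_uniform_static(
    "rand", output_name="r", output_shape=(2, 3), minval=0.0, maxval=1.0, seed=42
)
```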
@@ -6233,8 +7592,17 @@ def add_random_bernoulli_dynamic(self, name, input_names, output_name, prob=0.5, self.rank_dict[output_name] = -1 return spec_layer - def add_categorical_distribution(self, name, input_name, output_name, num_samples, - is_logits=True, eps=1e-10, temperature=1.0, seed=-1): + def add_categorical_distribution( + self, + name, + input_name, + output_name, + num_samples, + is_logits=True, + eps=1e-10, + temperature=1.0, + seed=-1, + ): """ Add a categorical_distribution layer to the model that fills the output tensor with random values from categorical distribution. @@ -6273,8 +7641,9 @@ def add_categorical_distribution(self, name, input_name, output_name, num_sample return spec_layer - def add_reduce_sum(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_sum( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_sum layer to the model that reduces the input tensor using ``sum(elements across given dimensions)``. @@ -6307,7 +7676,7 @@ def add_reduce_sum(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceSum - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6315,11 +7684,14 @@ def add_reduce_sum(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_prod(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_prod( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_prod layer to the model that reduces the input tensor using ``prod(elements across given dimensions)``. @@ -6353,7 +7725,7 @@ def add_reduce_prod(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceProd - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6361,11 +7733,14 @@ def add_reduce_prod(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_mean(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_mean( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_mean layer to the model that reduces the input tensor using ``mean(elements across given dimensions)``. 
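One practical effect of changing the guard from `if axes` to `if axes is not None and len(axes) != 0` (repeated across the reduce layers below): a multi-element numpy array of axes no longer trips Python's ambiguous-truth-value error, and only a missing or empty `axes` falls back to `reduce_all`. An illustrative sketch, not part of the patch:

```python
import numpy as np
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(2, 3, 4))],
    [("s", None)],
    disable_rank5_shape_mapping=True,
)
# axes may now safely be a numpy array as well as a list; keepdims=False drops the reduced axes.
builder.add_reduce_sum("rsum", "x", "s", axes=np.array([1, 2]), keepdims=False)
```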
@@ -6397,7 +7772,7 @@ def add_reduce_mean(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceMean - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6405,11 +7780,14 @@ def add_reduce_mean(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_max(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_max( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_max layer to the model that reduces the input tensor using ``max(elements across given dimensions)``. @@ -6441,7 +7819,7 @@ def add_reduce_max(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceMax - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6449,11 +7827,14 @@ def add_reduce_max(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_min(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_min( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_min layer to the model that reduces the input tensor using ``min(elements across given dimensions)``. @@ -6485,7 +7866,7 @@ def add_reduce_min(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceMin - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6493,11 +7874,14 @@ def add_reduce_min(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_l2(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_l2( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_l2 layer to the model that reduces the input tensor using ``l2_normalization(elements across given dimensions)``. 
@@ -6529,7 +7913,7 @@ def add_reduce_l2(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceL2 - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6537,11 +7921,14 @@ def add_reduce_l2(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_l1(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_l1( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_l1 layer to the model that reduces the input tensor using ``l1_normalization(elements across given dimensions)``. @@ -6573,7 +7960,7 @@ def add_reduce_l1(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceL1 - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6581,11 +7968,14 @@ def add_reduce_l1(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_sumsquare(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_sumsquare( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_sumsquare layer to the model that reduces the input tensor using ``sum(square(elements across given dimensions))``. @@ -6617,7 +8007,7 @@ def add_reduce_sumsquare(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceSumSquare - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6625,11 +8015,14 @@ def add_reduce_sumsquare(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_logsum(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_logsum( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_logsum layer to the model that reduces the input tensor using log(sum(elements across given dimensions)). 
@@ -6661,7 +8054,7 @@ def add_reduce_logsum(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceLogSum - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6669,11 +8062,14 @@ def add_reduce_logsum(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer - def add_reduce_logsumexp(self, name, input_name, output_name, - axes=None, keepdims=True, reduce_all=False): + def add_reduce_logsumexp( + self, name, input_name, output_name, axes=None, keepdims=True, reduce_all=False + ): """ Add a reduce_logsumexp layer to the model that computes ``log(sum(exp(tensor)))`` and reduces along the given axis. @@ -6705,7 +8101,7 @@ def add_reduce_logsumexp(self, name, input_name, output_name, spec_layer = self._add_generic_layer(name, [input_name], [output_name]) spec_layer_params = spec_layer.reduceLogSumExp - if axes and len(axes) != 0: + if axes is not None and len(axes) != 0: spec_layer_params.axes.extend(map(int, axes)) else: reduce_all = True @@ -6713,7 +8109,9 @@ def add_reduce_logsumexp(self, name, input_name, output_name, spec_layer_params.keepDims = keepdims spec_layer_params.reduceAll = reduce_all - self._set_rank_for_reduce_op(input_name, output_name, axes, keepdims, reduce_all) + self._set_rank_for_reduce_op( + input_name, output_name, axes, keepdims, reduce_all + ) return spec_layer def add_where_nonzero(self, name, input_name, output_name): @@ -6738,12 +8136,14 @@ def add_where_nonzero(self, name, input_name, output_name): """ spec_layer = self._add_generic_layer(name, [input_name], [output_name]) - spec_layer.whereNonZero.MergeFromString(b'') + spec_layer.whereNonZero.MergeFromString(b"") self.rank_dict[output_name] = 2 return spec_layer - def add_matrix_band_part(self, name, input_name, output_name, num_lower=-1, num_upper=-1): + def add_matrix_band_part( + self, name, input_name, output_name, num_lower=-1, num_upper=-1 + ): """ Add a matrix_band_part layer to the model that copies a tensor setting everything outside a central band in each inner-most matrix to zero. @@ -6857,14 +8257,15 @@ def add_where_broadcastable(self, name, input_names, output_name): """ spec_layer = self._add_generic_layer(name, input_names, [output_name]) - spec_layer.whereBroadcastable.MergeFromString(b'') + spec_layer.whereBroadcastable.MergeFromString(b"") self._set_max_input_rank(input_names, output_name) return spec_layer - def add_layer_normalization(self, name, input_name, output_name, - normalized_shape, gamma, beta, eps=1e-5): + def add_layer_normalization( + self, name, input_name, output_name, normalized_shape, gamma, beta, eps=1e-5 + ): """ Add a layer normalization layer to the model that applies layer normalization over the input tensor. 
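For review context: `add_layer_normalization` requires `gamma` and `beta` to match `normalized_shape` exactly (see the checks in the next hunk). A hypothetical sketch:

```python
import numpy as np
import coremltools.models.datatypes as datatypes
from coremltools.models.neural_network import NeuralNetworkBuilder

builder = NeuralNetworkBuilder(
    [("x", datatypes.Array(3, 4))],
    [("y", None)],
    disable_rank5_shape_mapping=True,
)
normalized_shape = (4,)  # normalize over the last axis
builder.add_layer_normalization(
    "ln", "x", "y",
    normalized_shape=normalized_shape,
    gamma=np.ones(normalized_shape, dtype=np.float32),
    beta=np.zeros(normalized_shape, dtype=np.float32),
    eps=1e-5,
)
```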
@@ -6890,10 +8291,10 @@ def add_layer_normalization(self, name, input_name, output_name,
         """

         if gamma.shape != tuple(normalized_shape):
-            raise ValueError('Shape of parameter gamma should match normalized_shape')
+            raise ValueError("Shape of parameter gamma should match normalized_shape")

         if beta.shape != tuple(normalized_shape):
-            raise ValueError('Shape of parameter beta should match normalized_shape')
+            raise ValueError("Shape of parameter beta should match normalized_shape")

         spec_layer = self._add_generic_layer(name, [input_name], [output_name])
         spec_layer_params = spec_layer.layerNormalization
@@ -6901,11 +8302,173 @@ def add_layer_normalization(self, name, input_name, output_name,
         spec_layer_params.normalizedShape.extend(normalized_shape)

         weights = spec_layer_params.gamma
-        weights.floatValue.extend(map(float, gamma.flatten()))
+        weights.floatValue.extend(gamma.flatten())

         bias = spec_layer_params.beta
-        bias.floatValue.extend(map(float, beta.flatten()))
+        bias.floatValue.extend(beta.flatten())

         spec_layer_params.eps = eps

         return spec_layer
+
+    def add_one_hot(
+        self,
+        name,
+        input_names,
+        output_name,
+        one_hot_vector_size=None,
+        axis=-1,
+        on_value=1.0,
+        off_value=0.0,
+    ):
+        """
+        Add a one-hot layer to the model that computes the one-hot representation of the input tensor.
+        Refer to the **OneHotLayerParams** message in specification
+        (NeuralNetwork.proto) for more details.
+
+        Parameters
+        ----------
+        name: str
+            The name of this layer.
+        input_names: list of str
+            The input blob names of this layer.
+        output_name: str
+            The output blob name of this layer.
+        one_hot_vector_size: int > 0
+            Size of the one-hot vector.
+        axis: int, optional
+            Axis in the output tensor along which the one-hot dimension is inserted, default: -1.
+        on_value: float, optional
+            Constant value at the locations selected by the first input, default: 1.0.
+        off_value: float, optional
+            Constant value at all other locations, default: 0.0.
+        """
+
+        if self.spec and (
+            not self.spec.specificationVersion
+            or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14
+        ):
+            self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14
+
+        spec_layer = self._add_generic_layer(name, input_names, [output_name])
+        spec_layer_params = spec_layer.oneHot
+        spec_layer_params.axis = axis
+        if one_hot_vector_size:
+            spec_layer_params.oneHotVectorSize = one_hot_vector_size
+        spec_layer_params.onValue = on_value
+        spec_layer_params.offValue = off_value
+        return spec_layer
+
+    def add_cumsum(
+        self, name, input_names, output_name, axis=-1, reverse=False, exclusive=False
+    ):
+        """
+        Add a cumsum layer to the model that computes the cumulative sum of the input along a given axis.
+        Refer to the **CumSumLayerParams** message in specification
+        (NeuralNetwork.proto) for more details.
+
+        Parameters
+        ----------
+        name: str
+            The name of this layer.
+        input_names: list of str
+            The input blob names of this layer.
+        output_name: str
+            The output blob name of this layer.
+        axis: int, optional
+            Axis to perform the operation, default: -1.
+        reverse: bool, optional
+            If True, the cumulative sum is computed in the reverse direction, default: False.
+        exclusive: bool, optional
+            If True, perform exclusive (rather than inclusive) cumulative summation, default: False.
+ """ + + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + + spec_layer = self._add_generic_layer(name, input_names, [output_name]) + spec_layer_params = spec_layer.cumSum + spec_layer_params.axis = axis + spec_layer_params.reverse = reverse + spec_layer_params.excludeFinalSum = exclusive + return spec_layer + + def add_clamped_relu(self, name, input_name, output_name, alpha=0.0, beta=6.0): + """ + Add a clamped relu layer to the model. + Clamped relu formula is f(x) = min((x >= 0 ? x : alpha * x), beta) + Refer to the **ClampedReluLayerParams** message in specification (NeuralNetwork.proto) for more details. + + Parameters + ---------- + name: str + The name of this layer. + input_name: str + The input blob name of this layer. + output_name: str + The output blob name of this layer. + alpha: float, optional + slope of the output when input is negative, default: 0.0. + beta: float, optional + Upper bound on the output value, default: 6.0. + + See Also + -------- + add_clip + """ + + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + + spec_layer = self._add_generic_layer(name, [input_name], [output_name]) + spec_layer.clampedReLU.MergeFromString(b"") + spec_params = spec_layer.clampedReLU + + spec_params.alpha = float(alpha) + spec_params.beta = float(beta) + + return spec_layer + + def add_argsort(self, name, input_name, output_name, axis=0, descending=False): + """ + Add an argsort layer to the model. + Refer to the **ArgsortLayerParams** message in the specification (NeuralNetwork.proto) for more details. + + Parameters + ---------- + name: str + The name of this layer. + input_name: str + The input blob name of this layer. + output_name: str + The output blob name of this layer. + axis: int, optional + axis along which to compute the sorting indices + descending: bool, optional + order of sorting + + See Also + -------- + add_topk + """ + + if self.spec and ( + not self.spec.specificationVersion + or self.spec.specificationVersion < _SPECIFICATION_VERSION_IOS_14 + ): + self.spec.specificationVersion = _SPECIFICATION_VERSION_IOS_14 + + spec_layer = self._add_generic_layer(name, [input_name], [output_name]) + spec_layer.argSort.MergeFromString(b"") + spec_params = spec_layer.argSort + + spec_params.axis = int(axis) + spec_params.descending = descending + + return spec_layer diff --git a/coremltools/models/neural_network/flexible_shape_utils.py b/coremltools/models/neural_network/flexible_shape_utils.py index ae0597ee1..022f10a85 100644 --- a/coremltools/models/neural_network/flexible_shape_utils.py +++ b/coremltools/models/neural_network/flexible_shape_utils.py @@ -4,21 +4,19 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause """ -Utilities to annotate Neural Network Features with flexible shape information. +Utilities to annotate Neural Network Features with flexible shape information. Only available in coremltools 2.0b1 and onwards """ from ..utils import _get_feature, _get_nn_layers, _get_input_names from ... import _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION from ... 
import _MINIMUM_NDARRAY_SPEC_VERSION
-from ..model import NeuralNetworkShaper
-from coremltools.models._deprecation import deprecated

-_SEQUENCE_KEY = 'S'
-_BATCH_KEY = 'B'
-_CHANNEL_KEY = 'C'
-_HEIGHT_KEY = 'H'
-_WIDTH_KEY = 'W'
+_SEQUENCE_KEY = "S"
+_BATCH_KEY = "B"
+_CHANNEL_KEY = "C"
+_HEIGHT_KEY = "H"
+_WIDTH_KEY = "W"

 _CONSTRAINED_KEYS = [_CHANNEL_KEY, _HEIGHT_KEY, _WIDTH_KEY]

@@ -26,7 +24,7 @@ class Shape(object):
     def __init__(self, shape_value):
         if shape_value < 1:
-            raise Exception('Invalid value. Size/Shape values must be > 0')
+            raise Exception("Invalid value. Size/Shape values must be > 0")
         self._value = shape_value

     @property
@@ -39,7 +37,7 @@ def __init__(self, size_value):
         super(Size, self).__init__(size_value)


-class NeuralNetworkMultiArrayShape():
+class NeuralNetworkMultiArrayShape:
     """
     An object representing a shape for a multiArray feature in a
     neural network. Valid shapes must have only the Channel [C]
     """

     def __init__(self, channel=None, height=None, width=None):
-        self._shape = {_CHANNEL_KEY: Shape(int(channel)) if channel else None,
-                       _HEIGHT_KEY: Shape(int(height)) if height else None,
-                       _WIDTH_KEY: Shape(int(width)) if width else None}
+        self._shape = {
+            _CHANNEL_KEY: Shape(int(channel)) if channel else None,
+            _HEIGHT_KEY: Shape(int(height)) if height else None,
+            _WIDTH_KEY: Shape(int(width)) if width else None,
+        }

     def set_channel_shape(self, channel_shape):
         self._shape[_CHANNEL_KEY] = Shape(channel_shape)
@@ -64,12 +64,13 @@ def _validate_multiarray_shape(self):
         num_dims = len([v for v in self._shape.values() if v])
         if num_dims != 1 and num_dims != 3:
             raise Exception(
-                'For neural networks, shape must be of length 1 or 3'
-                ', representing input shape [C] or [C,H,W], respectively')
+                "For neural networks, shape must be of length 1 or 3"
+                ", representing input shape [C] or [C,H,W], respectively"
+            )

         if num_dims == 1:
-            if not self._shape['C']:
-                raise Exception('Channel Shape not specified')
+            if not self._shape["C"]:
+                raise Exception("Channel Shape not specified")

     @property
     def multiarray_shape(self):
@@ -77,13 +78,16 @@ def multiarray_shape(self):
         if num_dims == 1:
             return [self._shape[_CHANNEL_KEY].value]
         elif num_dims == 3:
-            return [self._shape[_CHANNEL_KEY].value, self._shape[_HEIGHT_KEY].value,
-                    self._shape[_WIDTH_KEY].value]
+            return [
+                self._shape[_CHANNEL_KEY].value,
+                self._shape[_HEIGHT_KEY].value,
+                self._shape[_WIDTH_KEY].value,
+            ]
         else:
-            raise Exception('Invalid multiarray shape for neural network')
+            raise Exception("Invalid multiarray shape for neural network")


-class NeuralNetworkImageSize():
+class NeuralNetworkImageSize:
     """
     An object representing a size for an image feature inside a
     neural network. Valid sizes for height and width are > 0.
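For review context: these shape objects are consumed by the module's `add_enumerated_multiarray_shapes` helper. A sketch assuming an existing model whose "data" input is a multiArray (the path and feature name are hypothetical):

```python
import coremltools
from coremltools.models.neural_network import flexible_shape_utils

spec = coremltools.utils.load_spec("model.mlmodel")  # hypothetical existing model
shapes = [
    flexible_shape_utils.NeuralNetworkMultiArrayShape(channel=3, height=32, width=32),
    flexible_shape_utils.NeuralNetworkMultiArrayShape(channel=3, height=64, width=64),
]
flexible_shape_utils.add_enumerated_multiarray_shapes(spec, feature_name="data", shapes=shapes)
```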
@@ -117,17 +121,19 @@ def __init__(self, lowerBound, upperBound): if not unBounded and lowerBound > upperBound: raise Exception( - 'lowerBound > upperBound for range ({},{})'.format(lowerBound, - upperBound)) + "lowerBound > upperBound for range ({},{})".format( + lowerBound, upperBound + ) + ) if not unBounded and upperBound < 1: - raise Exception('Invalid upperBound: {} '.format(upperBound)) + raise Exception("Invalid upperBound: {} ".format(upperBound)) if lowerBound == 0: lowerBound = 1 if lowerBound < 1: - raise Exception('Invalid lowerBound: {}'.format(lowerBound)) + raise Exception("Invalid lowerBound: {}".format(lowerBound)) self._lowerBound = lowerBound self._upperBound = upperBound @@ -150,7 +156,7 @@ def isFlexible(self): return not (self._lowerBound == self._upperBound) -class NeuralNetworkMultiArrayShapeRange(): +class NeuralNetworkMultiArrayShapeRange: """ An object representing a range of shapes for a multiArray feature in a neural network. Valid shape ranges must have have only the Channel [C] @@ -163,7 +169,9 @@ def __init__(self, input_ranges=None): if input_ranges: if not isinstance(input_ranges, dict): - raise Exception('Attempting to initialize a shape range with something other than a dictionary of shapes.') + raise Exception( + "Attempting to initialize a shape range with something other than a dictionary of shapes." + ) self.arrayShapeRange = {} for key, value in input_ranges.items(): if key in _CONSTRAINED_KEYS: @@ -172,9 +180,9 @@ def __init__(self, input_ranges=None): def _create_shape_range(self, r): if not isinstance(r, tuple): - raise Exception('Range should be a ShapeRange or a tuple object') + raise Exception("Range should be a ShapeRange or a tuple object") elif len(r) != 2: - raise Exception('Range tuple should be at least length 2') + raise Exception("Range tuple should be at least length 2") return ShapeRange(r[0], r[1]) def add_channel_range(self, channel_range): @@ -199,16 +207,23 @@ def validate_array_shape_range(self): num_dims = self.get_shape_range_dims() if num_dims != 1 and num_dims != 3: raise Exception( - 'For neural networks, shape must be of length 1 or 3' - ', representing input shape [C] or [C,H,W], respectively') + "For neural networks, shape must be of length 1 or 3" + ", representing input shape [C] or [C,H,W], respectively" + ) if num_dims == 1: if _CHANNEL_KEY not in self.arrayShapeRange.keys(): - raise Exception('Channel Shape Range not specified') + raise Exception("Channel Shape Range not specified") if num_dims == 3: - if _CHANNEL_KEY not in self.arrayShapeRange.keys() or _HEIGHT_KEY not in self.arrayShapeRange.keys() or _WIDTH_KEY not in self.arrayShapeRange.keys(): - raise Exception('Shape range constraint missing for either channel, height, or width.') + if ( + _CHANNEL_KEY not in self.arrayShapeRange.keys() + or _HEIGHT_KEY not in self.arrayShapeRange.keys() + or _WIDTH_KEY not in self.arrayShapeRange.keys() + ): + raise Exception( + "Shape range constraint missing for either channel, height, or width." + ) def get_channel_range(self): return self.arrayShapeRange[_CHANNEL_KEY] @@ -231,7 +246,7 @@ def isFlexible(self): return False -class NeuralNetworkImageSizeRange(): +class NeuralNetworkImageSizeRange: """ An object representing a range of sizes for an image feature inside a neural network. Valid ranges for height and width are > 0. 
A "-1" @@ -242,20 +257,16 @@ class NeuralNetworkImageSizeRange(): def __init__(self, height_range=None, width_range=None): if height_range and not isinstance(height_range, ShapeRange): if not isinstance(height_range, tuple): - raise Exception( - 'Height range should be a ShapeRange or a tuple object') + raise Exception("Height range should be a ShapeRange or a tuple object") elif len(height_range) != 2: - raise Exception( - 'Height range tuple should be at least length 2') + raise Exception("Height range tuple should be at least length 2") height_range = ShapeRange(height_range[0], height_range[1]) if width_range and not isinstance(width_range, ShapeRange): if not isinstance(width_range, tuple): - raise Exception( - 'Width range should be a ShapeRange or a tuple object') + raise Exception("Width range should be a ShapeRange or a tuple object") elif len(width_range) != 2: - raise Exception( - 'Width range tuple should be at least length 2') + raise Exception("Width range tuple should be at least length 2") width_range = ShapeRange(width_range[0], width_range[1]) self._height_range = height_range @@ -264,22 +275,18 @@ def __init__(self, height_range=None, width_range=None): def add_width_range(self, width_range): if not isinstance(width_range, ShapeRange): if not isinstance(width_range, tuple): - raise Exception( - 'Width range should be a ShapeRange or a tuple object') + raise Exception("Width range should be a ShapeRange or a tuple object") elif len(width_range) != 2: - raise Exception( - 'Width range tuple should be at least length 2') + raise Exception("Width range tuple should be at least length 2") self._width_range = ShapeRange(width_range[0], width_range[1]) def add_height_range(self, height_range): if not isinstance(height_range, ShapeRange): if not isinstance(height_range, tuple): - raise Exception( - 'Height range should be a ShapeRange or a tuple object') + raise Exception("Height range should be a ShapeRange or a tuple object") elif len(height_range) != 2: - raise Exception( - 'Height range tuple should be at least length 2') + raise Exception("Height range tuple should be at least length 2") self._height_range = ShapeRange(height_range[0], height_range[1]) @@ -332,17 +339,18 @@ def add_enumerated_multiarray_shapes(spec, feature_name, shapes): for shape in shapes: if not isinstance(shape, NeuralNetworkMultiArrayShape): raise Exception( - 'Shape ranges should be of type NeuralNetworkMultiArrayShape') + "Shape ranges should be of type NeuralNetworkMultiArrayShape" + ) shape._validate_multiarray_shape() feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof('Type') != 'multiArrayType': - raise Exception('Trying to add enumerated shapes to ' - 'a non-multiArray feature type') + if feature.type.WhichOneof("Type") != "multiArrayType": + raise Exception( + "Trying to add enumerated shapes to " "a non-multiArray feature type" + ) - if feature.type.multiArrayType.WhichOneof( - 'ShapeFlexibility') != 'enumeratedShapes': - feature.type.multiArrayType.ClearField('ShapeFlexibility') + if feature.type.multiArrayType.WhichOneof("ShapeFlexibility") != "enumeratedShapes": + feature.type.multiArrayType.ClearField("ShapeFlexibility") eshape_len = len(feature.type.multiArrayType.enumeratedShapes.shapes) @@ -360,16 +368,18 @@ def add_enumerated_multiarray_shapes(spec, feature_name, shapes): fs.set_width_shape(fixed_shape[2]) shapes.append(fs) else: - raise Exception('Original fixed multiArray shape for {} is invalid' - .format(feature_name)) + raise Exception( + "Original fixed 
multiArray shape for {} is invalid".format(feature_name) + ) for shape in shapes: s = feature.type.multiArrayType.enumeratedShapes.shapes.add() s.shape.extend(shape.multiarray_shape) # Bump up specification version - spec.specificationVersion = max(_MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, - spec.specificationVersion) + spec.specificationVersion = max( + _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, spec.specificationVersion + ) def add_enumerated_image_sizes(spec, feature_name, sizes): @@ -408,17 +418,14 @@ def add_enumerated_image_sizes(spec, feature_name, sizes): for size in sizes: if not isinstance(size, NeuralNetworkImageSize): - raise Exception( - 'Shape ranges should be of type NeuralNetworkImageSize') + raise Exception("Shape ranges should be of type NeuralNetworkImageSize") feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof('Type') != 'imageType': - raise Exception('Trying to add enumerated sizes to ' - 'a non-image feature type') + if feature.type.WhichOneof("Type") != "imageType": + raise Exception("Trying to add enumerated sizes to " "a non-image feature type") - if feature.type.imageType.WhichOneof( - 'SizeFlexibility') != 'enumeratedSizes': - feature.type.imageType.ClearField('SizeFlexibility') + if feature.type.imageType.WhichOneof("SizeFlexibility") != "enumeratedSizes": + feature.type.imageType.ClearField("SizeFlexibility") esizes_len = len(feature.type.imageType.enumeratedSizes.sizes) @@ -435,8 +442,9 @@ def add_enumerated_image_sizes(spec, feature_name, sizes): s.width = size.width # Bump up specification version - spec.specificationVersion = max(_MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, - spec.specificationVersion) + spec.specificationVersion = max( + _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, spec.specificationVersion + ) def update_image_size_range(spec, feature_name, size_range): @@ -472,24 +480,31 @@ def update_image_size_range(spec, feature_name, size_range): None. 
The spec object is updated """ if not isinstance(size_range, NeuralNetworkImageSizeRange): - raise Exception( - 'Shape ranges should be of type NeuralNetworkImageSizeRange') + raise Exception("Shape ranges should be of type NeuralNetworkImageSizeRange") feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof('Type') != 'imageType': - raise Exception('Trying to add size ranges for ' - 'a non-image feature type') - - feature.type.imageType.ClearField('SizeFlexibility') - feature.type.imageType.imageSizeRange.heightRange.lowerBound = size_range.get_height_range().lowerBound - feature.type.imageType.imageSizeRange.heightRange.upperBound = size_range.get_height_range().upperBound - - feature.type.imageType.imageSizeRange.widthRange.lowerBound = size_range.get_width_range().lowerBound - feature.type.imageType.imageSizeRange.widthRange.upperBound = size_range.get_width_range().upperBound + if feature.type.WhichOneof("Type") != "imageType": + raise Exception("Trying to add size ranges for " "a non-image feature type") + + feature.type.imageType.ClearField("SizeFlexibility") + feature.type.imageType.imageSizeRange.heightRange.lowerBound = ( + size_range.get_height_range().lowerBound + ) + feature.type.imageType.imageSizeRange.heightRange.upperBound = ( + size_range.get_height_range().upperBound + ) + + feature.type.imageType.imageSizeRange.widthRange.lowerBound = ( + size_range.get_width_range().lowerBound + ) + feature.type.imageType.imageSizeRange.widthRange.upperBound = ( + size_range.get_width_range().upperBound + ) # Bump up specification version - spec.specificationVersion = max(_MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, - spec.specificationVersion) + spec.specificationVersion = max( + _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, spec.specificationVersion + ) def update_multiarray_shape_range(spec, feature_name, shape_range): @@ -528,17 +543,18 @@ def update_multiarray_shape_range(spec, feature_name, shape_range): None. The spec is updated """ if not isinstance(shape_range, NeuralNetworkMultiArrayShapeRange): - raise Exception('Shape range should be of type MultiArrayShapeRange') + raise Exception("Shape range should be of type MultiArrayShapeRange") shape_range.validate_array_shape_range() feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof('Type') != 'multiArrayType': - raise Exception('Trying to update shape range for ' - 'a non-multiArray feature type') + if feature.type.WhichOneof("Type") != "multiArrayType": + raise Exception( + "Trying to update shape range for " "a non-multiArray feature type" + ) # Add channel range - feature.type.multiArrayType.ClearField('ShapeFlexibility') + feature.type.multiArrayType.ClearField("ShapeFlexibility") s = feature.type.multiArrayType.shapeRange.sizeRanges.add() s.lowerBound = shape_range.get_channel_range().lowerBound s.upperBound = shape_range.get_channel_range().upperBound @@ -554,9 +570,9 @@ def update_multiarray_shape_range(spec, feature_name, shape_range): s.upperBound = shape_range.get_width_range().upperBound # Bump up specification version - spec.specificationVersion = max(_MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, - spec.specificationVersion) - + spec.specificationVersion = max( + _MINIMUM_FLEXIBLE_SHAPES_SPEC_VERSION, spec.specificationVersion + ) def set_multiarray_ndshape_range(spec, feature_name, lower_bounds, upper_bounds): @@ -598,41 +614,52 @@ def set_multiarray_ndshape_range(spec, feature_name, lower_bounds, upper_bounds) None. 
The spec is updated """ if not isinstance(lower_bounds, list): - raise Exception('lower_bounds must be a list') + raise Exception("lower_bounds must be a list") if not isinstance(upper_bounds, list): - raise Exception('upper_bounds must be a list') + raise Exception("upper_bounds must be a list") feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof('Type') != 'multiArrayType': - raise Exception('Trying to update shape range for ' - 'a non-multiArray feature type') + if feature.type.WhichOneof("Type") != "multiArrayType": + raise Exception( + "Trying to update shape range for " "a non-multiArray feature type" + ) shape = feature.type.multiArrayType.shape if len(shape) != len(lower_bounds): - raise Exception('Length of lower_bounds is not equal to the number of dimensions in the default shape') + raise Exception( + "Length of lower_bounds is not equal to the number of dimensions in the default shape" + ) if len(shape) != len(upper_bounds): - raise Exception('Length of upper_bounds is not equal to the number of dimensions in the default shape') + raise Exception( + "Length of upper_bounds is not equal to the number of dimensions in the default shape" + ) - feature.type.multiArrayType.ClearField('ShapeFlexibility') + feature.type.multiArrayType.ClearField("ShapeFlexibility") for i in range(len(lower_bounds)): if shape[i] < lower_bounds[i]: - raise Exception('Default shape in %d-th dimension, which is %d, is smaller' - ' than the lower bound of %d' %(i, int(shape[i]), lower_bounds[i])) + raise Exception( + "Default shape in %d-th dimension, which is %d, is smaller" + " than the lower bound of %d" % (i, int(shape[i]), lower_bounds[i]) + ) if upper_bounds[i] != -1: if shape[i] > upper_bounds[i]: - raise Exception('Default shape in %d-th dimension, which is %d, is greater' - ' than the upper bound of %d' % (i, int(shape[i]), upper_bounds[i])) + raise Exception( + "Default shape in %d-th dimension, which is %d, is greater" + " than the upper bound of %d" % (i, int(shape[i]), upper_bounds[i]) + ) s = feature.type.multiArrayType.shapeRange.sizeRanges.add() s.lowerBound = lower_bounds[i] s.upperBound = upper_bounds[i] # Bump up specification version - spec.specificationVersion = max(_MINIMUM_NDARRAY_SPEC_VERSION, - spec.specificationVersion) + spec.specificationVersion = max( + _MINIMUM_NDARRAY_SPEC_VERSION, spec.specificationVersion + ) + def add_multiarray_ndshape_enumeration(spec, feature_name, enumerated_shapes): """ @@ -668,20 +695,20 @@ def add_multiarray_ndshape_enumeration(spec, feature_name, enumerated_shapes): None. 
The spec is updated """ if not isinstance(enumerated_shapes, list): - raise Exception('enumerated_shapes must be a list') + raise Exception("enumerated_shapes must be a list") if len(enumerated_shapes) == 0: - raise Exception('enumerated_shapes is empty') + raise Exception("enumerated_shapes is empty") feature = _get_feature(spec, feature_name) - if feature.type.WhichOneof('Type') != 'multiArrayType': - raise Exception('Trying to update shape range for ' - 'a non-multiArray feature type') + if feature.type.WhichOneof("Type") != "multiArrayType": + raise Exception( + "Trying to update shape range for " "a non-multiArray feature type" + ) shape = feature.type.multiArrayType.shape - if feature.type.multiArrayType.WhichOneof( - 'ShapeFlexibility') != 'enumeratedShapes': - feature.type.multiArrayType.ClearField('ShapeFlexibility') + if feature.type.multiArrayType.WhichOneof("ShapeFlexibility") != "enumeratedShapes": + feature.type.multiArrayType.ClearField("ShapeFlexibility") eshape_len = len(feature.type.multiArrayType.enumeratedShapes.shapes) @@ -694,61 +721,11 @@ def add_multiarray_ndshape_enumeration(spec, feature_name, enumerated_shapes): for shape in enumerated_shapes: if not isinstance(shape, tuple): - raise Exception('An element in \'enumerated_shapes\' is not a tuple') + raise Exception("An element in 'enumerated_shapes' is not a tuple") s = feature.type.multiArrayType.enumeratedShapes.shapes.add() s.shape.extend(list(shape)) # Bump up specification version - spec.specificationVersion = max(_MINIMUM_NDARRAY_SPEC_VERSION, - spec.specificationVersion) - -@deprecated -def get_allowed_shape_ranges(spec): - """ - For a given model specification, returns a dictionary with a shape range object for each input feature name. - """ - - shaper = NeuralNetworkShaper(spec, False) - inputs = _get_input_names(spec) - output = {} - - for input in inputs: - output[input] = shaper.shape(input) - - return output - - -@deprecated -def can_allow_multiple_input_shapes(spec): - """ - Examines a model specification and determines if it can compute results for more than one output shape. - - :param spec: MLModel - The protobuf specification of the model. - - :return: Bool - Returns True if the model can allow multiple input shapes, False otherwise. 
- """ - - # First, check that the model actually has a neural network in it - try: - layers = _get_nn_layers(spec) - except: - raise Exception('Unable to verify that this model contains a neural network.') - - try: - shaper = NeuralNetworkShaper(spec, False) - except: - raise Exception('Unable to compute shapes for this neural network.') - - inputs = _get_input_names(spec) - - for name in inputs: - - shape_dict = shaper.shape(name) - shape = NeuralNetworkMultiArrayShapeRange(shape_dict) - - if (shape.isFlexible()): - return True - - return False + spec.specificationVersion = max( + _MINIMUM_NDARRAY_SPEC_VERSION, spec.specificationVersion + ) diff --git a/coremltools/models/neural_network/optimization_utils.py b/coremltools/models/neural_network/optimization_utils.py index 386e59415..28e180143 100644 --- a/coremltools/models/neural_network/optimization_utils.py +++ b/coremltools/models/neural_network/optimization_utils.py @@ -9,22 +9,24 @@ import numpy as _np + def _fuse_layer_with_scale_layer(layer_idx, scale_idx, layers): - layer_type = layers[layer_idx].WhichOneof('layer') - if layer_type == 'convolution': + layer_type = layers[layer_idx].WhichOneof("layer") + if layer_type == "convolution": layer = layers[layer_idx].convolution - elif layer_type == 'innerProduct': + elif layer_type == "innerProduct": layer = layers[layer_idx].innerProduct else: - raise Exception('Scale fusion not supper for layer ' - 'type {} '.format(layer_type)) + raise Exception( + "Scale fusion not supper for layer " "type {} ".format(layer_type) + ) scale = layers[scale_idx].scale # Update weights sw = _np.array(scale.scale.floatValue) w = _np.array(layer.weights.floatValue) - w = w.reshape(layer.outputChannels, int(len(w)/layer.outputChannels)) + w = w.reshape(layer.outputChannels, int(len(w) / layer.outputChannels)) wp = w * sw[:, None] del layer.weights.floatValue[:] layer.weights.floatValue.extend(wp.flatten()) @@ -37,26 +39,27 @@ def _fuse_layer_with_scale_layer(layer_idx, scale_idx, layers): layer.hasBias = True else: lb = _np.array(layer.bias.floatValue) - bp = sw*lb + sb + bp = sw * lb + sb del layer.bias.floatValue[:] layer.bias.floatValue.extend(bp) # re-wire outputs and delete scale layer - print('Fused {}->{}'.format(layers[layer_idx].name, layers[scale_idx].name)) + print("Fused {}->{}".format(layers[layer_idx].name, layers[scale_idx].name)) del layers[layer_idx].output[:] layers[layer_idx].output.extend(layers[scale_idx].output) del layers[scale_idx] def _fuse_layer_with_bias_layer(layer_idx, bias_idx, layers): - layer_type = layers[layer_idx].WhichOneof('layer') - if layer_type == 'convolution': + layer_type = layers[layer_idx].WhichOneof("layer") + if layer_type == "convolution": layer = layers[layer_idx].convolution - elif layer_type == 'innerProduct': + elif layer_type == "innerProduct": layer = layers[layer_idx].innerProduct else: - raise Exception('Bias fusion not supper for layer ' - 'type {} '.format(layer_type)) + raise Exception( + "Bias fusion not supper for layer " "type {} ".format(layer_type) + ) bias = layers[bias_idx].bias @@ -71,11 +74,12 @@ def _fuse_layer_with_bias_layer(layer_idx, bias_idx, layers): layer.bias.floatValue.extend(bp) # re-wire outputs and delete bias layer - print('Fused {}->{}'.format(layers[layer_idx].name, layers[bias_idx].name)) + print("Fused {}->{}".format(layers[layer_idx].name, layers[bias_idx].name)) del layers[layer_idx].output[:] layers[layer_idx].output.extend(layers[bias_idx].output) del layers[bias_idx] + def _bn_scale_fusion(bn_idx, scale_idx, layers): 
bn = layers[bn_idx].batchnorm scale = layers[scale_idx].scale @@ -98,7 +102,7 @@ def _bn_scale_fusion(bn_idx, scale_idx, layers): bn.beta.floatValue.extend(beta) # re-wire outputs and delete scale layer - print('Fused {}->{}'.format(layers[bn_idx].name, layers[scale_idx].name)) + print("Fused {}->{}".format(layers[bn_idx].name, layers[scale_idx].name)) del layers[bn_idx].output[:] layers[bn_idx].output.extend(layers[scale_idx].output) del layers[scale_idx] @@ -119,9 +123,9 @@ def _conv_bn_fusion(conv_idx, bn_idx, layers): else: b = _np.zeros(conv.outputChannels) - w = w.reshape(conv.outputChannels, int(len(w)/conv.outputChannels)) + w = w.reshape(conv.outputChannels, int(len(w) / conv.outputChannels)) wp = (gamma / _np.sqrt(variance))[:, None] * w - bp = (gamma*b/_np.sqrt(variance)) - (gamma*mean/_np.sqrt(variance)) + beta + bp = (gamma * b / _np.sqrt(variance)) - (gamma * mean / _np.sqrt(variance)) + beta del conv.weights.floatValue[:] if conv.hasBias: @@ -131,7 +135,7 @@ def _conv_bn_fusion(conv_idx, bn_idx, layers): conv.bias.floatValue.extend(bp) conv.hasBias = True - print('Fused {}->{}'.format(layers[conv_idx].name, layers[bn_idx].name)) + print("Fused {}->{}".format(layers[conv_idx].name, layers[bn_idx].name)) # re-wire outputs and delete batchnorm layer del layers[conv_idx].output[:] layers[conv_idx].output.extend(layers[bn_idx].output) @@ -144,27 +148,27 @@ def _get_nn_mappings(layers): output_map = {} input_map = {} for idx, layer in enumerate(layers): - layer_name = '{}'.format(idx) - layer_map[layer_name] = {'outputs': [], 'inputs': []} - layer_type = layer.WhichOneof('layer') + layer_name = "{}".format(idx) + layer_map[layer_name] = {"outputs": [], "inputs": []} + layer_type = layer.WhichOneof("layer") if layer_type not in type_map.keys(): type_map[layer_type] = [] type_map[layer_type].append(layer_name) # Add inputs and outputs for layer for o in layer.output: - layer_map[layer_name]['outputs'].append(o) + layer_map[layer_name]["outputs"].append(o) for i in layer.input: - layer_map[layer_name]['inputs'].append(i) + layer_map[layer_name]["inputs"].append(i) # Construct input/output graph dict for l in layer_map.keys(): output_map[l] = [] input_map[l] = [] for cl in layer_map.keys(): - if any(x in layer_map[l]['outputs'] for x in layer_map[cl]['inputs']): + if any(x in layer_map[l]["outputs"] for x in layer_map[cl]["inputs"]): output_map[l].append(cl) - if any(x in layer_map[l]['inputs'] for x in layer_map[cl]['outputs']): + if any(x in layer_map[l]["inputs"] for x in layer_map[cl]["outputs"]): input_map[l].append(cl) return type_map, output_map, input_map @@ -175,22 +179,22 @@ def _optimize_nn(layers): bn_layers = conv_layers = ip_layers = bias_layers = scale_layers = [] # Only fuse with non-instance batchnorm layers - if 'batchnorm' in type_map.keys(): - for bn_layer_idx in type_map['batchnorm']: + if "batchnorm" in type_map.keys(): + for bn_layer_idx in type_map["batchnorm"]: if not layers[int(bn_layer_idx)].batchnorm.instanceNormalization: bn_layers.append(bn_layer_idx) - if 'convolution' in type_map.keys(): - conv_layers = type_map['convolution'] + if "convolution" in type_map.keys(): + conv_layers = type_map["convolution"] - if 'innerProduct' in type_map.keys(): - ip_layers = type_map['innerProduct'] + if "innerProduct" in type_map.keys(): + ip_layers = type_map["innerProduct"] - if 'bias' in type_map.keys(): - bias_layers = type_map['bias'] + if "bias" in type_map.keys(): + bias_layers = type_map["bias"] - if 'scale' in type_map.keys(): - scale_layers = type_map['scale'] 
+ if "scale" in type_map.keys(): + scale_layers = type_map["scale"] # Convolution optimizations for conv_idx in conv_layers: diff --git a/coremltools/models/neural_network/printer.py b/coremltools/models/neural_network/printer.py index 6573cf8dc..68a778876 100644 --- a/coremltools/models/neural_network/printer.py +++ b/coremltools/models/neural_network/printer.py @@ -3,15 +3,12 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from .spec_inspection_utils import _get_feature_description_summary, \ - _summarize_neural_network_spec_code_style, \ - _summarize_neural_network_spec +from .spec_inspection_utils import ( + _get_feature_description_summary, + _summarize_neural_network_spec_code_style, + _summarize_neural_network_spec, +) -from coremltools.models._deprecation import deprecated - -@deprecated -def print_network_spec_parameter_info_style(mlmodel_spec, interface_only=False): - return _print_network_spec_parameter_info_style(mlmodel_spec, interface_only=interface_only) def _print_network_spec_parameter_info_style(mlmodel_spec, interface_only=False): """ Print the network information summary. @@ -21,36 +18,35 @@ def _print_network_spec_parameter_info_style(mlmodel_spec, interface_only=False) """ inputs, outputs, layers_info = _summarize_neural_network_spec(mlmodel_spec) - print('Inputs:') + print("Inputs:") for i in inputs: name, description = i - print(' {} {}'.format(name, description)) + print(" {} {}".format(name, description)) - print('Outputs:') + print("Outputs:") for o in outputs: name, description = o - print(' {} {}'.format(name, description)) + print(" {} {}".format(name, description)) if layers_info is None: - print('\n(This MLModel is not a neural network model or does not contain any layers)') + print( + "\n(This MLModel is not a neural network model or does not contain any layers)" + ) if layers_info and not interface_only: - print('\nLayers:') + print("\nLayers:") for idx, l in enumerate(layers_info): layer_type, name, in_blobs, out_blobs, params_info = l - print('[{}] ({}) {}'.format(idx, layer_type, name)) - print(' Input blobs: {}'.format(in_blobs)) - print(' Output blobs: {}'.format(out_blobs)) + print("[{}] ({}) {}".format(idx, layer_type, name)) + print(" Input blobs: {}".format(in_blobs)) + print(" Output blobs: {}".format(out_blobs)) if len(params_info) > 0: - print(' Parameters: ') + print(" Parameters: ") for param in params_info: - print(' {} = {}'.format(param[0], param[1])) + print(" {} = {}".format(param[0], param[1])) - print('\n') + print("\n") -@deprecated -def print_network_spec_coding_style(mlmodel_spec, interface_only=False): - return _print_network_spec_coding_style(mlmodel_spec, interface_only=interface_only) def _print_network_spec_coding_style(mlmodel_spec, interface_only=False): """ @@ -59,21 +55,27 @@ def _print_network_spec_coding_style(mlmodel_spec, interface_only=False): interface_only : Shows only the input and output of the network """ - inputs = [(blob.name, _get_feature_description_summary(blob)) for blob in mlmodel_spec.description.input] - outputs = [(blob.name, _get_feature_description_summary(blob)) for blob in mlmodel_spec.description.output] + inputs = [ + (blob.name, _get_feature_description_summary(blob)) + for blob in mlmodel_spec.description.input + ] + outputs = [ + (blob.name, _get_feature_description_summary(blob)) + for blob in mlmodel_spec.description.output + ] input_names = [] - print('Inputs:') + 
print("Inputs:") for i in inputs: name, description = i - print(' {} {}'.format(name, description)) + print(" {} {}".format(name, description)) input_names.append(name) output_names = [] - print('Outputs:') + print("Outputs:") for o in outputs: name, description = o - print(' {} {}'.format(name, description)) + print(" {} {}".format(name, description)) output_names.append(name) if interface_only: @@ -81,22 +83,24 @@ def _print_network_spec_coding_style(mlmodel_spec, interface_only=False): nn_spec = None - if mlmodel_spec.HasField('neuralNetwork'): + if mlmodel_spec.HasField("neuralNetwork"): nn_spec = mlmodel_spec.neuralNetwork - elif mlmodel_spec.HasField('neuralNetworkClassifier'): + elif mlmodel_spec.HasField("neuralNetworkClassifier"): nn_spec = mlmodel_spec.neuralNetworkClassifier - elif mlmodel_spec.HasField('neuralNetworkRegressor'): + elif mlmodel_spec.HasField("neuralNetworkRegressor"): nn_spec = mlmodel_spec.neuralNetworkRegressor if nn_spec is None: - print('\n(This MLModel is not a neural network model)') + print("\n(This MLModel is not a neural network model)") return - print('\n') - _summarize_neural_network_spec_code_style(nn_spec, input_names=input_names, output_names=output_names) + print("\n") + _summarize_neural_network_spec_code_style( + nn_spec, input_names=input_names, output_names=output_names + ) -def print_network_spec(mlmodel_spec, interface_only=False, style=''): +def print_network_spec(mlmodel_spec, interface_only=False, style=""): """ Print the network information summary. Args: mlmodel_spec : the mlmodel spec @@ -104,7 +108,9 @@ def print_network_spec(mlmodel_spec, interface_only=False, style=''): style : str. Either 'coding' or default, which prints information on parameters of layers. """ - if style == 'coding': + if style == "coding": _print_network_spec_coding_style(mlmodel_spec, interface_only=interface_only) else: - _print_network_spec_parameter_info_style(mlmodel_spec, interface_only=interface_only) + _print_network_spec_parameter_info_style( + mlmodel_spec, interface_only=interface_only + ) diff --git a/coremltools/models/neural_network/quantization_utils.py b/coremltools/models/neural_network/quantization_utils.py index c9b7ed4f4..936492148 100644 --- a/coremltools/models/neural_network/quantization_utils.py +++ b/coremltools/models/neural_network/quantization_utils.py @@ -12,7 +12,9 @@ from __future__ import absolute_import as _ import numpy as _np -import sys, os +from sys import stdout as _stdout +from os import listdir as _listdir +from six import string_types as _string_types from .optimization_utils import _optimize_nn from coremltools.models import ( @@ -23,15 +25,16 @@ _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE, _QUANTIZATION_MODE_LINEAR_QUANTIZATION, _QUANTIZATION_MODE_LINEAR_SYMMETRIC, - _LUT_BASED_QUANTIZATION + _LUT_BASED_QUANTIZATION, ) -from ..utils import _get_nn_layers, _wp_to_fp16wp, _get_model, macos_version -from ..._deps import HAS_SKLEARN as _HAS_SKLEARN -from ... import (_MINIMUM_QUANTIZED_MODEL_SPEC_VERSION, - _MINIMUM_FP16_SPEC_VERSION) - -from coremltools.models._deprecation import deprecated +from ..utils import _get_nn_layers, _wp_to_fp16wp, _get_model, _macos_version +from ..._deps import _HAS_SKLEARN as _HAS_SKLEARN +from ... 
import ( + _MINIMUM_QUANTIZED_MODEL_SPEC_VERSION, + _MINIMUM_FP16_SPEC_VERSION, + _SPECIFICATION_VERSION_IOS_14, +) class QuantizedLayerSelector(object): @@ -58,17 +61,29 @@ def do_quantize(self, layer, **kwargs): quantized_model = quantize_weights(mlmodel, 8, quantization_mode='linear', selector=selector) """ + def __init__(self): self.quantizable_layer_types = { - 'convolution', 'innerProduct', 'embedding', 'embeddingND', - 'batchnorm', 'scale', 'bias', 'loadConstant', - 'simpleRecurrent', 'gru', 'uniDirectionalLSTM', - 'biDirectionalLSTM', 'batchedMatmul', 'depthwiseConv', - 'loop', 'branch' + "convolution", + "innerProduct", + "embedding", + "embeddingND", + "batchnorm", + "scale", + "bias", + "loadConstant", + "simpleRecurrent", + "gru", + "uniDirectionalLSTM", + "biDirectionalLSTM", + "batchedMatmul", + "depthwiseConv", + "loop", + "branch", } def do_quantize(self, layer, **kwargs): - return layer.WhichOneof('layer') in self.quantizable_layer_types + return layer.WhichOneof("layer") in self.quantizable_layer_types class AdvancedQuantizedLayerSelector(QuantizedLayerSelector): @@ -89,10 +104,13 @@ class AdvancedQuantizedLayerSelector(QuantizedLayerSelector): quantized_model = quantize_weights(model, 8, selector=selector) """ - def __init__(self, - skip_layer_types=[], - minimum_conv_kernel_channels=4, - minimum_conv_weight_count=4096): + + def __init__( + self, + skip_layer_types=[], + minimum_conv_kernel_channels=4, + minimum_conv_weight_count=4096, + ): super(AdvancedQuantizedLayerSelector, self).__init__() self.skip_layer_types = skip_layer_types @@ -103,8 +121,12 @@ def __init__(self, if lt not in self.quantizable_layer_types: invalid_skip_types.append(lt) if len(invalid_skip_types) > 0: - err_msg = 'Skip quantization layer types ({}) is not supported.\n'.format(','.join(invalid_skip_types)) - err_msg += 'Supported quantization layers: ({})'.format(','.join(self.quantizable_layer_types)) + err_msg = "Skip quantization layer types ({}) is not supported.\n".format( + ",".join(invalid_skip_types) + ) + err_msg += "Supported quantization layers: ({})".format( + ",".join(self.quantizable_layer_types) + ) raise ValueError(err_msg) self.minimum_conv_kernel_channels = minimum_conv_kernel_channels @@ -117,11 +139,11 @@ def do_quantize(self, layer, weight_param=None): if not ret: return False - layer_type = layer.WhichOneof('layer') + layer_type = layer.WhichOneof("layer") if layer_type in self.skip_layer_types: return False - if layer_type == 'convolution': + if layer_type == "convolution": oc = layer.convolution.outputChannels kc = layer.convolution.kernelChannels kh = layer.convolution.kernelSize[0] @@ -130,31 +152,127 @@ def do_quantize(self, layer, weight_param=None): counts = oc * kc * kh * kw has_bias = layer.convolution.hasBias - if weight_param is None or weight_param == 'weights': - if 'depthwiseConv' in self.skip_layer_types and kc == 1 and \ - groups > 1: + if weight_param is None or weight_param == "weights": + if "depthwiseConv" in self.skip_layer_types and kc == 1 and groups > 1: return False - if kc < self.minimum_conv_kernel_channels or \ - counts < self.minimum_conv_weight_count: + if ( + kc < self.minimum_conv_kernel_channels + or counts < self.minimum_conv_weight_count + ): return False - elif weight_param == 'bias': - return not 'bias' in self.skip_layer_types + elif weight_param == "bias": + return not "bias" in self.skip_layer_types else: - raise ValueError('Unrecognized quantization weight field {}'.format(weight_param)) + raise ValueError( + "Unrecognized 
quantization weight field {}".format(weight_param)
+                )
 
-        elif layer_type == 'innerProduct' or 'batchedMatmul':
-            if weight_param is None or weight_param == 'weights':
+        elif layer_type in ("innerProduct", "batchedMatmul"):
+            if weight_param is None or weight_param == "weights":
                 return True
 
-            if weight_param == 'bias':
-                return not 'bias' in self.skip_layer_types
+            if weight_param == "bias":
+                return not "bias" in self.skip_layer_types
             else:
-                raise ValueError('Unrecognized quantization weight field {}'.format(weight_param))
+                raise ValueError(
+                    "Unrecognized quantization weight field {}".format(weight_param)
+                )
 
         return True
 
 
+class MatrixMultiplyLayerSelector(QuantizedLayerSelector):
+    """
+    Layer selector object that allows users to select matrix multiplication layers
+    with one of the matrices being constant, based on criteria such as the total
+    number of parameters/weights, the number of input or output channels, and/or
+    layer names. A layer is selected only if it satisfies all of the criteria.
+    """
+
+    def __init__(
+        self,
+        minimum_weight_count=1,
+        minimum_input_channels=1,
+        minimum_output_channels=1,
+        maximum_input_channels=None,
+        maximum_output_channels=None,
+        include_layers_with_names=None,
+    ):
+
+        super(MatrixMultiplyLayerSelector, self).__init__()
+
+        # weight count refers to number of parameters/weights and is equal to product of input & output channels
+        self.minimum_weight_count = minimum_weight_count
+        self.minimum_input_channels = minimum_input_channels
+        self.minimum_output_channels = minimum_output_channels
+        self.maximum_input_channels = maximum_input_channels
+        self.maximum_output_channels = maximum_output_channels
+        # Default to an empty list, but keep any list the caller passed in.
+        self.include_layers_with_names = (
+            [] if include_layers_with_names is None else include_layers_with_names
+        )
+
+        if not (
+            isinstance(self.include_layers_with_names, (list, tuple))
+            and all(
+                [isinstance(s, _string_types) for s in self.include_layers_with_names]
+            )
+        ):
+            raise ValueError(
+                "Property 'include_layers_with_names' must be a list/tuple of str objects"
+            )
+
+    def do_quantize(self, layer, weight_param=None):
+        """ weight_param - should be the name of the WeightParam field
+        """
+        ret = super(MatrixMultiplyLayerSelector, self).do_quantize(layer)
+        if not ret:
+            return False
+
+        layer_type = layer.WhichOneof("layer")
+
+        if layer_type in ["innerProduct", "batchedMatmul"]:
+            if weight_param == "bias":
+                return True
+            elif weight_param is None or weight_param == "weights":
+
+                if layer_type == "innerProduct":
+                    ic = layer.innerProduct.inputChannels
+                    oc = layer.innerProduct.outputChannels
+                else:
+                    ic = layer.batchedMatmul.weightMatrixFirstDimension
+                    oc = layer.batchedMatmul.weightMatrixSecondDimension
+
+                wc = ic * oc
+
+                if wc < self.minimum_weight_count:
+                    return False
+                if ic < self.minimum_input_channels:
+                    return False
+                if oc < self.minimum_output_channels:
+                    return False
+                if self.maximum_input_channels and ic > self.maximum_input_channels:
+                    return False
+                if self.maximum_output_channels and oc > self.maximum_output_channels:
+                    return False
+                if (
+                    self.include_layers_with_names
+                    and layer.name not in self.include_layers_with_names
+                ):
+                    return False
+
+                return True
+            else:
+                raise ValueError(
+                    "Unrecognized quantization weight field {}".format(weight_param)
+                )
+
+        elif layer_type in ["loop", "branch"]:
+            return True
+
+        return False
+
+
 def _convert_1bit_array_to_byte_array(arr):
     """
     Convert bit array to byte array.
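
A hedged usage sketch for the new selector (not part of the diff; the model path and thresholds are illustrative). `quantize_weights` is the entry point referenced by the selector docstrings earlier in this file; passing a `MatrixMultiplyLayerSelector` restricts quantization to sufficiently large constant-weight matrix multiplies:

```python
from coremltools.models import MLModel
from coremltools.models.neural_network.quantization_utils import (
    MatrixMultiplyLayerSelector,
    quantize_weights,
)

model = MLModel("MyModel.mlmodel")

# Quantize only innerProduct/batchedMatmul layers with at least 4096 weights
# and at least 64 output channels; everything else stays full precision.
selector = MatrixMultiplyLayerSelector(
    minimum_weight_count=4096,
    minimum_output_channels=64,
)
quantized_model = quantize_weights(model, 8, selector=selector)
```
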
@@ -171,22 +289,23 @@ def _convert_1bit_array_to_byte_array(arr): while len(arr) < 8 or len(arr) % 8: arr.append(0) - arr = _np.array(arr, dtype='uint8') + arr = _np.array(arr, dtype="uint8") bit_arr = [] idx = 0 # Iterate and combine 8-bits into a uint8 for arr_idx in range(int(len(arr) / 8)): - bit_arr.append(((arr[idx] << 7) & (1 << 7)) | - ((arr[idx+1] << 6) & (1 << 6)) | - ((arr[idx+2] << 5) & (1 << 5)) | - ((arr[idx+3] << 4) & (1 << 4)) | - ((arr[idx+4] << 3) & (1 << 3)) | - ((arr[idx+5] << 2) & (1 << 2)) | - ((arr[idx+6] << 1) & (1 << 1)) | - ((arr[idx+7] << 0) & (1 << 0)) - ) + bit_arr.append( + ((arr[idx] << 7) & (1 << 7)) + | ((arr[idx + 1] << 6) & (1 << 6)) + | ((arr[idx + 2] << 5) & (1 << 5)) + | ((arr[idx + 3] << 4) & (1 << 4)) + | ((arr[idx + 4] << 3) & (1 << 3)) + | ((arr[idx + 5] << 2) & (1 << 2)) + | ((arr[idx + 6] << 1) & (1 << 1)) + | ((arr[idx + 7] << 0) & (1 << 0)) + ) idx += 8 - return _np.array(bit_arr, dtype='uint8') + return _np.array(bit_arr, dtype="uint8") def _convert_array_to_nbit_quantized_bytes(arr, nbits): @@ -236,12 +355,14 @@ def _get_linear_lookup_table_and_weight(nbits, wp): """ w = wp.reshape(1, -1) qw, scales, biases = _quantize_channelwise_linear(w, nbits, axis=0) - indices = _np.array(range(0, 2**nbits)) + indices = _np.array(range(0, 2 ** nbits)) lookup_table = indices * scales[0] + biases[0] return lookup_table, qw -def _get_kmeans_lookup_table_and_weight(nbits, w, init='k-means++', tol=1e-2, n_init=1, rand_seed=0): +def _get_kmeans_lookup_table_and_weight( + nbits, w, init="k-means++", tol=1e-2, n_init=1, rand_seed=0 +): """ Generate K-Means lookup table given a weight parameter field @@ -261,17 +382,20 @@ def _get_kmeans_lookup_table_and_weight(nbits, w, init='k-means++', tol=1e-2, n_ if _HAS_SKLEARN: from sklearn.cluster import KMeans else: - raise Exception('sklearn package required for k-means quantization') + raise Exception("sklearn package required for k-means quantization") units = _np.prod(w.shape) lut_len = 1 << nbits n_clusters = units if (units < lut_len) else lut_len wf = w.reshape(-1, 1) - kmeans = KMeans(n_clusters=n_clusters, init=init, tol=tol, n_init=n_init, random_state=rand_seed).fit(wf) + kmeans = KMeans( + n_clusters=n_clusters, init=init, tol=tol, n_init=n_init, random_state=rand_seed + ).fit(wf) wq = kmeans.labels_[:units] lut = _np.zeros(lut_len) lut[:n_clusters] = kmeans.cluster_centers_.flatten() return lut, wq + def _quantize_channelwise_linear(weight, nbits, axis=0, symmetric=False): """ Linearly quantize weight blob. 
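
The k-means and linear helpers above both return a `(lookup_table, quantized_indices)` pair; the custom lookup-table mode (handled in `_quantize_wp` below) expects a user-supplied callable with the same contract: `lut_function(nbits, wp)` returning a table of `2 ** nbits` values plus a per-weight index array. A toy illustration, assuming a NumPy weight array (`uniform_lut_function` is a hypothetical name, not a library API):

```python
import numpy as np

def uniform_lut_function(nbits, wp):
    """Map weights onto a uniformly spaced 2**nbits-entry lookup table."""
    flat = wp.reshape(-1)
    lut_len = 1 << nbits
    lo, hi = float(flat.min()), float(flat.max())
    # Representative values, evenly spaced across the weight range.
    lut = np.linspace(lo, hi, lut_len)
    # Nearest-entry index for every weight (a degenerate range maps to index 0).
    step = (hi - lo) / (lut_len - 1) if hi > lo else 1.0
    qw = np.clip(np.round((flat - lo) / step), 0, lut_len - 1).astype(np.uint8)
    return lut, qw
```

Such a function would be passed along the lines of `quantize_weights(model, nbits, quantization_mode="custom_lut", lut_function=uniform_lut_function)`; the `"custom_lut"` mode name is stated here as an assumption, since this hunk does not show the mode-string constants.
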
@@ -299,37 +423,39 @@ def _quantize_channelwise_linear(weight, nbits, axis=0, symmetric=False): bias: numpy.array per channel bias """ - if len(weight.shape) == 1: # vector situation, treat as 1 channel + if len(weight.shape) == 1: # vector situation, treat as 1 channel weight = weight.reshape((1, weight.shape[0])) rank = len(weight.shape) if axis == 1: - transposed_axis_order = (1,0) + tuple(range(2,rank)) + transposed_axis_order = (1, 0) + tuple(range(2, rank)) weight = _np.transpose(weight, transposed_axis_order) num_channels = weight.shape[0] shape = weight.shape - weight = weight.reshape((num_channels, -1)) # [C, L] + weight = weight.reshape((num_channels, -1)) # [C, L] - a = _np.amin(weight, axis=-1) # [C,] - b = _np.amax(weight, axis=-1) # [C,] + a = _np.amin(weight, axis=-1) # [C,] + b = _np.amax(weight, axis=-1) # [C,] if symmetric: r = _np.maximum(_np.abs(a), _np.abs(b)) scale = r / ((1 << nbits) / 2.0 - 1) bias = -(1 << nbits) / 2.0 * scale - num = (weight - bias[:,None]) - denom = scale[:,None] - qw = _np.divide(num, denom, out=_np.zeros_like(num), - where=(_np.abs(denom) > 1e-6)) + num = weight - bias[:, None] + denom = scale[:, None] + qw = _np.divide( + num, denom, out=_np.zeros_like(num), where=(_np.abs(denom) > 1e-6) + ) qw = _np.round(qw) else: qb = (1 << nbits) - 1 scale = (b - a) / qb - inv_scale = _np.divide(1.0, scale, out=_np.zeros_like(scale), - where=(_np.abs(scale) > 1e-6)) + inv_scale = _np.divide( + 1.0, scale, out=_np.zeros_like(scale), where=(_np.abs(scale) > 1e-6) + ) bias = a - qw = (weight - a[:,None]) * inv_scale[:,None] + qw = (weight - a[:, None]) * inv_scale[:, None] qw = _np.round(qw) # Reshape @@ -368,39 +494,42 @@ def _quantize_wp(wp, nbits, qm, axis=0, **kwargs): scale = bias = lut = None # Linear Quantization - if qm in [_QUANTIZATION_MODE_LINEAR_QUANTIZATION, - _QUANTIZATION_MODE_LINEAR_SYMMETRIC]: - symmetric = (qm == _QUANTIZATION_MODE_LINEAR_SYMMETRIC) - qw, scale, bias = _quantize_channelwise_linear(wp, nbits, axis, - symmetric) + if qm in [ + _QUANTIZATION_MODE_LINEAR_QUANTIZATION, + _QUANTIZATION_MODE_LINEAR_SYMMETRIC, + ]: + symmetric = qm == _QUANTIZATION_MODE_LINEAR_SYMMETRIC + qw, scale, bias = _quantize_channelwise_linear(wp, nbits, axis, symmetric) # Lookup tables elif qm == _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS: lut, qw = _get_kmeans_lookup_table_and_weight(nbits, wp) elif qm == _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE: - if 'lut_function' not in kwargs.keys(): - raise Exception('Custom lookup table quantization mode ' - 'selected but no lookup table function passed') - lut_function = kwargs['lut_function'] + if "lut_function" not in kwargs.keys(): + raise Exception( + "Custom lookup table quantization mode " + "selected but no lookup table function passed" + ) + lut_function = kwargs["lut_function"] if not callable(lut_function): - raise Exception('Argument for Lookup Table passed in but is ' - 'not callable') + raise Exception( + "Argument for Lookup Table passed in but is " "not callable" + ) try: lut, qw = lut_function(nbits, wp) except Exception as e: - raise Exception('{}\nCall to Lookup Table function failed' - .format(e.message)) + raise Exception( + "{}\nCall to Lookup Table function failed".format(e.message) + ) elif qm == _QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR: lut, qw = _get_linear_lookup_table_and_weight(nbits, wp) else: - raise NotImplementedError( - 'Quantization method "{}" not supported'.format(qm)) + raise NotImplementedError('Quantization method "{}" not supported'.format(qm)) quantized_wp = _np.uint8(qw) return 
scale, bias, lut, quantized_wp def _quantize_wp_field(wp, nbits, qm, shape, axis=0, **kwargs): - """ Quantize WeightParam field in Neural Network Protobuf @@ -431,24 +560,33 @@ def _quantize_wp_field(wp, nbits, qm, shape, axis=0, **kwargs): return _wp_to_fp16wp(wp) if nbits > 8: - raise Exception('Only 8-bit and lower quantization is supported') + raise Exception("Only 8-bit and lower quantization is supported") if qm not in _SUPPORTED_QUANTIZATION_MODES: - raise Exception('Quantization mode {} not supported'.format(qm)) + raise Exception("Quantization mode {} not supported".format(qm)) # axis parameter check if axis == 1 and len(shape) != 4: - raise Exception('Quantization on second axis is only supported ' - 'for rank-4 weight blob.') + raise Exception( + "Quantization on second axis is only supported " "for rank-4 weight blob." + ) if axis != 0 and axis != 1: - raise Exception('Invalid quantization axis {} passed in. Allowed' - 'values are 0 (first axis) and 1 (second axis)'.format(axis)) + raise Exception( + "Invalid quantization axis {} passed in. Allowed" + "values are 0 (first axis) and 1 (second axis)".format(axis) + ) # WeightParam size check - non-linear quantizations are applied on layer level - num_channels = shape[axis] if qm in [_QUANTIZATION_MODE_LINEAR_QUANTIZATION, - _QUANTIZATION_MODE_LINEAR_SYMMETRIC] else 1 + num_channels = ( + shape[axis] + if qm + in [_QUANTIZATION_MODE_LINEAR_QUANTIZATION, _QUANTIZATION_MODE_LINEAR_SYMMETRIC] + else 1 + ) if len(wp.floatValue) % num_channels: - raise Exception('Number of quantization channels does not divide evenly into weights') + raise Exception( + "Number of quantization channels does not divide evenly into weights" + ) qparams = wp.quantization qparams.numberOfBits = nbits @@ -456,8 +594,10 @@ def _quantize_wp_field(wp, nbits, qm, shape, axis=0, **kwargs): weights = _np.array(wp.floatValue).reshape(shape) scale, bias, lut, uint8_weights = _quantize_wp(weights, nbits, qm, axis, **kwargs) uint8_weights = uint8_weights.flatten() - if qm in [_QUANTIZATION_MODE_LINEAR_QUANTIZATION, - _QUANTIZATION_MODE_LINEAR_SYMMETRIC]: + if qm in [ + _QUANTIZATION_MODE_LINEAR_QUANTIZATION, + _QUANTIZATION_MODE_LINEAR_SYMMETRIC, + ]: qparams.linearQuantization.scale.extend(scale) qparams.linearQuantization.bias.extend(bias) else: @@ -467,36 +607,36 @@ def _quantize_wp_field(wp, nbits, qm, shape, axis=0, **kwargs): if nbits == 8: wp.rawValue += uint8_weights.tobytes() else: - wp.rawValue += _convert_array_to_nbit_quantized_bytes(uint8_weights, - nbits).tobytes() + wp.rawValue += _convert_array_to_nbit_quantized_bytes( + uint8_weights, nbits + ).tobytes() del wp.floatValue[:] -def unpack_to_bytes(byte_arr, num_weights, nbits): +def _unpack_to_bytes(byte_arr, num_weights, nbits): assert num_weights % 1 == 0 num_weights = int(num_weights) bit_arr = _decompose_bytes_to_bit_arr(byte_arr.flatten().tolist()) - bit_arr = _np.array(bit_arr[:num_weights * nbits]).reshape((num_weights, nbits)) - expo = 2**_np.array(list(reversed(range(0,nbits)))) + bit_arr = _np.array(bit_arr[: num_weights * nbits]).reshape((num_weights, nbits)) + expo = 2 ** _np.array(list(reversed(range(0, nbits)))) byte_arr = _np.sum(bit_arr * expo, axis=1) return byte_arr def _dequantize_linear(weight_8bit, scale, bias, axis=0): - - if len(weight_8bit.shape) == 1: # vector situation, treat as 1 channel + if len(weight_8bit.shape) == 1: # vector situation, treat as 1 channel weight_8bit = weight_8bit.reshape((1, weight_8bit.shape[0])) rank = len(weight_8bit.shape) if axis == 1: - 
transposed_axis_order = (1,0) + tuple(range(2,rank)) + transposed_axis_order = (1, 0) + tuple(range(2, rank)) weight_8bit = _np.transpose(weight_8bit, transposed_axis_order) num_channels = weight_8bit.shape[0] - broadcast_shape = (num_channels, ) + (1,) * (rank - 1) + broadcast_shape = (num_channels,) + (1,) * (rank - 1) scale = scale.reshape(broadcast_shape) bias = bias.reshape(broadcast_shape) - weight = weight_8bit.astype('float') * scale + bias + weight = weight_8bit.astype("float") * scale + bias if axis == 1: weight = _np.transpose(weight, transposed_axis_order) @@ -504,32 +644,40 @@ def _dequantize_linear(weight_8bit, scale, bias, axis=0): def _dequantize_lut(weight_8bit, lut): - return lut[weight_8bit.astype('uint8')] + return lut[weight_8bit.astype("uint8")] + def _dequantize_wp(wp, shape, axis=0): if len(wp.floatValue) != 0: return - is_linear = wp.quantization.WhichOneof('QuantizationType') == 'linearQuantization' + is_linear = wp.quantization.WhichOneof("QuantizationType") == "linearQuantization" if is_linear: - if len(wp.quantization.linearQuantization.scale) != \ - len(wp.quantization.linearQuantization.bias): - raise Exception('Linear quantization scale and bias vectors are ' - 'different lengths') + if len(wp.quantization.linearQuantization.scale) != len( + wp.quantization.linearQuantization.bias + ): + raise Exception( + "Linear quantization scale and bias vectors are " "different lengths" + ) # axis parameter check if axis == 1 and len(shape) != 4: - raise Exception('Dequantization on second axis is only supported ' - 'for rank-4 weight blob.') + raise Exception( + "Dequantization on second axis is only supported " "for rank-4 weight blob." + ) if axis != 0 and axis != 1: - raise Exception('Invalid quantization axis {} passed in. Allowed' - 'values are 0 (first axis) and 1 (second axis)'.format(axis)) + raise Exception( + "Invalid quantization axis {} passed in. Allowed" + "values are 0 (first axis) and 1 (second axis)".format(axis) + ) nbits = wp.quantization.numberOfBits num_weights = _np.prod(shape) byte_arr = _np.frombuffer(wp.rawValue, dtype=_np.uint8) - weight_8bit = byte_arr if nbits == 8 else unpack_to_bytes(byte_arr, num_weights, nbits) + weight_8bit = ( + byte_arr if nbits == 8 else _unpack_to_bytes(byte_arr, num_weights, nbits) + ) weight_8bit = weight_8bit.reshape(shape) if is_linear: @@ -554,39 +702,39 @@ def _dequantize_nn_spec(spec): def _quantize_nn_spec(nn_spec, nbits, qm, **kwargs): """ Quantize weights in NeuralNetwork type mlmodel specifications. 
""" - selector = kwargs.get('selector', QuantizedLayerSelector()) + selector = kwargs.get("selector", QuantizedLayerSelector()) if qm not in _SUPPORTED_QUANTIZATION_MODES: - raise Exception('Quantization mode {} not supported'.format(qm)) + raise Exception("Quantization mode {} not supported".format(qm)) if qm != _QUANTIZATION_MODE_DEQUANTIZE: if nbits is None: raise Exception('Missing argument "nbits"') if not (nbits > 0 and nbits <= 8 or nbits == 16): - raise Exception('Only half precision (16-bit), 1 to 8-bit ' - 'quantization is supported') + raise Exception( + "Only half precision (16-bit), 1 to 8-bit " "quantization is supported" + ) if qm == _QUANTIZATION_MODE_LINEAR_SYMMETRIC and nbits != 8: - raise Exception('Symmetric quantization is only applicable for 8 bit' - 'linear') + raise Exception("Symmetric quantization is only applicable for 8 bit" "linear") layers = nn_spec.layers # Perform optimization step if nbits is not None and nbits < 16 and qm != _QUANTIZATION_MODE_DEQUANTIZE: - print('Optimizing Neural Network before Quantization:') + print("Optimizing Neural Network before Quantization:") _optimize_nn(layers) - print('Finished optimizing network. Quantizing neural network..') + print("Finished optimizing network. Quantizing neural network..") # Quantize each layer for layer in layers: - layer_type = layer.WhichOneof('layer') + layer_type = layer.WhichOneof("layer") if not selector.do_quantize(layer): continue - print('Quantizing layer {}'.format(layer.name)) + print("Quantizing layer {}".format(layer.name)) # Convolution - if layer_type == 'convolution': + if layer_type == "convolution": output_channels = layer.convolution.outputChannels kernel_channels = layer.convolution.kernelChannels kernel_height = layer.convolution.kernelSize[0] @@ -595,65 +743,112 @@ def _quantize_nn_spec(nn_spec, nbits, qm, **kwargs): counts = output_channels * kernel_channels * kernel_height * kernel_width has_bias = layer.convolution.hasBias if layer.convolution.isDeconvolution: - shape = (kernel_channels, int(output_channels/groups), kernel_height, kernel_width) - _quantize_wp_field(layer.convolution.weights, nbits, qm, shape, axis=1, **kwargs) + shape = ( + kernel_channels, + int(output_channels / groups), + kernel_height, + kernel_width, + ) + _quantize_wp_field( + layer.convolution.weights, nbits, qm, shape, axis=1, **kwargs + ) else: shape = (output_channels, kernel_channels, kernel_height, kernel_width) - _quantize_wp_field(layer.convolution.weights, nbits, qm, shape, **kwargs) - - if has_bias and selector.do_quantize(layer, weight_param='bias'): - _quantize_wp_field(layer.convolution.bias, nbits, qm, - shape=(output_channels,), **kwargs) + _quantize_wp_field( + layer.convolution.weights, nbits, qm, shape, **kwargs + ) + + if has_bias and selector.do_quantize(layer, weight_param="bias"): + _quantize_wp_field( + layer.convolution.bias, + nbits, + qm, + shape=(output_channels,), + **kwargs + ) # Batchnorm - elif layer_type == 'batchnorm': + elif layer_type == "batchnorm": nw = layer.batchnorm.channels _quantize_wp_field(layer.batchnorm.gamma, nbits, qm, shape=(nw,), **kwargs) _quantize_wp_field(layer.batchnorm.beta, nbits, qm, shape=(nw,), **kwargs) _quantize_wp_field(layer.batchnorm.mean, nbits, qm, shape=(nw,), **kwargs) - _quantize_wp_field(layer.batchnorm.variance, nbits, qm, shape=(nw,), **kwargs) + _quantize_wp_field( + layer.batchnorm.variance, nbits, qm, shape=(nw,), **kwargs + ) # InnerProduct - elif layer_type == 'innerProduct': + elif layer_type == "innerProduct": output_channels = 
layer.innerProduct.outputChannels input_channels = layer.innerProduct.inputChannels - _quantize_wp_field(layer.innerProduct.weights, nbits, qm, - shape=(output_channels, input_channels), **kwargs) + _quantize_wp_field( + layer.innerProduct.weights, + nbits, + qm, + shape=(output_channels, input_channels), + **kwargs + ) has_bias = layer.innerProduct.hasBias - if has_bias and selector.do_quantize(layer, weight_param='bias'): - _quantize_wp_field(layer.innerProduct.bias, nbits, qm, - shape=(output_channels,), **kwargs) + if has_bias and selector.do_quantize(layer, weight_param="bias"): + _quantize_wp_field( + layer.innerProduct.bias, + nbits, + qm, + shape=(output_channels,), + **kwargs + ) # BatchedMatmul - elif layer_type == 'batchedMatmul': + elif layer_type == "batchedMatmul": x1 = layer.batchedMatmul.weightMatrixFirstDimension x2 = layer.batchedMatmul.weightMatrixSecondDimension - _quantize_wp_field(layer.batchedMatmul.weights, nbits, qm, - shape=(x2, x1), **kwargs) + _quantize_wp_field( + layer.batchedMatmul.weights, nbits, qm, shape=(x2, x1), **kwargs + ) has_bias = layer.batchedMatmul.hasBias - if has_bias and selector.do_quantize(layer, weight_param='bias'): - _quantize_wp_field(layer.batchedMatmul.bias, nbits, qm, - shape=(x2,), **kwargs) + if has_bias and selector.do_quantize(layer, weight_param="bias"): + _quantize_wp_field( + layer.batchedMatmul.bias, nbits, qm, shape=(x2,), **kwargs + ) # Embedding layer - elif layer_type == 'embedding': + elif layer_type == "embedding": output_channels = layer.embedding.outputChannels input_channels = layer.embedding.inputDim - _quantize_wp_field(layer.embedding.weights, nbits, qm, shape=(output_channels, input_channels), **kwargs) + _quantize_wp_field( + layer.embedding.weights, + nbits, + qm, + shape=(output_channels, input_channels), + **kwargs + ) if layer.embedding.hasBias: - _quantize_wp_field(layer.embedding.bias, nbits, qm, shape=(output_channels,), **kwargs) - + _quantize_wp_field( + layer.embedding.bias, nbits, qm, shape=(output_channels,), **kwargs + ) # Embedding ND layer - elif layer_type == 'embeddingND': + elif layer_type == "embeddingND": output_channels = layer.embeddingND.embeddingSize input_channels = layer.embeddingND.vocabSize - _quantize_wp_field(layer.embeddingND.weights, nbits, qm, shape=(output_channels, input_channels), **kwargs) + _quantize_wp_field( + layer.embeddingND.weights, + nbits, + qm, + shape=(output_channels, input_channels), + **kwargs + ) if layer.embeddingND.hasBias: - _quantize_wp_field(layer.embeddingND.bias, nbits, qm, shape=(output_channels,), **kwargs) + _quantize_wp_field( + layer.embeddingND.bias, + nbits, + qm, + shape=(output_channels,), + **kwargs + ) # Scale layer - elif layer_type == 'scale': + elif layer_type == "scale": nw = _np.prod(layer.scale.shapeScale) _quantize_wp_field(layer.scale.scale, nbits, qm, shape=(nw,), **kwargs) if layer.scale.hasBias: @@ -661,77 +856,216 @@ def _quantize_nn_spec(nn_spec, nbits, qm, **kwargs): _quantize_wp_field(layer.scale.bias, nbits, qm, shape=(nw,), **kwargs) # Bias layer - elif layer_type == 'bias': + elif layer_type == "bias": nw = _np.prod(layer.bias.shape) _quantize_wp_field(layer.bias.bias, nbits, qm, shape=(nw,), **kwargs) # LoadConstant layer - elif layer_type == 'loadConstant': + elif layer_type == "loadConstant": nw = _np.prod(layer.loadConstant.shape) - _quantize_wp_field(layer.loadConstant.data, nbits, qm, shape=(nw,), **kwargs) + _quantize_wp_field( + layer.loadConstant.data, nbits, qm, shape=(nw,), **kwargs + ) # Simple Recurrent - elif 
layer_type == 'simpleRecurrent': + elif layer_type == "simpleRecurrent": i_size = layer.simpleRecurrent.inputVectorSize o_size = layer.simpleRecurrent.outputVectorSize - _quantize_wp_field(layer.simpleRecurrent.weightMatrix, nbits, qm, shape=(o_size, i_size), **kwargs) - _quantize_wp_field(layer.simpleRecurrent.recursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) + _quantize_wp_field( + layer.simpleRecurrent.weightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + _quantize_wp_field( + layer.simpleRecurrent.recursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) if layer.simpleRecurrent.hasBiasVector: - _quantize_wp_field(layer.simpleRecurrent.biasVector,nbits, qm, shape=(o_size,), **kwargs) + _quantize_wp_field( + layer.simpleRecurrent.biasVector, + nbits, + qm, + shape=(o_size,), + **kwargs + ) # GRU - elif layer_type == 'gru': + elif layer_type == "gru": i_size = layer.gru.inputVectorSize o_size = layer.gru.outputVectorSize # Weight Matrix - _quantize_wp_field(layer.gru.updateGateWeightMatrix, nbits, qm, shape=(o_size,i_size), **kwargs) - _quantize_wp_field(layer.gru.resetGateWeightMatrix, nbits, qm, shape=(o_size,i_size), **kwargs) - _quantize_wp_field(layer.gru.outputGateWeightMatrix, nbits, qm, shape=(o_size,i_size), **kwargs) + _quantize_wp_field( + layer.gru.updateGateWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + _quantize_wp_field( + layer.gru.resetGateWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + _quantize_wp_field( + layer.gru.outputGateWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) # Recursion Weights - _quantize_wp_field(layer.gru.updateGateRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) - _quantize_wp_field(layer.gru.resetGateRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) - _quantize_wp_field(layer.gru.outputGateRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) + _quantize_wp_field( + layer.gru.updateGateRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) + _quantize_wp_field( + layer.gru.resetGateRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) + _quantize_wp_field( + layer.gru.outputGateRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) # Bias if layer.gru.hasBiasVectors: - _quantize_wp_field(layer.gru.updateGateBiasVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(layer.gru.resetGateBiasVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(layer.gru.outputGateBiasVector, nbits, qm, shape=(o_size,), **kwargs) + _quantize_wp_field( + layer.gru.updateGateBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) + _quantize_wp_field( + layer.gru.resetGateBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) + _quantize_wp_field( + layer.gru.outputGateBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) # LSTM Layers - elif layer_type in ['uniDirectionalLSTM', 'biDirectionalLSTM']: + elif layer_type in ["uniDirectionalLSTM", "biDirectionalLSTM"]: - def _lstmwp_to_fp16_lstmwp(lstm_wp, nbits, qm, i_size, o_size, has_peephole=True): + def _lstmwp_to_fp16_lstmwp( + lstm_wp, nbits, qm, i_size, o_size, has_peephole=True + ): assert lstm_wp - _quantize_wp_field(lstm_wp.inputGateWeightMatrix, nbits, qm, shape=(o_size, i_size), **kwargs) - _quantize_wp_field(lstm_wp.forgetGateWeightMatrix, nbits, qm, shape=(o_size, i_size), **kwargs) - _quantize_wp_field(lstm_wp.blockInputWeightMatrix, nbits, qm, shape=(o_size, i_size), **kwargs) - 
_quantize_wp_field(lstm_wp.outputGateWeightMatrix, nbits, qm, shape=(o_size, i_size), **kwargs) - - _quantize_wp_field(lstm_wp.inputGateRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) - _quantize_wp_field(lstm_wp.forgetGateRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) - _quantize_wp_field(lstm_wp.blockInputRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) - _quantize_wp_field(lstm_wp.outputGateRecursionMatrix, nbits, qm, shape=(o_size, o_size), **kwargs) - - _quantize_wp_field(lstm_wp.inputGateBiasVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(lstm_wp.forgetGateBiasVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(lstm_wp.blockInputBiasVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(lstm_wp.outputGateBiasVector, nbits, qm, shape=(o_size,), **kwargs) + _quantize_wp_field( + lstm_wp.inputGateWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + _quantize_wp_field( + lstm_wp.forgetGateWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + _quantize_wp_field( + lstm_wp.blockInputWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + _quantize_wp_field( + lstm_wp.outputGateWeightMatrix, + nbits, + qm, + shape=(o_size, i_size), + **kwargs + ) + + _quantize_wp_field( + lstm_wp.inputGateRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) + _quantize_wp_field( + lstm_wp.forgetGateRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) + _quantize_wp_field( + lstm_wp.blockInputRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) + _quantize_wp_field( + lstm_wp.outputGateRecursionMatrix, + nbits, + qm, + shape=(o_size, o_size), + **kwargs + ) + + _quantize_wp_field( + lstm_wp.inputGateBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) + _quantize_wp_field( + lstm_wp.forgetGateBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) + _quantize_wp_field( + lstm_wp.blockInputBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) + _quantize_wp_field( + lstm_wp.outputGateBiasVector, nbits, qm, shape=(o_size,), **kwargs + ) if has_peephole: - _quantize_wp_field(lstm_wp.inputGatePeepholeVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(lstm_wp.forgetGatePeepholeVector, nbits, qm, shape=(o_size,), **kwargs) - _quantize_wp_field(lstm_wp.outputGatePeepholeVector, nbits, qm, shape=(o_size,), **kwargs) - - if layer_type == 'uniDirectionalLSTM': + _quantize_wp_field( + lstm_wp.inputGatePeepholeVector, + nbits, + qm, + shape=(o_size,), + **kwargs + ) + _quantize_wp_field( + lstm_wp.forgetGatePeepholeVector, + nbits, + qm, + shape=(o_size,), + **kwargs + ) + _quantize_wp_field( + lstm_wp.outputGatePeepholeVector, + nbits, + qm, + shape=(o_size,), + **kwargs + ) + + if layer_type == "uniDirectionalLSTM": _lstmwp_to_fp16_lstmwp( lstm_wp=layer.uniDirectionalLSTM.weightParams, nbits=nbits, qm=qm, i_size=layer.uniDirectionalLSTM.inputVectorSize, o_size=layer.uniDirectionalLSTM.outputVectorSize, - has_peephole=layer.uniDirectionalLSTM.params.hasPeepholeVectors) + has_peephole=layer.uniDirectionalLSTM.params.hasPeepholeVectors, + ) - elif layer_type == 'biDirectionalLSTM': + elif layer_type == "biDirectionalLSTM": for lstm_wp in layer.biDirectionalLSTM.weightParams: _lstmwp_to_fp16_lstmwp( lstm_wp=lstm_wp, @@ -739,61 +1073,64 @@ def _lstmwp_to_fp16_lstmwp(lstm_wp, nbits, qm, i_size, o_size, has_peephole=True qm=qm, i_size=layer.biDirectionalLSTM.inputVectorSize, 
                    o_size=layer.biDirectionalLSTM.outputVectorSize,
-                    has_peephole=layer.biDirectionalLSTM.params.hasPeepholeVectors)
-
-        elif layer_type == 'custom':
-            print('Skipping custom layer {}. Weights for this layer need to'
-                  'be converted manually'.format(layer.name))
-        elif layer_type == 'branch':
+                    has_peephole=layer.biDirectionalLSTM.params.hasPeepholeVectors,
+                )
+
+        elif layer_type == "custom":
+            print(
+                "Skipping custom layer {}. Weights for this layer need to "
+                "be converted manually".format(layer.name)
+            )
+        elif layer_type == "branch":
             _quantize_nn_spec(layer.branch.ifBranch, nbits, qm, **kwargs)
             _quantize_nn_spec(layer.branch.elseBranch, nbits, qm, **kwargs)
-        elif layer_type == 'loop':
+        elif layer_type == "loop":
             _quantize_nn_spec(layer.loop.conditionNetwork, nbits, qm, **kwargs)
             _quantize_nn_spec(layer.loop.bodyNetwork, nbits, qm, **kwargs)
         else:
-            raise Exception('Unknown layer ' + layer_type + ' to be quantized')
-
-
-@deprecated(suffix="instead use 'quantize_weights'.")
-def quantize_spec_weights(spec, nbits, quantization_mode, **kwargs):
-    return _quantize_spec_weights(spec, nbits, quantization_mode, **kwargs)
+            raise Exception("Unknown layer " + layer_type + " to be quantized")


 def _quantize_spec_weights(spec, nbits, quantization_mode, **kwargs):
+    nn_model_types = [
+        "neuralNetwork",
+        "neuralNetworkClassifier",
+        "neuralNetworkRegressor",
+    ]

-    nn_model_types = ['neuralNetwork', 'neuralNetworkClassifier',
-                      'neuralNetworkRegressor']
-
-    model_type = spec.WhichOneof('Type')
+    model_type = spec.WhichOneof("Type")

     # Neural network models
     if model_type in nn_model_types:

         # Bump up to appropriate spec version if required
         if nbits == 16:
-            spec.specificationVersion = max(_MINIMUM_FP16_SPEC_VERSION,
-                                            spec.specificationVersion)
+            spec.specificationVersion = max(
+                _MINIMUM_FP16_SPEC_VERSION, spec.specificationVersion
+            )
         else:
-            spec.specificationVersion = max(_MINIMUM_QUANTIZED_MODEL_SPEC_VERSION,
-                                            spec.specificationVersion)
+            spec.specificationVersion = max(
+                _MINIMUM_QUANTIZED_MODEL_SPEC_VERSION, spec.specificationVersion
+            )

-        if spec.WhichOneof('Type') == 'neuralNetwork':
-            _quantize_nn_spec(spec.neuralNetwork, nbits, quantization_mode,
-                              **kwargs)
+        if spec.WhichOneof("Type") == "neuralNetwork":
+            _quantize_nn_spec(spec.neuralNetwork, nbits, quantization_mode, **kwargs)

-        elif spec.WhichOneof('Type') in 'neuralNetworkClassifier':
-            _quantize_nn_spec(spec.neuralNetworkClassifier, nbits,
-                              quantization_mode, **kwargs)
+        elif spec.WhichOneof("Type") == "neuralNetworkClassifier":
+            _quantize_nn_spec(
+                spec.neuralNetworkClassifier, nbits, quantization_mode, **kwargs
+            )

-        elif spec.WhichOneof('Type') in 'neuralNetworkRegressor':
-            _quantize_nn_spec(spec.neuralNetworkRegressor, nbits,
-                              quantization_mode, **kwargs)
+        elif spec.WhichOneof("Type") == "neuralNetworkRegressor":
+            _quantize_nn_spec(
+                spec.neuralNetworkRegressor, nbits, quantization_mode, **kwargs
+            )

     # Recursively convert all pipeline models
-    elif spec.WhichOneof('Type') == 'pipeline':
+    elif spec.WhichOneof("Type") == "pipeline":
         for model_spec in spec.pipeline.models:
             _quantize_spec_weights(model_spec, nbits, quantization_mode, **kwargs)

-    elif spec.WhichOneof('Type') in ['pipelineClassifier', 'pipelineRegressor']:
+    elif spec.WhichOneof("Type") in ["pipelineClassifier", "pipelineRegressor"]:
         _quantize_spec_weights(spec.pipeline, nbits, quantization_mode, **kwargs)

     return spec
@@ -801,11 +1138,12 @@ def _quantize_spec_weights(spec, nbits, quantization_mode, **kwargs):

 def _load_and_resize_image(image_path, size):
from PIL import Image + img = Image.open(image_path) return img.resize(size, Image.ANTIALIAS) -class TopKMetrics(): +class TopKMetrics: def __init__(self, topk): self._topk = topk self._correct_count = 0 @@ -818,21 +1156,21 @@ def add_metric(self, output1, output2): self._correct_count += 1 else: self._topk = min(len(output1.keys()), self._topk) - out1_topk = sorted(output1, key=output1.get,reverse=True)[:self._topk] - out2_topk = sorted(output2, key=output2.get,reverse=True)[:self._topk] + out1_topk = sorted(output1, key=output1.get, reverse=True)[: self._topk] + out2_topk = sorted(output2, key=output2.get, reverse=True)[: self._topk] if out1_topk[0] in out2_topk: self._correct_count += 1 def display_metrics(self): - pcorrect = (float(self._correct_count) / float(self._total_count))* 100 + pcorrect = (float(self._correct_count) / float(self._total_count)) * 100 pcorrect = _np.round(pcorrect, decimals=2) if self._topk == 1: - print('Top 1 Agreement: {}%\n'.format(pcorrect)) + print("Top 1 Agreement: {}%\n".format(pcorrect)) else: - print('Top {} Agreement: {}%\n'.format(self._topk, pcorrect)) + print("Top {} Agreement: {}%\n".format(self._topk, pcorrect)) -class NoiseMetrics(): +class NoiseMetrics: def __init__(self): self._snr = [] self._psnr = [] @@ -852,9 +1190,9 @@ def add_metric(self, output1, output2): # Output is Image if isinstance(output1, PIL.Image.Image): - if output1.mode == 'RGBA': - output1 = output1.convert('RGB') - output2 = output2.convert('RGB') + if output1.mode == "RGBA": + output1 = output1.convert("RGB") + output2 = output2.convert("RGB") arr1 = _np.array(output1).flatten() arr2 = _np.array(output2).flatten() snr, psnr = self._compute_snr(arr1, arr2) @@ -870,31 +1208,36 @@ def add_metric(self, output1, output2): self._psnr.append(psnr) def display_metrics(self): - print('SNR: {} +/- {}'.format(_np.mean(self._snr), _np.var(self._snr))) - print('PSNR: {} +/- {}\n'.format(_np.mean(self._psnr), _np.var(self._psnr))) + print("SNR: {} +/- {}".format(_np.mean(self._snr), _np.var(self._snr))) + print("PSNR: {} +/- {}\n".format(_np.mean(self._psnr), _np.var(self._psnr))) -class OutputMetric(): +class OutputMetric: """ Utility class to calculate and hold metrics between two model outputs """ + def __init__(self, name, type): self.name = name self._metrics = [] - if type == 'stringType': + if type == "stringType": self._metrics.append(TopKMetrics(topk=1)) - elif type == 'dictionaryType': + elif type == "dictionaryType": self._metrics.append(TopKMetrics(topk=5)) - elif type == 'imageType' or type == 'multiArrayType': + elif type == "imageType" or type == "multiArrayType": self._metrics.append(NoiseMetrics()) else: - raise Exception("""Unable to determine which metric to - compute for output: {}""".format(name)) + raise Exception( + """Unable to determine which metric to + compute for output: {}""".format( + name + ) + ) def add_metric(self, output1, output2): for metric in self._metrics: @@ -905,53 +1248,64 @@ def display_metrics(self): metric.display_metrics() -class ModelMetrics(): +class ModelMetrics: """ A utility class to hold evaluation metrics """ + def __init__(self, spec): self.model_metrics = {} for output in spec.description.output: - output_type = output.type.WhichOneof('Type') + output_type = output.type.WhichOneof("Type") self.model_metrics[output.name] = OutputMetric(output.name, output_type) def add_metrics(self, model1_output, model2_output): outputs = model1_output.keys() for output in outputs: - self.model_metrics[output].add_metric(model1_output[output], 
model2_output[output])
+            self.model_metrics[output].add_metric(
+                model1_output[output], model2_output[output]
+            )

     def display_metrics(self):
         for metric in self.model_metrics:
-            print('Output {}:'.format(metric))
-            dash = '----------'
+            print("Output {}:".format(metric))
+            dash = "----------"
             for x in range(0, len(metric)):
-                dash += '-'
+                dash += "-"
             print(dash)
             self.model_metrics[metric].display_metrics()


 def _characterize_qmodel_perf_with_data_dir(fpmodel, qspec, data_dir):
-    supported_image_exts = ['jpg', 'bmp', 'png', 'jpeg']
-    test_image_paths = ['{}/{}'.format(data_dir, fn) for fn in
-                        os.listdir(data_dir) if
-                        any(fn.endswith(ext) for ext in supported_image_exts)]
+    supported_image_exts = ["jpg", "bmp", "png", "jpeg"]
+    test_image_paths = [
+        "{}/{}".format(data_dir, fn)
+        for fn in _listdir(data_dir)
+        if any(fn.endswith(ext) for ext in supported_image_exts)
+    ]

     if not test_image_paths:
-        raise Exception("""Path contains no supported image files.
+        raise Exception(
+            """Path {} contains no supported image files.
             Supported file types include jpg, bmp, png and jpeg.
-            """.format(data_dir))
+            """.format(
+                data_dir
+            )
+        )

     qmodel = _get_model(qspec)
     model_metrics = ModelMetrics(qspec)

     input_name = qspec.description.input[0].name
-    input_size = (qspec.description.input[0].type.imageType.width,
-                  qspec.description.input[0].type.imageType.height)
+    input_size = (
+        qspec.description.input[0].type.imageType.width,
+        qspec.description.input[0].type.imageType.height,
+    )

-    print('\n\n')
-    print('Analyzing {} images'.format(len(test_image_paths)))
-    print('Running Analysis this may take a while ...')
-    print('\n')
+    print("\n\n")
+    print("Analyzing {} images".format(len(test_image_paths)))
+    print("Running analysis, this may take a while ...")
+    print("\n")

     analyzed = 0
     tried = 0
@@ -970,12 +1324,11 @@ def _characterize_qmodel_perf_with_data_dir(fpmodel, qspec, data_dir):
         # Update Progress
         tried += 1
         if tried % 10 == 0:
-            sys.stdout.write('\r')
-            sys.stdout.write(
-                'Analyzed {}/{}'.format(tried, len(test_image_paths)))
-            sys.stdout.flush()
+            _stdout.write("\r")
+            _stdout.write("Analyzed {}/{}".format(tried, len(test_image_paths)))
+            _stdout.flush()

-    print('\n')
+    print("\n")
     model_metrics.display_metrics()


@@ -983,10 +1336,10 @@ def _characterize_quantized_model_perf(fpmodel, qspec, sample_data):
     qmodel = _get_model(qspec)
     model_metrics = ModelMetrics(qspec)

-    print('\n\n')
-    print('Analyzing {} samples'.format(len(sample_data)))
-    print('Running Analysis this may take a while ...')
-    print('\n')
+    print("\n\n")
+    print("Analyzing {} samples".format(len(sample_data)))
+    print("Running analysis, this may take a while ...")
+    print("\n")

     analyzed = 0
     tried = 0
@@ -1004,17 +1357,15 @@
         # Update Progress
         tried += 1
         if tried % 10 == 0:
-            sys.stdout.write('\r')
-            sys.stdout.write(
-                'Analyzed {}/{}'.format(tried, len(sample_data)))
-            sys.stdout.flush()
+            _stdout.write("\r")
+            _stdout.write("Analyzed {}/{}".format(tried, len(sample_data)))
+            _stdout.flush()

-    print('\n')
+    print("\n")
     model_metrics.display_metrics()


-def compare_models(full_precision_model, quantized_model,
-                   sample_data):
+def compare_models(full_precision_model, quantized_model, sample_data):
     """
     Utility function to compare the performance of a full precision
     vs quantized model
@@ -1035,37 +1386,164 @@ def compare_models(full_precision_model, quantized_model,

     :return: None.
Performance metrics are printed out """ - emessage = (""" + emessage = """ Invalid sample data provided. Only a list of dictionaries containing sample data or path to a folder containing images is - supported""") + supported""" spec = full_precision_model.get_spec() num_inputs = len(spec.description.input) - if isinstance(sample_data, str): - input_type = spec.description.input[0].type.WhichOneof('Type') - if num_inputs != 1 or input_type != 'imageType': - raise Exception("""Unable to analyze quantized models. Sample data + if isinstance(sample_data, _string_types): + input_type = spec.description.input[0].type.WhichOneof("Type") + if num_inputs != 1 or input_type != "imageType": + raise Exception( + """Unable to analyze quantized models. Sample data was a path to a directory which is only supported with models with one image type input. Please try passing in a list of sample inputs as sample data. - """) - _characterize_qmodel_perf_with_data_dir(full_precision_model, quantized_model.get_spec(), sample_data) + """ + ) + _characterize_qmodel_perf_with_data_dir( + full_precision_model, quantized_model.get_spec(), sample_data + ) elif isinstance(sample_data, list): if not all(type(d) is dict for d in sample_data): raise Exception(emessage) - _characterize_quantized_model_perf(full_precision_model, quantized_model.get_spec(), sample_data) + _characterize_quantized_model_perf( + full_precision_model, quantized_model.get_spec(), sample_data + ) else: raise Exception(emessage) -def quantize_weights(full_precision_model, - nbits, - quantization_mode="linear", - sample_data=None, - **kwargs): +def activate_int8_int8_matrix_multiplications(spec, selector=None): + """ + Utility function that takes in either a full precision (float) spec or + an nbit quantized spec to selectively enable int8 activation + weight quantization + of matrix multiplication operations where the second matrix represents a constant weight. + + spec: MLModel.get_spec() + Currently conversion for only neural network models is supported. + If a pipeline model is passed in then all embedded neural network models embedded within + will be modified. + + selector: (optional) MatrixMultiplyLayerSelector + A MatrixMultiplyLayerSelector object that enables int8 activation + weight quantization + only on those layers for which the user-specified criterion on the minimum/maximum number + of size/channels in constant weight parameters is met. + It can also be derived to provide custom selection. 
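A minimal usage sketch of this new utility may help; the model filename below is hypothetical, and the default-constructed selector (no thresholds configured) simply makes every eligible layer a candidate:

```python
# Sketch only: "my_model.mlmodel" is a placeholder for a real model path.
import coremltools
from coremltools.models.neural_network.quantization_utils import (
    MatrixMultiplyLayerSelector,
    activate_int8_int8_matrix_multiplications,
)

model = coremltools.models.MLModel("my_model.mlmodel")
spec = model.get_spec()

# With a default selector, all innerProduct / batchedMatmul layers holding
# constant float weights are rewritten to int8 activation * int8 weight form.
selector = MatrixMultiplyLayerSelector()
spec = activate_int8_int8_matrix_multiplications(spec, selector=selector)

quantized_model = coremltools.models.MLModel(spec)
```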
+
+    """
+
+    # Recursively convert all pipeline models
+    if spec.WhichOneof("Type") == "pipeline":
+        for model_spec in spec.pipeline.models:
+            activate_int8_int8_matrix_multiplications(model_spec, selector=selector)
+        return spec
+
+    elif spec.WhichOneof("Type") in ["pipelineClassifier", "pipelineRegressor"]:
+        activate_int8_int8_matrix_multiplications(spec.pipeline, selector=selector)
+        return spec
+
+    # Neural network models
+    elif spec.WhichOneof("Type") in [
+        "neuralNetwork",
+        "neuralNetworkClassifier",
+        "neuralNetworkRegressor",
+    ]:
+
+        if selector is None:
+            selector = MatrixMultiplyLayerSelector()
+
+        # Dequantize all the selected matrix multiplication layers
+        spec = _quantize_spec_weights(
+            spec,
+            nbits=None,
+            quantization_mode=_QUANTIZATION_MODE_DEQUANTIZE,
+            selector=selector,
+        )
+
+        def _quantized_weight_and_scale(W):
+            W_max = max(_np.abs(_np.min(W)), _np.abs(_np.max(W)))
+            W_normalized = W / W_max  # [-1,1]
+            W_quantized_int8 = 127.0 * W_normalized  # [-127, 127]
+            W_quantized_int8 = W_quantized_int8.astype(_np.int8)
+            quant_scale = W_max / 127.0
+            return W_quantized_int8, quant_scale
+
+        if spec.WhichOneof("Type") == "neuralNetwork":
+            nn_spec = spec.neuralNetwork
+
+        elif spec.WhichOneof("Type") == "neuralNetworkClassifier":
+            nn_spec = spec.neuralNetworkClassifier
+
+        elif spec.WhichOneof("Type") == "neuralNetworkRegressor":
+            nn_spec = spec.neuralNetworkRegressor
+
+        def _process_nn_layers(nn_spec):
+            layers = nn_spec.layers
+
+            # Replacing each matrix multiplication
+            for layer in layers:
+                layer_type = layer.WhichOneof("layer")
+                if not selector.do_quantize(layer):
+                    continue
+
+                if layer_type == "branch":
+                    _process_nn_layers(layer.branch.ifBranch)
+                    _process_nn_layers(layer.branch.elseBranch)
+
+                elif layer_type == "loop":
+                    _process_nn_layers(layer.loop.conditionNetwork)
+                    _process_nn_layers(layer.loop.bodyNetwork)
+
+                elif layer_type in ["innerProduct", "batchedMatmul"]:
+                    # Bump up to appropriate spec version if at least one replacement occurs
+                    spec.specificationVersion = max(
+                        _SPECIFICATION_VERSION_IOS_14, spec.specificationVersion,
+                    )
+
+                    # InnerProduct
+                    if layer_type == "innerProduct":
+                        matmul_layer = layer.innerProduct
+
+                    # BatchedMatmul
+                    elif layer_type == "batchedMatmul":
+                        matmul_layer = layer.batchedMatmul
+
+                    wp = matmul_layer.weights
+
+                    if len(wp.floatValue) == 0:
+                        continue
+                    else:
+                        qw, qs = _quantized_weight_and_scale(wp.floatValue)
+
+                    print(
+                        "Modifying layer {} with size of weights {} to use Int8 * Int8 matrix multiplication".format(
+                            layer.name, qw.size
+                        )
+                    )
+
+                    matmul_layer.int8DynamicQuantize = True
+                    wp.quantization.numberOfBits = 8
+                    wp.quantization.linearQuantization.scale.extend(map(float, [qs]))
+                    wp.int8RawValue = bytes()
+                    wp.int8RawValue += qw.tobytes()
+                    del wp.floatValue[:]
+
+        _process_nn_layers(nn_spec)
+
+        return spec
+
+    else:
+        raise ValueError("Model Type {} not supported.".format(spec.WhichOneof("Type")))
+
+
+def quantize_weights(
+    full_precision_model, nbits, quantization_mode="linear", sample_data=None, **kwargs
+):
     """
     Utility function to convert a full precision (float) MLModel to an
     nbit quantized MLModel (float16).
@@ -1075,9 +1553,9 @@ def quantize_weights(full_precision_model,
     for only neural network models is supported. If a pipeline model is
     passed in, then all neural network models embedded within
     will be converted.
-
+
     nbits: int
-        Number of bits per quantized weight. Only 16-bit float point and
+        Number of bits per quantized weight. Only 16-bit floating point and
        1 to 8-bit quantization is supported

     quantization_mode: str
@@ -1144,23 +1622,27 @@ def quantize_weights(full_precision_model,
         "linear_lut": _QUANTIZATION_MODE_LOOKUP_TABLE_LINEAR,
         "custom_lut": _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE,
         "dequantization": _QUANTIZATION_MODE_DEQUANTIZE,
-        "linear_symmetric": _QUANTIZATION_MODE_LINEAR_SYMMETRIC
+        "linear_symmetric": _QUANTIZATION_MODE_LINEAR_SYMMETRIC,
     }

     try:
         qmode = qmode_mapping[quantization_mode]
     except KeyError:
         # kmeans is deprecated. Instead kmeans_lut is used. No need to show it.
-        del qmode_mapping['kmeans']
-        raise Exception("Invalid quantization mode. Quantization mode must be "
-                        "one of {}".format(qmode_mapping))
+        del qmode_mapping["kmeans"]
+        raise Exception(
+            "Invalid quantization mode. Quantization mode must be "
+            "one of {}".format(qmode_mapping)
+        )

     print("Quantizing using {} quantization".format(quantization_mode))
     spec = full_precision_model.get_spec()
     qspec = _quantize_spec_weights(spec, nbits, qmode, **kwargs)

-    if macos_version() < (10, 14):
-        print("WARNING! Unable to return a quantized MLModel instance since"
-              "OS != macOS 10.14 or later")
+    if _macos_version() < (10, 14):
+        print(
+            "WARNING! Unable to return a quantized MLModel instance since "
+            "OS != macOS 10.14 or later"
+        )
         print("Returning quantized model specification instead")
         return qspec
diff --git a/coremltools/models/neural_network/spec_inspection_utils.py b/coremltools/models/neural_network/spec_inspection_utils.py
index 86b9ed2f3..a26d9525c 100644
--- a/coremltools/models/neural_network/spec_inspection_utils.py
+++ b/coremltools/models/neural_network/spec_inspection_utils.py
@@ -1,6 +1,5 @@
-from __future__ import print_function
+from __future__ import print_function as _
 from ...proto import NeuralNetwork_pb2 as _NeuralNetwork_pb2
-from coremltools.models._deprecation import deprecated


 def _get_weight_param_summary(wp):
@@ -10,25 +9,43 @@ def _get_weight_param_summary(wp):
     Returns: a str summary for wp
     """
-    summary_str = ''
-    if wp.HasField('quantization'):
+    summary_str = ""
+    if wp.HasField("quantization"):
         nbits = wp.quantization.numberOfBits
-        quant_type = 'linearly' if wp.quantization.HasField('linearQuantization') else 'lookup-table'
-        summary_str += '{}-bit {} quantized'.format(nbits, quant_type)
+        quant_type = (
+            "linearly"
+            if wp.quantization.HasField("linearQuantization")
+            else "lookup-table"
+        )
+        summary_str += "{}-bit {} quantized".format(nbits, quant_type)

     if len(wp.floatValue) > 0:
-        summary_str += '({} floatValues)'.format(len(wp.floatValue))
+        summary_str += "({} floatValues)".format(len(wp.floatValue))

     if len(wp.float16Value) > 0:
-        summary_str += '({} bytes float16Values)'.format(len(wp.float16Value))
+        summary_str += "({} bytes float16Values)".format(len(wp.float16Value))

     if len(wp.rawValue) > 0:
-        summary_str += '({} bytes rawValues)'.format(len(wp.rawValue))
+        summary_str += "({} bytes rawValues)".format(len(wp.rawValue))

     return summary_str


 def _get_lstm_weight_param_summary(lstm_wp):
     weight_name_list = [
-        'W_i','W_f','W_z','W_o','H_i','H_f','H_z','H_o','b_i','b_f','b_z','b_o','p_i','p_f','p_o'
+        "W_i",
+        "W_f",
+        "W_z",
+        "W_o",
+        "H_i",
+        "H_f",
+        "H_z",
+        "H_o",
+        "b_i",
+        "b_f",
+        "b_z",
+        "b_o",
+        "p_i",
+        "p_f",
+        "p_o",
     ]
     wp_summary_list = [
         _get_weight_param_summary(lstm_wp.inputGateWeightMatrix),
@@ -50,25 +67,19 @@
     lstm_wp_summary_list = []
     for idx, summary in enumerate(wp_summary_list):
         if len(summary) > 0:
-            lstm_wp_summary_list.append(weight_name_list[idx]
+ ', ' + summary) + lstm_wp_summary_list.append(weight_name_list[idx] + ", " + summary) - return ('\n' + ' '*8).join(lstm_wp_summary_list) + return ("\n" + " " * 8).join(lstm_wp_summary_list) -@deprecated -def get_feature_description_summary(feature): - return _get_feature_description_summary(feature) def _get_feature_description_summary(feature): - if feature.type.HasField('multiArrayType'): + if feature.type.HasField("multiArrayType"): shape = list(feature.type.multiArrayType.shape) int_shape = [int(x) for x in shape] return str(int_shape) else: - return ('({})'.format(str(feature.type))).replace('\n', '') + return ("({})".format(str(feature.type))).replace("\n", "") -@deprecated -def summarize_network_layer_info(layer): - return _summarize_network_layer_info(layer) def _summarize_network_layer_info(layer): """ @@ -81,7 +92,7 @@ def _summarize_network_layer_info(layer): layer_outputs : list[str] - a list of strings representing output blobs of the layer layer_field_content : list[(str, str)] - a list of two-tuple of (parameter_name, content) """ - layer_type_str = layer.WhichOneof('layer') + layer_type_str = layer.WhichOneof("layer") layer_name = layer.name layer_inputs = list(layer.input) @@ -92,8 +103,8 @@ def _summarize_network_layer_info(layer): layer_field_content = [] for name in layer_field_names: - field = getattr(typed_layer,name) - summary_str = '' + field = getattr(typed_layer, name) + summary_str = "" if type(field) == _NeuralNetwork_pb2.LSTMWeightParams: summary_str = _get_lstm_weight_param_summary(field) elif type(field) == _NeuralNetwork_pb2.WeightParams: @@ -101,15 +112,12 @@ def _summarize_network_layer_info(layer): else: field_str = str(field) if len(field_str) > 0: - summary_str = field_str.replace('\n', ' ') + summary_str = field_str.replace("\n", " ") if len(summary_str) > 0: layer_field_content.append([name, summary_str]) return layer_type_str, layer_name, layer_inputs, layer_outputs, layer_field_content -@deprecated -def summarize_neural_network_spec(mlmodel_spec): - return _summarize_neural_network_spec(mlmodel_spec) def _summarize_neural_network_spec(mlmodel_spec): """ Summarize network into the following structure. 
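A brief sketch of how these summary helpers can be driven from a model spec; the model path is hypothetical, and `_summarize_neural_network_spec` is a private helper, shown here only to illustrate the shape of its return value:

```python
# Sketch only: "my_model.mlmodel" is a placeholder path.
import coremltools
from coremltools.models.neural_network.spec_inspection_utils import (
    _summarize_neural_network_spec,
)

spec = coremltools.models.MLModel("my_model.mlmodel").get_spec()
inputs, outputs, layers = _summarize_neural_network_spec(spec)

for name, summary in inputs:
    print("input:", name, summary)
# Each layer entry is (type, name, input blobs, output blobs, field summaries);
# `layers` is None for non-neural-network specs, hence the `or []`.
for layer_type, name, ins, outs, fields in layers or []:
    print("{} [{}]: {} -> {}".format(name, layer_type, ins, outs))
```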
@@ -121,53 +129,69 @@
         layers : list[(str, list[str], list[str], list[(str, str)])] - a list of layers represented by
              layer name, input blobs, output blobs, a list of (parameter name, content)
     """
-    inputs = [(blob.name, _get_feature_description_summary(blob)) for blob in mlmodel_spec.description.input]
-    outputs = [(blob.name, _get_feature_description_summary(blob)) for blob in mlmodel_spec.description.output]
+    inputs = [
+        (blob.name, _get_feature_description_summary(blob))
+        for blob in mlmodel_spec.description.input
+    ]
+    outputs = [
+        (blob.name, _get_feature_description_summary(blob))
+        for blob in mlmodel_spec.description.output
+    ]
     nn = None

-    if mlmodel_spec.HasField('neuralNetwork'):
+    if mlmodel_spec.HasField("neuralNetwork"):
         nn = mlmodel_spec.neuralNetwork
-    elif mlmodel_spec.HasField('neuralNetworkClassifier'):
+    elif mlmodel_spec.HasField("neuralNetworkClassifier"):
         nn = mlmodel_spec.neuralNetworkClassifier
-    elif mlmodel_spec.HasField('neuralNetworkRegressor'):
+    elif mlmodel_spec.HasField("neuralNetworkRegressor"):
         nn = mlmodel_spec.neuralNetworkRegressor

-    layers = [_summarize_network_layer_info(layer) for layer in nn.layers] if nn != None else None
+    layers = (
+        [_summarize_network_layer_info(layer) for layer in nn.layers]
+        if nn is not None
+        else None
+    )
     return (inputs, outputs, layers)

+
 def _prRed(skk, end=None):
     print("\033[91m {}\033[00m".format(skk), end=end)

+
 def _prLightPurple(skk, end=None):
     print("\033[94m {}\033[00m".format(skk), end=end)

+
 def _prPurple(skk, end=None):
     print("\033[95m {}\033[00m".format(skk), end=end)

+
 def _prGreen(skk, end=None):
     print("\033[92m {}\033[00m".format(skk), end=end)


-def _print_layer_type_and_arguments(layer_type_str, layer_inputs, indentation, to_indent=True,
-                                    shape=None, value=None):
+def _print_layer_type_and_arguments(
+    layer_type_str, layer_inputs, indentation, to_indent=True, shape=None, value=None
+):
     if to_indent:
-        _prRed(indentation * '\t' + '{}'.format(layer_type_str), end='')
+        _prRed(indentation * "\t" + "{}".format(layer_type_str), end="")
     else:
-        _prRed('{}'.format(layer_type_str), end='')
+        _prRed("{}".format(layer_type_str), end="")

     if shape is None:
-        _prLightPurple('({})'.format(', '.join(layer_inputs)))
+        _prLightPurple("({})".format(", ".join(layer_inputs)))
     elif value is not None:
-        _prLightPurple('(shape = ', end='')
-        print('{}, '.format(str(shape)), end='')
-        _prLightPurple('value = ', end='')
-        values = ','.join(["{0: 0.1f}".format(v) for v in value]).lstrip()
-        print('[{}]'.format(values), end='')
-        _prLightPurple(')')
+        _prLightPurple("(shape = ", end="")
+        print("{}, ".format(str(shape)), end="")
+        _prLightPurple("value = ", end="")
+        values = ",".join(["{0: 0.1f}".format(v) for v in value]).lstrip()
+        print("[{}]".format(values), end="")
+        _prLightPurple(")")
     else:
-        _prLightPurple('(shape = ', end='')
-        print('{}'.format(str(shape)), end='')
-        _prLightPurple(')')
+        _prLightPurple("(shape = ", end="")
+        print("{}".format(str(shape)), end="")
+        _prLightPurple(")")
+

 def _find_size(arr):
     s = 1
@@ -175,81 +199,95 @@
         s *= a
     return s

-@deprecated
-def summarize_neural_network_spec_code_style(nn_spec, indentation=0, input_names=None, output_names=None):
-    return _summarize_neural_network_spec_code_style(nn_spec, indentation=indentation,
-                                                     input_names=input_names, output_names=output_names)

-def _summarize_neural_network_spec_code_style(nn_spec, indentation=0, input_names=None, output_names=None):
+def
_summarize_neural_network_spec_code_style( + nn_spec, indentation=0, input_names=None, output_names=None +): """ print nn_spec as if writing code """ indentation_size = 1 if input_names: - print('def model({}) :'.format(', '.join(input_names))) + print("def model({}):".format(", ".join(input_names))) indentation += indentation_size for i, layer in enumerate(nn_spec.layers): - layer_type_str = layer.WhichOneof('layer') + layer_type_str = layer.WhichOneof("layer") layer_inputs = list(layer.input) layer_outputs = list(layer.output) - if layer_type_str == 'loop': + if layer_type_str == "loop": if len(layer.loop.conditionNetwork.layers) > 0: - _prPurple(indentation * '\t' + 'Condition Network: ') - _summarize_neural_network_spec_code_style(layer.loop.conditionNetwork, indentation=indentation) + _prPurple(indentation * "\t" + "Condition Network: ") + _summarize_neural_network_spec_code_style( + layer.loop.conditionNetwork, indentation=indentation + ) if layer.loop.conditionVar: layer_inputs.append(layer.loop.conditionVar) _print_layer_type_and_arguments(layer_type_str, layer_inputs, indentation) indentation += indentation_size - _summarize_neural_network_spec_code_style(layer.loop.bodyNetwork, indentation=indentation) + _summarize_neural_network_spec_code_style( + layer.loop.bodyNetwork, indentation=indentation + ) if len(layer.loop.conditionNetwork.layers) > 0: - _prPurple(indentation * '\t' + 'Condition Network: ') - _summarize_neural_network_spec_code_style(layer.loop.conditionNetwork, indentation=indentation) + _prPurple(indentation * "\t" + "Condition Network: ") + _summarize_neural_network_spec_code_style( + layer.loop.conditionNetwork, indentation=indentation + ) indentation -= indentation_size continue - if layer_type_str == 'branch': + if layer_type_str == "branch": _print_layer_type_and_arguments(layer_type_str, layer_inputs, indentation) - _prRed(indentation * '\t' + 'IfBranch:') + _prRed(indentation * "\t" + "IfBranch:") indentation += indentation_size - _summarize_neural_network_spec_code_style(layer.branch.ifBranch, indentation=indentation) + _summarize_neural_network_spec_code_style( + layer.branch.ifBranch, indentation=indentation + ) indentation -= indentation_size if len(layer.branch.elseBranch.layers) > 0: - _prRed(indentation * '\t' + 'ElseBranch:') + _prRed(indentation * "\t" + "ElseBranch:") indentation += indentation_size - _summarize_neural_network_spec_code_style(layer.branch.elseBranch, indentation=indentation) + _summarize_neural_network_spec_code_style( + layer.branch.elseBranch, indentation=indentation + ) indentation -= indentation_size continue - if layer_type_str == 'loopBreak' or layer_type_str == 'loopContinue': - _prRed(indentation * '\t' + layer_type_str) + if layer_type_str == "loopBreak" or layer_type_str == "loopContinue": + _prRed(indentation * "\t" + layer_type_str) continue shape = None value = None - if layer_type_str == 'loadConstant': + if layer_type_str == "loadConstant": shape = layer.loadConstant.shape shape = list(shape) int_shape = [int(x) for x in shape] - shape = tuple([1,1] + int_shape) + shape = tuple([1, 1] + int_shape) size = _find_size(shape) if size < 4 and len(layer.loadConstant.data.floatValue) > 0: value = map(float, list(layer.loadConstant.data.floatValue)) - if layer_type_str == 'loadConstantND': + if layer_type_str == "loadConstantND": shape = layer.loadConstantND.shape shape = tuple(map(int, list(shape))) size = _find_size(shape) if size < 4 and len(layer.loadConstantND.data.floatValue) > 0: value = map(float, 
list(layer.loadConstantND.data.floatValue)) - print(indentation * '\t', end='') - print('{} = '.format(', '.join(layer_outputs)), end='') - _print_layer_type_and_arguments(layer_type_str, layer_inputs, indentation, to_indent=False, - shape=shape, value=value) + print(indentation * "\t", end="") + print("{} =".format(", ".join(layer_outputs)), end="") + _print_layer_type_and_arguments( + layer_type_str, + layer_inputs, + indentation, + to_indent=False, + shape=shape, + value=value, + ) if output_names: - _prRed('\n' + indentation * '\t' + 'return ', end='') - print('{}'.format(', '.join(output_names))) \ No newline at end of file + _prRed("\n" + indentation * "\t" + "return ", end="") + print("{}".format(", ".join(output_names))) diff --git a/coremltools/models/neural_network/utils.py b/coremltools/models/neural_network/utils.py new file mode 100644 index 000000000..d3798ea71 --- /dev/null +++ b/coremltools/models/neural_network/utils.py @@ -0,0 +1,132 @@ +from .builder import NeuralNetworkBuilder +from coremltools.models.utils import _get_model +import copy as _copy + + +def make_image_input( + model, + input_name, + is_bgr=False, + red_bias=0.0, + blue_bias=0.0, + green_bias=0.0, + gray_bias=0.0, + scale=1.0, + image_format="NHWC", +): + """ + Convert input of type multiarray to type image + + Parameters + ---------- + TODO + + Returns + ------- + model: MLModel + A coreML MLModel object + + Examples + -------- + TODO + """ + + spec = model.get_spec() + + if spec.WhichOneof("Type") not in [ + "neuralNetwork", + "neuralNetworkClassifier", + "neuralNetworkRegressor", + ]: + raise ValueError( + "Provided model must be of type neuralNetwork, neuralNetworkClassifier or neuralNetworkRegressor" + ) + + if not isinstance(input_name, list): + input_name = [input_name] + + spec_inputs = [i.name for i in spec.description.input] + for name in input_name: + if name not in spec_inputs: + msg = "Provided input_name: {}, is not an existing input to the model" + raise ValueError(msg.format(name)) + + builder = NeuralNetworkBuilder(spec=spec) + builder.set_pre_processing_parameters( + image_input_names=input_name, + is_bgr=is_bgr, + red_bias=red_bias, + green_bias=green_bias, + blue_bias=blue_bias, + gray_bias=gray_bias, + image_scale=scale, + image_format=image_format, + ) + return _get_model(spec) + + +def make_nn_classifier( + model, + class_labels, + predicted_feature_name=None, + predicted_probabilities_output=None, +): + """ + Convert a model of type "neuralNetwork" to type "neuralNetworkClassifier" + + Parameters + ---------- + TODO + + Returns + ------- + model: MLModel + A coreML MLModel object + + Examples + -------- + TODO + """ + + spec = model.get_spec() + + if spec.WhichOneof("Type") != "neuralNetwork": + raise ValueError('Provided model must be of type "neuralNetwork"') + + # convert type to "neuralNetworkClassifier" and copy messages from "neuralNetwork" + nn_spec = _copy.deepcopy(spec.neuralNetwork) + spec.ClearField("neuralNetwork") + for layer in nn_spec.layers: + spec.neuralNetworkClassifier.layers.add().CopyFrom(layer) + for preprocessing in nn_spec.preprocessing: + spec.neuralNetworkClassifier.preprocessing.add().CopyFrom(preprocessing) + spec.neuralNetworkClassifier.arrayInputShapeMapping = nn_spec.arrayInputShapeMapping + spec.neuralNetworkClassifier.imageInputShapeMapping = nn_spec.imageInputShapeMapping + spec.neuralNetworkClassifier.updateParams.CopyFrom(nn_spec.updateParams) + + # set properties related to classifier + builder = NeuralNetworkBuilder(spec=spec) + message = "Class 
labels must be a list of integers / strings or a file path" + classes_in = class_labels + if isinstance(classes_in, str): + import os + + if not os.path.isfile(classes_in): + raise ValueError("Path to class labels (%s) does not exist." % classes_in) + with open(classes_in, "r") as f: + classes = f.read() + classes = classes.splitlines() + elif isinstance(classes_in, list): # list[int or str] + classes = classes_in + assert all([isinstance(x, (int, str)) for x in classes]), message + else: + raise ValueError(message) + + kwargs = {} + if predicted_feature_name is not None: + kwargs["predicted_feature_name"] = predicted_feature_name + if predicted_probabilities_output is not None: + kwargs["prediction_blob"] = predicted_probabilities_output + builder.set_class_labels(classes, **kwargs) + + return _get_model(spec) diff --git a/coremltools/models/pipeline.py b/coremltools/models/pipeline.py index 7daa0826b..c8aad77a4 100644 --- a/coremltools/models/pipeline.py +++ b/coremltools/models/pipeline.py @@ -6,7 +6,7 @@ """ Pipeline utils for this package. """ -from .. import SPECIFICATION_VERSION +from .. import SPECIFICATION_VERSION as _SPECIFICATION_VERSION from ..proto import Model_pb2 as _Model_pb2 from . import _feature_management from . import model as _model @@ -15,13 +15,14 @@ from ._interface_management import set_classifier_interface_params from ._interface_management import set_transform_interface_params, set_training_features + class Pipeline(object): - """ - A pipeline model that exposes a sequence of models as a single model, - It requires a set of inputs, a sequence of other models and a set of outputs. - - This class is the base class for :py:class:`PipelineClassifier` and - :py:class:`PipelineRegressor`, which contain a sequence ending in a classifier + """ + A pipeline model that exposes a sequence of models as a single model, + It requires a set of inputs, a sequence of other models and a set of outputs. + + This class is the base class for :py:class:`PipelineClassifier` and + :py:class:`PipelineRegressor`, which contain a sequence ending in a classifier or regressor and themselves behave like a classifier or regressor. This class may be used directly for a sequence of feature transformer objects. @@ -33,41 +34,45 @@ def __init__(self, input_features, output_features, training_features=None): Parameters ---------- - + input_features: [list of 2-tuples] Name(s) of the input features, given as a list of `('name', datatype)` - tuples. The datatypes entry can be any of the data types defined in the + tuples. The datatypes entry can be any of the data types defined in the :py:mod:`models.datatypes` module. output_features: [list of features] Name(s) of the output features, given as a list of `('name',datatype)` tuples. The datatypes entry can be any of the data types defined in the :py:mod:`models.datatypes` module. All features - must be either defined in the inputs or be produced by one of the - contained models. + must be either defined in the inputs or be produced by one of the + contained models. """ spec = _Model_pb2.Model() - spec.specificationVersion = SPECIFICATION_VERSION - + spec.specificationVersion = _SPECIFICATION_VERSION + # Access this to declare it as a pipeline spec.pipeline - spec = set_transform_interface_params(spec, input_features, output_features, training_features) + spec = set_transform_interface_params( + spec, input_features, output_features, training_features + ) # Save the spec as a member variable. 
self.spec = spec def _validate_updatable_pipeline_on_add_model(self, spec): if spec.isUpdatable: - raise ValueError("New sub-models cannot be added after the pipeline has been marked as updatable") + raise ValueError( + "New sub-models cannot be added after the pipeline has been marked as updatable" + ) def add_model(self, spec): """ - Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. + Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. - All input features of this model must either match the input_features - of the pipeline, or match the outputs of a previous model. + All input features of this model must either match the input_features + of the pipeline, or match the outputs of a previous model. Parameters ---------- @@ -88,15 +93,21 @@ def _validate_sub_models_and_make_updatable(self, pipeline, spec): num_models = len(pipeline.models) if num_models < 1: - raise ValueError("Pipeline does not seem to have any models. It should be marked as updatable only after adding all sub-models.") + raise ValueError( + "Pipeline does not seem to have any models. It should be marked as updatable only after adding all sub-models." + ) for model in pipeline.models[:-1]: if model.isUpdatable: - raise ValueError("Only the last model can be updatable in an updatable pipeline.") + raise ValueError( + "Only the last model can be updatable in an updatable pipeline." + ) last_model = pipeline.models[num_models - 1] if not last_model.isUpdatable: - raise ValueError("A pipeline can be made updatable only if the last model is updatable.") + raise ValueError( + "A pipeline can be made updatable only if the last model is updatable." + ) spec.isUpdatable = True @@ -117,8 +128,8 @@ def set_training_input(self, training_input): class PipelineRegressor(Pipeline): - """ - A pipeline model that exposes a sequence of models as a single model, + """ + A pipeline model that exposes a sequence of models as a single model, It requires a set of inputs, a sequence of other models and a set of outputs. In this case the pipeline itself behaves as a regression model by designating a real valued output feature as its 'predicted feature'. @@ -126,15 +137,15 @@ class PipelineRegressor(Pipeline): def __init__(self, input_features, output_features, training_features=None): """ - Create a set of pipeline models given a set of model specs. The final - output model must be a regression model. + Create a set of pipeline models given a set of model specs. The final + output model must be a regression model. Parameters ---------- - + input_features: [list of 2-tuples] Name(s) of the input features, given as a list of `('name', datatype)` - tuples. The datatypes entry can be any of the data types defined in the + tuples. The datatypes entry can be any of the data types defined in the :py:mod:`models.datatypes` module. 
output_features: [list of features] @@ -146,21 +157,23 @@ def __init__(self, input_features, output_features, training_features=None): """ spec = _Model_pb2.Model() - spec.specificationVersion = SPECIFICATION_VERSION - + spec.specificationVersion = _SPECIFICATION_VERSION + # Access this to declare it as a pipeline spec.pipelineRegressor - spec = set_regressor_interface_params(spec, input_features, output_features, training_features) + spec = set_regressor_interface_params( + spec, input_features, output_features, training_features + ) # Save as a member variable self.spec = spec def add_model(self, spec): """ - Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. + Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. - All input features of this model must either match the input_features - of the pipeline, or match the outputs of a previous model. + All input features of this model must either match the input_features + of the pipeline, or match the outputs of a previous model. Parameters ---------- @@ -168,7 +181,9 @@ def add_model(self, spec): A protobuf spec or MLModel instance containing a model. """ - super(PipelineRegressor, self)._validate_updatable_pipeline_on_add_model(self.spec) + super(PipelineRegressor, self)._validate_updatable_pipeline_on_add_model( + self.spec + ) if isinstance(spec, _model.MLModel): spec = spec._spec @@ -178,7 +193,9 @@ def add_model(self, spec): step_spec.CopyFrom(spec) def make_updatable(self): - super(PipelineRegressor, self)._validate_sub_models_and_make_updatable(self.spec.pipelineRegressor.pipeline, self.spec) + super(PipelineRegressor, self)._validate_sub_models_and_make_updatable( + self.spec.pipelineRegressor.pipeline, self.spec + ) def set_training_input(self, training_input): """ @@ -192,48 +209,58 @@ def set_training_input(self, training_input): spec = self.spec set_training_features(spec, training_input) + class PipelineClassifier(Pipeline): - """ - A pipeline model that exposes a sequence of models as a single model, + """ + A pipeline model that exposes a sequence of models as a single model, It requires a set of inputs, a sequence of other models and a set of outputs. In this case the pipeline itself behaves as a classification model by designating a discrete categorical output feature as its 'predicted feature'. """ - def __init__(self, input_features, class_labels, output_features=None, training_features=None): + def __init__( + self, input_features, class_labels, output_features=None, training_features=None + ): """ - Create a set of pipeline models given a set of model specs. The last - model in this list must be a classifier model. + Create a set of pipeline models given a set of model specs. The last + model in this list must be a classifier model. Parameters ---------- input_features: [list of 2-tuples] Name(s) of the input features, given as a list of `('name', datatype)` - tuples. The datatypes entry can be any of the data types defined in the + tuples. The datatypes entry can be any of the data types defined in the :py:mod:`models.datatypes` module. class_labels: [list] - A list of string or integer class labels to use in making predictions. + A list of string or integer class labels to use in making predictions. 
This list must match the class labels in the model outputting the categorical predictedFeatureName output_features: [list] - A string or a list of two strings specifying the names of the two - output features, the first being a class label corresponding - to the class with the highest predicted score, and the second being - a dictionary mapping each class to its score. If `output_features` - is a string, it specifies the predicted class label and the class - scores is set to the default value of `"classProbability."` - + A string or a list of two strings specifying the names of the two + output features, the first being a class label corresponding + to the class with the highest predicted score, and the second being + a dictionary mapping each class to its score. If `output_features` + is a string, it specifies the predicted class label and the class + scores is set to the default value of `"classProbability."` + """ output_features = _feature_management.process_or_validate_classifier_output_features( - output_features, class_labels) + output_features, class_labels + ) spec = _Model_pb2.Model() - spec.specificationVersion = SPECIFICATION_VERSION - spec = set_classifier_interface_params(spec, input_features, - class_labels, 'pipelineClassifier', output_features, training_features) + spec.specificationVersion = _SPECIFICATION_VERSION + spec = set_classifier_interface_params( + spec, + input_features, + class_labels, + "pipelineClassifier", + output_features, + training_features, + ) # Access this to declare it as a pipeline spec.pipelineClassifier @@ -243,10 +270,10 @@ def __init__(self, input_features, class_labels, output_features=None, training_ def add_model(self, spec): """ - Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. + Add a protobuf spec or :py:class:`models.MLModel` instance to the pipeline. - All input features of this model must either match the input_features - of the pipeline, or match the outputs of a previous model. + All input features of this model must either match the input_features + of the pipeline, or match the outputs of a previous model. Parameters ---------- @@ -254,7 +281,9 @@ def add_model(self, spec): A protobuf spec or MLModel instance containing a model. """ - super(PipelineClassifier, self)._validate_updatable_pipeline_on_add_model(self.spec) + super(PipelineClassifier, self)._validate_updatable_pipeline_on_add_model( + self.spec + ) if isinstance(spec, _model.MLModel): spec = spec._spec @@ -263,7 +292,9 @@ def add_model(self, spec): step_spec.CopyFrom(spec) def make_updatable(self): - super(PipelineClassifier, self)._validate_sub_models_and_make_updatable(self.spec.pipelineClassifier.pipeline, self.spec) + super(PipelineClassifier, self)._validate_sub_models_and_make_updatable( + self.spec.pipelineClassifier.pipeline, self.spec + ) def set_training_input(self, training_input): """ @@ -275,4 +306,4 @@ def set_training_input(self, training_input): List of training input names and type of the network. """ spec = self.spec - set_training_features(spec, training_input) \ No newline at end of file + set_training_features(spec, training_input) diff --git a/coremltools/models/tree_ensemble.py b/coremltools/models/tree_ensemble.py index 86d272e6b..2eee4f6af 100644 --- a/coremltools/models/tree_ensemble.py +++ b/coremltools/models/tree_ensemble.py @@ -6,7 +6,7 @@ """ Tree ensemble builder class to construct CoreML models. """ -from .. import SPECIFICATION_VERSION +from .. 
import SPECIFICATION_VERSION as _SPECIFICATION_VERSION from ..proto import Model_pb2 as _Model_pb2 from ..proto import TreeEnsemble_pb2 as _TreeEnsemble_pb2 from ..proto import FeatureTypes_pb2 as _FeatureTypes_pb2 @@ -15,11 +15,12 @@ from ._interface_management import set_classifier_interface_params import collections as _collections + class TreeEnsembleBase(object): """ Base class for the tree ensemble builder class. This should be instantiated - either through the :py:class:`TreeEnsembleRegressor` or - :py:class:`TreeEnsembleClassifier` classes. + either through the :py:class:`TreeEnsembleRegressor` or + :py:class:`TreeEnsembleClassifier` classes. """ def __init__(self): @@ -28,7 +29,7 @@ def __init__(self): """ # Set inputs and outputs spec = _Model_pb2.Model() - spec.specificationVersion = SPECIFICATION_VERSION + spec.specificationVersion = _SPECIFICATION_VERSION # Save the spec in the protobuf self.spec = spec @@ -37,16 +38,16 @@ def set_default_prediction_value(self, values): """ Set the default prediction value(s). - The values given here form the base prediction value that the values - at activated leaves are added to. If values is a scalar, then + The values given here form the base prediction value that the values + at activated leaves are added to. If values is a scalar, then the output of the tree must also be 1 dimensional; otherwise, values must be a list with length matching the dimension of values in the tree. Parameters ---------- values: [int | double | list[double]] - Default values for predictions. - + Default values for predictions. + """ if type(values) is not list: values = [float(values)] @@ -56,36 +57,36 @@ def set_default_prediction_value(self, values): def set_post_evaluation_transform(self, value): r""" - Set the post processing transform applied after the prediction value - from the tree ensemble. + Set the post processing transform applied after the prediction value + from the tree ensemble. Parameters ---------- value: str - A value denoting the transform applied. Possible values are: + A value denoting the transform applied. Possible values are: - - "NoTransform" (default). Do not apply a transform. + - "NoTransform" (default). Do not apply a transform. - - "Classification_SoftMax". + - "Classification_SoftMax". - Apply a softmax function to the outcome to produce normalized, + Apply a softmax function to the outcome to produce normalized, non-negative scores that sum to 1. The transformation applied to - dimension `i` is equivalent to: - + dimension `i` is equivalent to: + .. math:: \frac{e^{x_i}}{\sum_j e^{x_j}} - Note: This is the output transformation applied by the XGBoost package + Note: This is the output transformation applied by the XGBoost package with multiclass classification. - - "Regression_Logistic". + - "Regression_Logistic". - Applies a logistic transform the predicted value, specifically: + Applies a logistic transform the predicted value, specifically: - .. math:: + .. 
math:: (1 + e^{-v})^{-1} @@ -93,12 +94,22 @@ def set_post_evaluation_transform(self, value): """ - self.tree_spec.postEvaluationTransform = \ - _TreeEnsemble_pb2.TreeEnsemblePostEvaluationTransform.Value(value) - - def add_branch_node(self, tree_id, node_id, feature_index, feature_value, - branch_mode, true_child_id, false_child_id, relative_hit_rate = None, - missing_value_tracks_true_child = False): + self.tree_spec.postEvaluationTransform = _TreeEnsemble_pb2.TreeEnsemblePostEvaluationTransform.Value( + value + ) + + def add_branch_node( + self, + tree_id, + node_id, + feature_index, + feature_value, + branch_mode, + true_child_id, + false_child_id, + relative_hit_rate=None, + missing_value_tracks_true_child=False, + ): """ Add a branch node to the tree ensemble. @@ -114,38 +125,38 @@ def add_branch_node(self, tree_id, node_id, feature_index, feature_value, Index of the feature in the input being split on. feature_value: double or int - The value used in the feature comparison determining the traversal - direction from this node. + The value used in the feature comparison determining the traversal + direction from this node. branch_mode: str - Branch mode of the node, specifying the condition under which the node - referenced by `true_child_id` is called next. + Branch mode of the node, specifying the condition under which the node + referenced by `true_child_id` is called next. Must be one of the following: - `"BranchOnValueLessThanEqual"`. Traverse to node `true_child_id` - if `input[feature_index] <= feature_value`, and `false_child_id` - otherwise. + if `input[feature_index] <= feature_value`, and `false_child_id` + otherwise. - `"BranchOnValueLessThan"`. Traverse to node `true_child_id` - if `input[feature_index] < feature_value`, and `false_child_id` - otherwise. + if `input[feature_index] < feature_value`, and `false_child_id` + otherwise. - `"BranchOnValueGreaterThanEqual"`. Traverse to node `true_child_id` - if `input[feature_index] >= feature_value`, and `false_child_id` - otherwise. + if `input[feature_index] >= feature_value`, and `false_child_id` + otherwise. - `"BranchOnValueGreaterThan"`. Traverse to node `true_child_id` - if `input[feature_index] > feature_value`, and `false_child_id` - otherwise. + if `input[feature_index] > feature_value`, and `false_child_id` + otherwise. - `"BranchOnValueEqual"`. Traverse to node `true_child_id` - if `input[feature_index] == feature_value`, and `false_child_id` - otherwise. + if `input[feature_index] == feature_value`, and `false_child_id` + otherwise. - `"BranchOnValueNotEqual"`. Traverse to node `true_child_id` - if `input[feature_index] != feature_value`, and `false_child_id` - otherwise. + if `input[feature_index] != feature_value`, and `false_child_id` + otherwise. true_child_id: int ID of the child under the true condition of the split. An error will @@ -160,10 +171,10 @@ def add_branch_node(self, tree_id, node_id, feature_index, feature_value, this `tree_id`. relative_hit_rate: float [optional] - When the model is converted compiled by CoreML, this gives hints to - Core ML about which node is more likely to be hit on evaluation, - allowing for additional optimizations. The values can be on any scale, - with the values between child nodes being compared relative to each + When the model is converted compiled by CoreML, this gives hints to + Core ML about which node is more likely to be hit on evaluation, + allowing for additional optimizations. 
The values can be on any scale, + with the values between child nodes being compared relative to each other. missing_value_tracks_true_child: bool [optional] @@ -178,14 +189,15 @@ def add_branch_node(self, tree_id, node_id, feature_index, feature_value, spec_node.branchFeatureValue = feature_value spec_node.trueChildNodeId = true_child_id spec_node.falseChildNodeId = false_child_id - spec_node.nodeBehavior = \ - _TreeEnsemble_pb2.TreeEnsembleParameters.TreeNode.TreeNodeBehavior.Value(branch_mode) + spec_node.nodeBehavior = _TreeEnsemble_pb2.TreeEnsembleParameters.TreeNode.TreeNodeBehavior.Value( + branch_mode + ) if relative_hit_rate is not None: spec_node.relativeHitRate = relative_hit_rate spec_node.missingValueTracksTrueChild = missing_value_tracks_true_child - def add_leaf_node(self, tree_id, node_id, values, relative_hit_rate = None): + def add_leaf_node(self, tree_id, node_id, values, relative_hit_rate=None): """ Add a leaf node to the tree ensemble. @@ -195,33 +207,34 @@ def add_leaf_node(self, tree_id, node_id, values, relative_hit_rate = None): ID of the tree to add the node to. node_id: int - ID of the node within the tree. + ID of the node within the tree. values: [float | int | list | dict] - Value(s) at the leaf node to add to the prediction when this node is - activated. If the prediction dimension of the tree is 1, then the - value is specified as a float or integer value. - - For multidimensional predictions, the values can be a list of numbers + Value(s) at the leaf node to add to the prediction when this node is + activated. If the prediction dimension of the tree is 1, then the + value is specified as a float or integer value. + + For multidimensional predictions, the values can be a list of numbers with length matching the dimension of the predictions or a dictionary mapping index to value added to that dimension. Note that the dimension of any tree must match the dimension given - when :py:meth:`set_default_prediction_value` is called. + when :py:meth:`set_default_prediction_value` is called. """ spec_node = self.tree_parameters.nodes.add() spec_node.treeId = tree_id spec_node.nodeId = node_id - spec_node.nodeBehavior = \ - _TreeEnsemble_pb2.TreeEnsembleParameters.TreeNode.TreeNodeBehavior.Value('LeafNode') + spec_node.nodeBehavior = _TreeEnsemble_pb2.TreeEnsembleParameters.TreeNode.TreeNodeBehavior.Value( + "LeafNode" + ) if not isinstance(values, _collections.Iterable): values = [values] if relative_hit_rate is not None: spec_node.relativeHitRate = relative_hit_rate - + if type(values) == dict: iter = values.items() else: @@ -231,15 +244,17 @@ def add_leaf_node(self, tree_id, node_id, values, relative_hit_rate = None): ev_info = spec_node.evaluationInfo.add() ev_info.evaluationIndex = index ev_info.evaluationValue = float(value) - spec_node.nodeBehavior = \ - _TreeEnsemble_pb2.TreeEnsembleParameters.TreeNode.TreeNodeBehavior.Value('LeafNode') + spec_node.nodeBehavior = _TreeEnsemble_pb2.TreeEnsembleParameters.TreeNode.TreeNodeBehavior.Value( + "LeafNode" + ) + class TreeEnsembleRegressor(TreeEnsembleBase): """ - Tree Ensemble builder class to construct a Tree Ensemble regression model. + Tree Ensemble builder class to construct a Tree Ensemble regression model. The TreeEnsembleRegressor class constructs a Tree Ensemble model incrementally - using methods to add branch and leaf nodes specifying the behavior of the model. + using methods to add branch and leaf nodes specifying the behavior of the model. 
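As a condensed sketch of that incremental flow (separate from the fuller doctest Examples below, and assuming this module's package path is `coremltools.models.tree_ensemble`), the builder methods compose like this; the feature name, target name, and threshold are placeholders:

```python
from coremltools.models import datatypes
from coremltools.models.tree_ensemble import TreeEnsembleRegressor

# One tree (id 0) that is a single regression stump on feature 0 ("x").
tm = TreeEnsembleRegressor(features=[("x", datatypes.Double())], target="y")
tm.set_default_prediction_value(0.0)

# Root node 0: if x <= 0.5, traverse to node 1, otherwise to node 2.
tm.add_branch_node(0, 0, 0, 0.5, "BranchOnValueLessThanEqual", 1, 2)
tm.add_leaf_node(0, 1, -1.0)  # activated leaf adds -1.0 to the base value
tm.add_leaf_node(0, 2, 1.0)   # activated leaf adds +1.0 to the base value
```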
Examples -------- @@ -293,24 +308,23 @@ class TreeEnsembleRegressor(TreeEnsembleBase): """ - def __init__(self, features, target): """ - Create a Tree Ensemble regression model that takes one or more input - features and maps them to an output feature. + Create a Tree Ensemble regression model that takes one or more input + features and maps them to an output feature. Parameters ---------- features: [list of features] Name(s) of the input features, given as a list of `('name', datatype)` - tuples. The features are one of :py:class:`models.datatypes.Int64`, - :py:class:`datatypes.Double`, or :py:class:`models.datatypes.Array`. - Feature indices in the nodes are counted sequentially from 0 through - the features. - + tuples. The features are one of :py:class:`models.datatypes.Int64`, + :py:class:`datatypes.Double`, or :py:class:`models.datatypes.Array`. + Feature indices in the nodes are assigned sequentially, starting at 0, + in the order the features are listed. + target: (default = None) - Name of the target feature predicted. + Name of the target feature predicted. """ super(TreeEnsembleRegressor, self).__init__() spec = self.spec @@ -318,12 +332,13 @@ def __init__(self, features, target): self.tree_spec = spec.treeEnsembleRegressor self.tree_parameters = self.tree_spec.treeEnsemble + class TreeEnsembleClassifier(TreeEnsembleBase): """ - Tree Ensemble builder class to construct a Tree Ensemble classification model. + Tree Ensemble builder class to construct a Tree Ensemble classification model. The TreeEnsembleClassifier class constructs a Tree Ensemble model incrementally - using methods to add branch and leaf nodes specifying the behavior of the model. + using methods to add branch and leaf nodes specifying the behavior of the model. Examples -------- @@ -333,7 +348,7 @@ class TreeEnsembleClassifier(TreeEnsembleBase): >>> input_features = [("a", datatypes.Array(3)), ("b", datatypes.Double())] - >>> tm = TreeEnsembleClassifier(features = input_features, class_labels = [0, 1], + >>> tm = TreeEnsembleClassifier(features = input_features, class_labels = [0, 1], output_features = "predicted_class") >>> # Split on a[2] <= 3 @@ -342,7 +357,7 @@ class TreeEnsembleClassifier(TreeEnsembleBase): >>> # Add leaf to the true branch of node 0 that subtracts 1. >>> tm.add_leaf_node(0, 1, -1) - >>> # Add split on b == 0 to the false branch of node 0. + >>> # Add split on b == 0 to the false branch of node 0. >>> tm.add_branch_node(0, 2, 3, 0, "BranchOnValueEqual", 3, 4) >>> # Add leaf to the true branch of node 2 that adds 1 to the result. @@ -351,7 +366,7 @@ class TreeEnsembleClassifier(TreeEnsembleBase): >>> # Add leaf to the false branch of node 2 that subtracts 1 from the result. >>> tm.add_leaf_node(0, 4, -1) - >>> # Put in a softmax transform to translate these into probabilities. + >>> # Put in a softmax transform to translate these into probabilities. >>> tm.set_post_evaluation_transform("Classification_SoftMax") >>> tm.set_default_prediction_value([0, 0]) @@ -372,7 +387,6 @@ class TreeEnsembleClassifier(TreeEnsembleBase): """ - def __init__(self, features, class_labels, output_features): """ Create a tree ensemble classifier model. @@ -381,27 +395,27 @@ def __init__(self, features, class_labels, output_features): ---------- features: [list of features] Name(s) of the input features, given as a list of `('name', datatype)` - tuples. The features are one of :py:class:`models.datatypes.Int64`, - :py:class:`datatypes.Double`, or :py:class:`models.datatypes.Array`. 
- Feature indices in the nodes are counted sequentially from 0 through - the features. - + tuples. The features are one of :py:class:`models.datatypes.Int64`, + :py:class:`datatypes.Double`, or :py:class:`models.datatypes.Array`. + Feature indices in the nodes are assigned sequentially, starting at 0, + in the order the features are listed. + class_labels: [list] - A list of string or integer class labels to use in making predictions. - The length of this must match the dimension of the tree model. + A list of string or integer class labels to use in making predictions. + The length of this must match the dimension of the tree model. output_features: [list] - A string or a list of two strings specifying the names of the two - output features, the first being a class label corresponding - to the class with the highest predicted score, and the second being - a dictionary mapping each class to its score. If `output_features` - is a string, it specifies the predicted class label and the class - scores is set to the default value of `"classProbability."` + A string or a list of two strings specifying the names of the two + output features, the first being a class label corresponding + to the class with the highest predicted score, and the second being + a dictionary mapping each class to its score. If `output_features` + is a string, it specifies the predicted class label, and the class + scores output is given the default name `"classProbability"`. """ super(TreeEnsembleClassifier, self).__init__() spec = self.spec - spec = set_classifier_interface_params(spec, features, class_labels, - 'treeEnsembleClassifier', output_features) + spec = set_classifier_interface_params( + spec, features, class_labels, "treeEnsembleClassifier", output_features + ) self.tree_spec = spec.treeEnsembleClassifier self.tree_parameters = self.tree_spec.treeEnsemble - diff --git a/coremltools/models/utils.py b/coremltools/models/utils.py index 07b569440..37a565530 100644 --- a/coremltools/models/utils.py +++ b/coremltools/models/utils.py @@ -10,12 +10,11 @@ import numpy as _np import os as _os import six as _six -import warnings -import sys +import warnings as _warnings +import sys as _sys from coremltools.proto import Model_pb2 as _Model_pb2 -from coremltools.models._deprecation import deprecated - -from .._deps import HAS_SKLEARN as _HAS_SKLEARN +from coremltools.models._deprecation import deprecated as _deprecated +from .._deps import _HAS_SKLEARN if _HAS_SKLEARN: import scipy.sparse as _sp @@ -57,27 +56,25 @@ def save_spec(spec, filename, auto_set_specification_version=False): if not ext: filename = "{}.mlmodel".format(filename) else: - if ext != '.mlmodel': + if ext != ".mlmodel": raise Exception("Extension must be .mlmodel (not {})".format(ext)) - # set model coremltools version - from coremltools import __version__ - spec.description.metadata.userDefined['coremltoolsVersion'] = __version__ - spec = spec.SerializeToString() if auto_set_specification_version: try: # always try to downgrade the specification version to the # minimal version that supports everything in this mlmodel from ..libcoremlpython import _MLModelProxy + spec = _MLModelProxy.auto_set_specification_version(spec) except Exception as e: print(e) - warnings.warn( + _warnings.warn( "Failed to automatic set specification version for this model.", - RuntimeWarning) + RuntimeWarning, + ) - with open(filename, 'wb') as f: + with open(filename, "wb") as f: f.write(spec) @@ -107,9 +104,10 @@ def load_spec(filename): save_spec """ from ..proto import Model_pb2 + spec = 
Model_pb2.Model() - with open(filename, 'rb') as f: + with open(filename, "rb") as f: contents = f.read() spec.ParseFromString(contents) return spec @@ -132,7 +130,7 @@ def _get_nn_layers(spec): """ layers = [] - if spec.WhichOneof('Type') == 'pipeline': + if spec.WhichOneof("Type") == "pipeline": layers = [] for model_spec in spec.pipeline.models: if not layers: @@ -140,8 +138,7 @@ def _get_nn_layers(spec): else: layers.extend(_get_nn_layers(model_spec)) - elif spec.WhichOneof('Type') in ['pipelineClassifier', - 'pipelineRegressor']: + elif spec.WhichOneof("Type") in ["pipelineClassifier", "pipelineRegressor"]: layers = [] for model_spec in spec.pipeline.models: if not layers: @@ -161,9 +158,9 @@ def _get_nn_layers(spec): def _fp32_to_reversed_fp16_byte_array(fp32_arr): raw_fp16 = _np.float16(fp32_arr) - x = '' + x = "" for fp16 in raw_fp16: - all_bytes = _np.fromstring(fp16.tobytes(), dtype='int8') + all_bytes = _np.fromstring(fp16.tobytes(), dtype="int8") x += all_bytes[1].tobytes() x += all_bytes[0].tobytes() return x @@ -171,12 +168,15 @@ def _fp32_to_reversed_fp16_byte_array(fp32_arr): def _fp32_to_fp16_byte_array(fp32_arr): if _np.amax(fp32_arr) >= 65504 or _np.amin(fp32_arr) <= -65504: - raise Exception('Model cannot be converted as ' - 'it has weights that cannot be represented in ' - 'half precision.\n') + raise Exception( + "Model cannot be converted as " + "it has weights that cannot be represented in " + "half precision.\n" + ) import sys - if sys.byteorder == 'little': + + if sys.byteorder == "little": return _np.float16(fp32_arr).tobytes() else: return _fp32_to_reversed_fp16_byte_array(fp32_arr) @@ -191,21 +191,30 @@ def _wp_to_fp16wp(wp): del wp.floatValue[:] - +@_deprecated( + suffix="instead use 'coremltools.models.neural_network.quantization_utils.quantize_weights'." +) def convert_neural_network_spec_weights_to_fp16(fp_spec): return _convert_neural_network_spec_weights_to_fp16(fp_spec) + def _convert_neural_network_spec_weights_to_fp16(fp_spec): from .neural_network.quantization_utils import _quantize_spec_weights - from .neural_network.quantization_utils import _QUANTIZATION_MODE_LINEAR_QUANTIZATION + from .neural_network.quantization_utils import ( + _QUANTIZATION_MODE_LINEAR_QUANTIZATION, + ) qspec = _quantize_spec_weights(fp_spec, 16, _QUANTIZATION_MODE_LINEAR_QUANTIZATION) return qspec +@_deprecated( + suffix="instead use 'coremltools.models.neural_network.quantization_utils.quantize_weights'." +) def convert_neural_network_weights_to_fp16(full_precision_model): return _convert_neural_network_weights_to_fp16(full_precision_model) + def _convert_neural_network_weights_to_fp16(full_precision_model): """ Utility function to convert a full precision (float) MLModel to a @@ -224,11 +233,6 @@ def _convert_neural_network_weights_to_fp16(full_precision_model): model: MLModel The converted half precision MLModel - Examples - -------- - .. sourcecode:: python - - >>> half_precision_model = coremltools.utils.convert_neural_network_weights_to_fp16(model) """ spec = full_precision_model.get_spec() return _get_model(_convert_neural_network_spec_weights_to_fp16(spec)) @@ -239,6 +243,7 @@ def _get_model(spec): Utility to get the model and the data. """ from . 
import MLModel + if isinstance(spec, MLModel): return spec else: @@ -288,7 +293,7 @@ def evaluate_regressor(model, data, target="target", verbose=False): max_error = 0 error_squared = 0 - for index,row in data.iterrows(): + for index, row in data.iterrows(): predicted = model.predict(dict(row))[_to_unicode(target)] other_framework = row["prediction"] delta = predicted - other_framework @@ -302,7 +307,7 @@ def evaluate_regressor(model, data, target="target", verbose=False): ret = { "samples": len(data), "rmse": _math.sqrt(error_squared / len(data)), - "max_error": max_error + "max_error": max_error, } if verbose: @@ -310,7 +315,7 @@ def evaluate_regressor(model, data, target="target", verbose=False): return ret -def evaluate_classifier(model, data, target='target', verbose=False): +def evaluate_classifier(model, data, target="target", verbose=False): """ Evaluate a Core ML classifier model and compare against predictions from the original framework (for testing correctness of conversion). @@ -351,7 +356,7 @@ def evaluate_classifier(model, data, target='target', verbose=False): num_errors = 0 - for index,row in data.iterrows(): + for index, row in data.iterrows(): predicted = model.predict(dict(row))[_to_unicode(target)] other_framework = row["prediction"] if predicted != other_framework: @@ -360,10 +365,7 @@ def evaluate_classifier(model, data, target='target', verbose=False): if verbose: print("{}\t\t\t\t{}".format(other_framework, predicted)) - ret = { - "num_samples": len(data), - "num_errors": num_errors - } + ret = {"num_samples": len(data), "num_errors": num_errors} if verbose: print("results: {}".format(ret)) @@ -371,9 +373,9 @@ def evaluate_classifier(model, data, target='target', verbose=False): return ret -def evaluate_classifier_with_probabilities(model, data, - probabilities='probabilities', - verbose = False): +def evaluate_classifier_with_probabilities( + model, data, probabilities="probabilities", verbose=False +): """ Evaluate a classifier specification for testing. @@ -401,13 +403,18 @@ def evaluate_classifier_with_probabilities(model, data, max_probability_error, num_key_mismatch = 0, 0 - for _,row in data.iterrows(): - predicted_values = model.predict(dict(row))[_to_unicode(probabilities)] + for _, row in data.iterrows(): + input_dict = {k: v for k, v in dict(row).items() if k != probabilities} + predicted_values = model.predict(input_dict)[_to_unicode(probabilities)] other_values = row[probabilities] if set(predicted_values.keys()) != set(other_values.keys()): if verbose: - print("Different classes: ", str(predicted_values.keys()), str(other_values.keys())) + print( + "Different classes: ", + str(predicted_values.keys()), + str(other_values.keys()), + ) num_key_mismatch += 1 continue @@ -424,7 +431,7 @@ def evaluate_classifier_with_probabilities(model, data, ret = { "num_samples": len(data), "max_probability_error": max_probability_error, - "num_key_mismatch": num_key_mismatch + "num_key_mismatch": num_key_mismatch, } if verbose: @@ -433,8 +440,9 @@ def evaluate_classifier_with_probabilities(model, data, return ret -def rename_feature(spec, current_name, new_name, rename_inputs=True, - rename_outputs=True): +def rename_feature( + spec, current_name, new_name, rename_inputs=True, rename_outputs=True +): """ Rename a feature in the specification. 
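For orientation, a brief usage sketch of the function being reformatted here; the model path and feature names are placeholders:

```python
import coremltools
from coremltools.models.utils import rename_feature

spec = coremltools.utils.load_spec("model.mlmodel")  # placeholder path
rename_feature(spec, "old_name", "new_name",
               rename_inputs=True, rename_outputs=True)
model = coremltools.models.MLModel(spec)  # rebuild the model from the spec
```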
@@ -496,23 +504,27 @@ def rename_feature(spec, current_name, new_name, rename_inputs=True, # Rename internally in NN model nn = None - for nn_type in ['neuralNetwork','neuralNetworkClassifier','neuralNetworkRegressor']: + for nn_type in [ + "neuralNetwork", + "neuralNetworkClassifier", + "neuralNetworkRegressor", + ]: if spec.HasField(nn_type): - nn = getattr(spec,nn_type) + nn = getattr(spec, nn_type) if nn is not None: for layer in nn.layers: if rename_inputs: - for index,name in enumerate(layer.input): + for index, name in enumerate(layer.input): if name == current_name: layer.input[index] = new_name if rename_outputs: - for index,name in enumerate(layer.output): + for index, name in enumerate(layer.output): if name == current_name: layer.output[index] = new_name # Rename internally for feature vectorizer - if spec.HasField('featureVectorizer') and rename_inputs: + if spec.HasField("featureVectorizer") and rename_inputs: for input in spec.featureVectorizer.inputList: if input.inputColumn == current_name: input.inputColumn = new_name @@ -520,20 +532,22 @@ def rename_feature(spec, current_name, new_name, rename_inputs=True, # Rename for pipeline models pipeline = None - if spec.HasField('pipeline'): + if spec.HasField("pipeline"): pipeline = spec.pipeline - elif spec.HasField('pipelineClassifier'): + elif spec.HasField("pipelineClassifier"): pipeline = spec.pipelineClassifier.pipeline - elif spec.HasField('pipelineRegressor'): + elif spec.HasField("pipelineRegressor"): pipeline = spec.pipelineRegressor.pipeline if pipeline is not None: - for index,model in enumerate(pipeline.models): - rename_feature(model, - current_name, - new_name, - rename_inputs or (index != 0), - rename_outputs or (index < len(spec.pipeline.models))) + for index, model in enumerate(pipeline.models): + rename_feature( + model, + current_name, + new_name, + rename_inputs or (index != 0), + rename_outputs or (index < len(spec.pipeline.models)), + ) def _sanitize_value(x): @@ -552,7 +566,7 @@ def _sanitize_value(x): elif isinstance(x, list): return [_sanitize_value(v) for v in x] elif isinstance(x, dict): - return dict( (_sanitize_value(k), _sanitize_value(v)) for k, v in x.items()) + return dict((_sanitize_value(k), _sanitize_value(v)) for k, v in x.items()) else: assert False, str(x) @@ -567,9 +581,11 @@ def _element_equal(x, y): except: return False elif isinstance(x, dict): - return (isinstance(y, dict) - and _element_equal(x.keys(), y.keys()) - and all(_element_equal(x[k], y[k]) for k in x.keys())) + return ( + isinstance(y, dict) + and _element_equal(x.keys(), y.keys()) + and all(_element_equal(x[k], y[k]) for k in x.keys()) + ) elif isinstance(x, float): return abs(x - y) < 1e-5 * (abs(x) + abs(y)) elif isinstance(x, (list, tuple)): @@ -578,8 +594,7 @@ def _element_equal(x, y): return bool(x == y) -def evaluate_transformer(model, input_data, reference_output, - verbose=False): +def evaluate_transformer(model, input_data, reference_output, verbose=False): """ Evaluate a transformer specification for testing. 
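A minimal usage sketch for this helper, with a placeholder model path and feature names; inputs and expected outputs are parallel lists of per-sample feature dictionaries:

```python
import coremltools
from coremltools.models.utils import evaluate_transformer

mlmodel = coremltools.models.MLModel("transformer.mlmodel")  # placeholder path
input_data = [{"x": 1.0}, {"x": 2.0}]        # one feature dict per sample
reference_output = [{"y": 2.0}, {"y": 4.0}]  # expected outputs, same order
ret = evaluate_transformer(mlmodel, input_data, reference_output, verbose=True)
# ret == {"num_samples": 2, "num_errors": 0} when every prediction matches
```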
@@ -630,7 +645,7 @@ def evaluate_transformer(model, input_data, reference_output, assert isinstance(ref_data, dict) assert isinstance(predicted, dict) - predicted_trimmed = dict( (k, predicted[k]) for k in ref_data.keys()) + predicted_trimmed = dict((k, predicted[k]) for k in ref_data.keys()) if verbose: print("Predicted:\n\t", str(predicted_trimmed)) @@ -638,17 +653,14 @@ def evaluate_transformer(model, input_data, reference_output, if not _element_equal(predicted_trimmed, ref_data): num_errors += 1 - ret = { - "num_samples": len(input_data), - "num_errors": num_errors - } + ret = {"num_samples": len(input_data), "num_errors": num_errors} if verbose: print("results: {}".format(ret)) return ret -def has_custom_layer(spec): +def _has_custom_layer(spec): """ Returns true if the given protobuf specification has a custom layer, and false otherwise. @@ -666,16 +678,12 @@ def has_custom_layer(spec): layers = _get_nn_layers(spec) for layer in layers: - if layer.WhichOneof('layer') == 'custom': + if layer.WhichOneof("layer") == "custom": return True return False -@deprecated -def get_custom_layer_names(spec): - return _get_custom_layer_names(spec) - def _get_custom_layer_names(spec): """ @@ -694,14 +702,11 @@ def _get_custom_layer_names(spec): layers = _get_nn_layers(spec) layers_out = set() for layer in layers: - if (layer.WhichOneof('layer') == 'custom'): + if layer.WhichOneof("layer") == "custom": layers_out.add(layer.custom.className) return layers_out -@deprecated -def get_custom_layers(spec): - return _get_custom_layers(spec) def _get_custom_layers(spec): """ @@ -720,16 +725,12 @@ def _get_custom_layers(spec): layers = _get_nn_layers(spec) layers_out = [] for layer in layers: - if (layer.WhichOneof('layer') == 'custom'): + if layer.WhichOneof("layer") == "custom": layers_out.append(layer) return layers_out -@deprecated -def replace_custom_layer_name(spec, oldname, newname): - return _replace_custom_layer_name(spec, oldname, newname) - def _replace_custom_layer_name(spec, oldname, newname): """ @@ -756,24 +757,34 @@ def _replace_custom_layer_name(spec, oldname, newname): layer.custom.className = newname -def is_macos(): +def _is_macos(): """Returns True if current platform is MacOS, False otherwise.""" - return sys.platform == 'darwin' + return _sys.platform == "darwin" -def macos_version(): +def _macos_version(): """ Returns macOS version as a tuple of integers, making it easy to do proper version comparisons. On non-Macs, it returns an empty tuple. 
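These now-private helpers are typically used together to gate macOS-only code paths; a minimal sketch (the version numbers are illustrative):

```python
from coremltools.models.utils import _is_macos, _macos_version

# Tuple comparison makes version gating simple; on non-macOS platforms
# _macos_version() returns (), so the combined check is just False.
if _is_macos() and _macos_version() >= (10, 13):
    pass  # e.g. run prediction-dependent tests only on macOS >= 10.13
```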
""" - if is_macos(): + if _is_macos(): import platform + ver_str = platform.mac_ver()[0] - return tuple([int(v) for v in ver_str.split('.')]) + return tuple([int(v) for v in ver_str.split(".")]) return () +def _python_version(): + """ + Return python version as a tuple of integers + """ + version = _sys.version.split(" ")[0] + version = list(map(int, list(version.split(".")))) + return tuple(version) + + def _get_feature(spec, feature_name): for input_feature in spec.description.input: if input_feature.name == feature_name: @@ -783,7 +794,7 @@ def _get_feature(spec, feature_name): if output_feature.name == feature_name: return output_feature - raise Exception('Feature with name {} does not exist'.format(feature_name)) + raise Exception("Feature with name {} does not exist".format(feature_name)) def _get_input_names(spec): @@ -815,10 +826,16 @@ def convert_double_to_float_multiarray_type(spec): >>> coremltools.utils.convert_double_to_float_multiarray_type(spec) >>> model = coremltools.models.MLModel(spec) """ + def _convert_to_float(feature): - if feature.type.HasField('multiArrayType'): - if feature.type.multiArrayType.dataType == _Model_pb2.ArrayFeatureType.DOUBLE: - feature.type.multiArrayType.dataType = _Model_pb2.ArrayFeatureType.FLOAT32 + if feature.type.HasField("multiArrayType"): + if ( + feature.type.multiArrayType.dataType + == _Model_pb2.ArrayFeatureType.DOUBLE + ): + feature.type.multiArrayType.dataType = ( + _Model_pb2.ArrayFeatureType.FLOAT32 + ) for feature in spec.description.input: _convert_to_float(feature) @@ -828,3 +845,7 @@ def _convert_to_float(feature): for feature in spec.description.trainingInput: _convert_to_float(feature) + + if spec.WhichOneof("Type") == "pipeline": + for model_spec in spec.pipeline.models: + convert_double_to_float_multiarray_type(model_spec) diff --git a/coremltools/proto/FeatureTypes_pb2.py b/coremltools/proto/FeatureTypes_pb2.py index eda542462..2b70cb1a5 100644 --- a/coremltools/proto/FeatureTypes_pb2.py +++ b/coremltools/proto/FeatureTypes_pb2.py @@ -19,7 +19,7 @@ name='FeatureTypes.proto', package='CoreML.Specification', syntax='proto3', - serialized_pb=_b('\n\x12\x46\x65\x61tureTypes.proto\x12\x14\x43oreML.Specification\"\x12\n\x10Int64FeatureType\"\x13\n\x11\x44oubleFeatureType\"\x13\n\x11StringFeatureType\"3\n\tSizeRange\x12\x12\n\nlowerBound\x18\x01 \x01(\x04\x12\x12\n\nupperBound\x18\x02 \x01(\x03\"\xfe\x04\n\x10ImageFeatureType\x12\r\n\x05width\x18\x01 \x01(\x03\x12\x0e\n\x06height\x18\x02 \x01(\x03\x12V\n\x0f\x65numeratedSizes\x18\x15 \x01(\x0b\x32;.CoreML.Specification.ImageFeatureType.EnumeratedImageSizesH\x00\x12O\n\x0eimageSizeRange\x18\x1f \x01(\x0b\x32\x35.CoreML.Specification.ImageFeatureType.ImageSizeRangeH\x00\x12\x45\n\ncolorSpace\x18\x03 \x01(\x0e\x32\x31.CoreML.Specification.ImageFeatureType.ColorSpace\x1a*\n\tImageSize\x12\r\n\x05width\x18\x01 \x01(\x04\x12\x0e\n\x06height\x18\x02 \x01(\x04\x1aW\n\x14\x45numeratedImageSizes\x12?\n\x05sizes\x18\x01 \x03(\x0b\x32\x30.CoreML.Specification.ImageFeatureType.ImageSize\x1a{\n\x0eImageSizeRange\x12\x33\n\nwidthRange\x18\x01 \x01(\x0b\x32\x1f.CoreML.Specification.SizeRange\x12\x34\n\x0bheightRange\x18\x02 \x01(\x0b\x32\x1f.CoreML.Specification.SizeRange\"F\n\nColorSpace\x12\x17\n\x13INVALID_COLOR_SPACE\x10\x00\x12\r\n\tGRAYSCALE\x10\n\x12\x07\n\x03RGB\x10\x14\x12\x07\n\x03\x42GR\x10\x1e\x42\x11\n\x0fSizeFlexibility\"\xa0\x04\n\x10\x41rrayFeatureType\x12\r\n\x05shape\x18\x01 \x03(\x03\x12\x46\n\x08\x64\x61taType\x18\x02 
\x01(\x0e\x32\x34.CoreML.Specification.ArrayFeatureType.ArrayDataType\x12S\n\x10\x65numeratedShapes\x18\x15 \x01(\x0b\x32\x37.CoreML.Specification.ArrayFeatureType.EnumeratedShapesH\x00\x12G\n\nshapeRange\x18\x1f \x01(\x0b\x32\x31.CoreML.Specification.ArrayFeatureType.ShapeRangeH\x00\x1a\x16\n\x05Shape\x12\r\n\x05shape\x18\x01 \x03(\x03\x1aP\n\x10\x45numeratedShapes\x12<\n\x06shapes\x18\x01 \x03(\x0b\x32,.CoreML.Specification.ArrayFeatureType.Shape\x1a\x41\n\nShapeRange\x12\x33\n\nsizeRanges\x18\x01 \x03(\x0b\x32\x1f.CoreML.Specification.SizeRange\"V\n\rArrayDataType\x12\x1b\n\x17INVALID_ARRAY_DATA_TYPE\x10\x00\x12\r\n\x07\x46LOAT32\x10\xa0\x80\x04\x12\x0c\n\x06\x44OUBLE\x10\xc0\x80\x04\x12\x0b\n\x05INT32\x10\xa0\x80\x08\x42\x12\n\x10ShapeFlexibility\"\xa4\x01\n\x15\x44ictionaryFeatureType\x12>\n\x0cint64KeyType\x18\x01 \x01(\x0b\x32&.CoreML.Specification.Int64FeatureTypeH\x00\x12@\n\rstringKeyType\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.StringFeatureTypeH\x00\x42\t\n\x07KeyType\"\xcd\x01\n\x13SequenceFeatureType\x12;\n\tint64Type\x18\x01 \x01(\x0b\x32&.CoreML.Specification.Int64FeatureTypeH\x00\x12=\n\nstringType\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.StringFeatureTypeH\x00\x12\x32\n\tsizeRange\x18\x65 \x01(\x0b\x32\x1f.CoreML.Specification.SizeRangeB\x06\n\x04Type\"\xee\x03\n\x0b\x46\x65\x61tureType\x12;\n\tint64Type\x18\x01 \x01(\x0b\x32&.CoreML.Specification.Int64FeatureTypeH\x00\x12=\n\ndoubleType\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.DoubleFeatureTypeH\x00\x12=\n\nstringType\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.StringFeatureTypeH\x00\x12;\n\timageType\x18\x04 \x01(\x0b\x32&.CoreML.Specification.ImageFeatureTypeH\x00\x12@\n\x0emultiArrayType\x18\x05 \x01(\x0b\x32&.CoreML.Specification.ArrayFeatureTypeH\x00\x12\x45\n\x0e\x64ictionaryType\x18\x06 \x01(\x0b\x32+.CoreML.Specification.DictionaryFeatureTypeH\x00\x12\x41\n\x0csequenceType\x18\x07 \x01(\x0b\x32).CoreML.Specification.SequenceFeatureTypeH\x00\x12\x13\n\nisOptional\x18\xe8\x07 \x01(\x08\x42\x06\n\x04TypeB\x02H\x03\x62\x06proto3') + serialized_pb=_b('\n\x12\x46\x65\x61tureTypes.proto\x12\x14\x43oreML.Specification\"\x12\n\x10Int64FeatureType\"\x13\n\x11\x44oubleFeatureType\"\x13\n\x11StringFeatureType\"3\n\tSizeRange\x12\x12\n\nlowerBound\x18\x01 \x01(\x04\x12\x12\n\nupperBound\x18\x02 \x01(\x03\"\xfe\x04\n\x10ImageFeatureType\x12\r\n\x05width\x18\x01 \x01(\x03\x12\x0e\n\x06height\x18\x02 \x01(\x03\x12V\n\x0f\x65numeratedSizes\x18\x15 \x01(\x0b\x32;.CoreML.Specification.ImageFeatureType.EnumeratedImageSizesH\x00\x12O\n\x0eimageSizeRange\x18\x1f \x01(\x0b\x32\x35.CoreML.Specification.ImageFeatureType.ImageSizeRangeH\x00\x12\x45\n\ncolorSpace\x18\x03 \x01(\x0e\x32\x31.CoreML.Specification.ImageFeatureType.ColorSpace\x1a*\n\tImageSize\x12\r\n\x05width\x18\x01 \x01(\x04\x12\x0e\n\x06height\x18\x02 \x01(\x04\x1aW\n\x14\x45numeratedImageSizes\x12?\n\x05sizes\x18\x01 \x03(\x0b\x32\x30.CoreML.Specification.ImageFeatureType.ImageSize\x1a{\n\x0eImageSizeRange\x12\x33\n\nwidthRange\x18\x01 \x01(\x0b\x32\x1f.CoreML.Specification.SizeRange\x12\x34\n\x0bheightRange\x18\x02 \x01(\x0b\x32\x1f.CoreML.Specification.SizeRange\"F\n\nColorSpace\x12\x17\n\x13INVALID_COLOR_SPACE\x10\x00\x12\r\n\tGRAYSCALE\x10\n\x12\x07\n\x03RGB\x10\x14\x12\x07\n\x03\x42GR\x10\x1e\x42\x11\n\x0fSizeFlexibility\"\x8e\x05\n\x10\x41rrayFeatureType\x12\r\n\x05shape\x18\x01 \x03(\x03\x12\x46\n\x08\x64\x61taType\x18\x02 \x01(\x0e\x32\x34.CoreML.Specification.ArrayFeatureType.ArrayDataType\x12S\n\x10\x65numeratedShapes\x18\x15 
\x01(\x0b\x32\x37.CoreML.Specification.ArrayFeatureType.EnumeratedShapesH\x00\x12G\n\nshapeRange\x18\x1f \x01(\x0b\x32\x31.CoreML.Specification.ArrayFeatureType.ShapeRangeH\x00\x12\x19\n\x0fintDefaultValue\x18) \x01(\x05H\x01\x12\x1b\n\x11\x66loatDefaultValue\x18\x33 \x01(\x02H\x01\x12\x1c\n\x12\x64oubleDefaultValue\x18= \x01(\x01H\x01\x1a\x16\n\x05Shape\x12\r\n\x05shape\x18\x01 \x03(\x03\x1aP\n\x10\x45numeratedShapes\x12<\n\x06shapes\x18\x01 \x03(\x0b\x32,.CoreML.Specification.ArrayFeatureType.Shape\x1a\x41\n\nShapeRange\x12\x33\n\nsizeRanges\x18\x01 \x03(\x0b\x32\x1f.CoreML.Specification.SizeRange\"V\n\rArrayDataType\x12\x1b\n\x17INVALID_ARRAY_DATA_TYPE\x10\x00\x12\r\n\x07\x46LOAT32\x10\xa0\x80\x04\x12\x0c\n\x06\x44OUBLE\x10\xc0\x80\x04\x12\x0b\n\x05INT32\x10\xa0\x80\x08\x42\x12\n\x10ShapeFlexibilityB\x16\n\x14\x64\x65\x66\x61ultOptionalValue\"\xa4\x01\n\x15\x44ictionaryFeatureType\x12>\n\x0cint64KeyType\x18\x01 \x01(\x0b\x32&.CoreML.Specification.Int64FeatureTypeH\x00\x12@\n\rstringKeyType\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.StringFeatureTypeH\x00\x42\t\n\x07KeyType\"\xcd\x01\n\x13SequenceFeatureType\x12;\n\tint64Type\x18\x01 \x01(\x0b\x32&.CoreML.Specification.Int64FeatureTypeH\x00\x12=\n\nstringType\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.StringFeatureTypeH\x00\x12\x32\n\tsizeRange\x18\x65 \x01(\x0b\x32\x1f.CoreML.Specification.SizeRangeB\x06\n\x04Type\"\xee\x03\n\x0b\x46\x65\x61tureType\x12;\n\tint64Type\x18\x01 \x01(\x0b\x32&.CoreML.Specification.Int64FeatureTypeH\x00\x12=\n\ndoubleType\x18\x02 \x01(\x0b\x32\'.CoreML.Specification.DoubleFeatureTypeH\x00\x12=\n\nstringType\x18\x03 \x01(\x0b\x32\'.CoreML.Specification.StringFeatureTypeH\x00\x12;\n\timageType\x18\x04 \x01(\x0b\x32&.CoreML.Specification.ImageFeatureTypeH\x00\x12@\n\x0emultiArrayType\x18\x05 \x01(\x0b\x32&.CoreML.Specification.ArrayFeatureTypeH\x00\x12\x45\n\x0e\x64ictionaryType\x18\x06 \x01(\x0b\x32+.CoreML.Specification.DictionaryFeatureTypeH\x00\x12\x41\n\x0csequenceType\x18\x07 \x01(\x0b\x32).CoreML.Specification.SequenceFeatureTypeH\x00\x12\x13\n\nisOptional\x18\xe8\x07 \x01(\x08\x42\x06\n\x04TypeB\x02H\x03\x62\x06proto3') ) @@ -79,8 +79,8 @@ ], containing_type=None, options=None, - serialized_start=1239, - serialized_end=1325, + serialized_start=1325, + serialized_end=1411, ) _sym_db.RegisterEnumDescriptor(_ARRAYFEATURETYPE_ARRAYDATATYPE) @@ -388,8 +388,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1066, - serialized_end=1088, + serialized_start=1152, + serialized_end=1174, ) _ARRAYFEATURETYPE_ENUMERATEDSHAPES = _descriptor.Descriptor( @@ -418,8 +418,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1090, - serialized_end=1170, + serialized_start=1176, + serialized_end=1256, ) _ARRAYFEATURETYPE_SHAPERANGE = _descriptor.Descriptor( @@ -448,8 +448,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1172, - serialized_end=1237, + serialized_start=1258, + serialized_end=1323, ) _ARRAYFEATURETYPE = _descriptor.Descriptor( @@ -487,6 +487,27 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='intDefaultValue', full_name='CoreML.Specification.ArrayFeatureType.intDefaultValue', index=4, + number=41, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='floatDefaultValue', 
full_name='CoreML.Specification.ArrayFeatureType.floatDefaultValue', index=5, + number=51, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='doubleDefaultValue', full_name='CoreML.Specification.ArrayFeatureType.doubleDefaultValue', index=6, + number=61, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -502,9 +523,12 @@ _descriptor.OneofDescriptor( name='ShapeFlexibility', full_name='CoreML.Specification.ArrayFeatureType.ShapeFlexibility', index=0, containing_type=None, fields=[]), + _descriptor.OneofDescriptor( + name='defaultOptionalValue', full_name='CoreML.Specification.ArrayFeatureType.defaultOptionalValue', + index=1, containing_type=None, fields=[]), ], serialized_start=801, - serialized_end=1345, + serialized_end=1455, ) @@ -544,8 +568,8 @@ name='KeyType', full_name='CoreML.Specification.DictionaryFeatureType.KeyType', index=0, containing_type=None, fields=[]), ], - serialized_start=1348, - serialized_end=1512, + serialized_start=1458, + serialized_end=1622, ) @@ -592,8 +616,8 @@ name='Type', full_name='CoreML.Specification.SequenceFeatureType.Type', index=0, containing_type=None, fields=[]), ], - serialized_start=1515, - serialized_end=1720, + serialized_start=1625, + serialized_end=1830, ) @@ -675,8 +699,8 @@ name='Type', full_name='CoreML.Specification.FeatureType.Type', index=0, containing_type=None, fields=[]), ], - serialized_start=1723, - serialized_end=2217, + serialized_start=1833, + serialized_end=2327, ) _IMAGEFEATURETYPE_IMAGESIZE.containing_type = _IMAGEFEATURETYPE @@ -710,6 +734,15 @@ _ARRAYFEATURETYPE.oneofs_by_name['ShapeFlexibility'].fields.append( _ARRAYFEATURETYPE.fields_by_name['shapeRange']) _ARRAYFEATURETYPE.fields_by_name['shapeRange'].containing_oneof = _ARRAYFEATURETYPE.oneofs_by_name['ShapeFlexibility'] +_ARRAYFEATURETYPE.oneofs_by_name['defaultOptionalValue'].fields.append( + _ARRAYFEATURETYPE.fields_by_name['intDefaultValue']) +_ARRAYFEATURETYPE.fields_by_name['intDefaultValue'].containing_oneof = _ARRAYFEATURETYPE.oneofs_by_name['defaultOptionalValue'] +_ARRAYFEATURETYPE.oneofs_by_name['defaultOptionalValue'].fields.append( + _ARRAYFEATURETYPE.fields_by_name['floatDefaultValue']) +_ARRAYFEATURETYPE.fields_by_name['floatDefaultValue'].containing_oneof = _ARRAYFEATURETYPE.oneofs_by_name['defaultOptionalValue'] +_ARRAYFEATURETYPE.oneofs_by_name['defaultOptionalValue'].fields.append( + _ARRAYFEATURETYPE.fields_by_name['doubleDefaultValue']) +_ARRAYFEATURETYPE.fields_by_name['doubleDefaultValue'].containing_oneof = _ARRAYFEATURETYPE.oneofs_by_name['defaultOptionalValue'] _DICTIONARYFEATURETYPE.fields_by_name['int64KeyType'].message_type = _INT64FEATURETYPE _DICTIONARYFEATURETYPE.fields_by_name['stringKeyType'].message_type = _STRINGFEATURETYPE _DICTIONARYFEATURETYPE.oneofs_by_name['KeyType'].fields.append( diff --git a/coremltools/proto/Model_pb2.py b/coremltools/proto/Model_pb2.py index 92edba16b..7b04c4915 100644 --- a/coremltools/proto/Model_pb2.py +++ b/coremltools/proto/Model_pb2.py @@ -239,7 +239,7 @@ name='Model.proto', package='CoreML.Specification', syntax='proto3', - 
serialized_pb=_b('\n\x0bModel.proto\x12\x14\x43oreML.Specification\x1a\x18VisionFeaturePrint.proto\x1a\x14TextClassifier.proto\x1a\x10WordTagger.proto\x1a\x0fGazetteer.proto\x1a\x13WordEmbedding.proto\x1a\x1b\x41rrayFeatureExtractor.proto\x1a\x1d\x42\x61yesianProbitRegressor.proto\x1a\x18\x43\x61tegoricalMapping.proto\x1a\x11\x43ustomModel.proto\x1a\x14\x44ictVectorizer.proto\x1a\x12\x46\x65\x61tureTypes.proto\x1a\x17\x46\x65\x61tureVectorizer.proto\x1a\x12GLMRegressor.proto\x1a\x13GLMClassifier.proto\x1a\x16NearestNeighbors.proto\x1a\x0eIdentity.proto\x1a\rImputer.proto\x1a\x13NeuralNetwork.proto\x1a\x10Normalizer.proto\x1a\x13OneHotEncoder.proto\x1a\x0cScaler.proto\x1a\x1bNonMaximumSuppression.proto\x1a\tSVM.proto\x1a\x12TreeEnsemble.proto\x1a\x10Parameters.proto\x1a\x1fItemSimilarityRecommender.proto\x1a SoundAnalysisPreprocessing.proto\x1a\x11LinkedModel.proto\"F\n\x08Pipeline\x12+\n\x06models\x18\x01 \x03(\x0b\x32\x1b.CoreML.Specification.Model\x12\r\n\x05names\x18\x02 \x03(\t\"F\n\x12PipelineClassifier\x12\x30\n\x08pipeline\x18\x01 \x01(\x0b\x32\x1e.CoreML.Specification.Pipeline\"E\n\x11PipelineRegressor\x12\x30\n\x08pipeline\x18\x01 \x01(\x0b\x32\x1e.CoreML.Specification.Pipeline\"m\n\x12\x46\x65\x61tureDescription\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x10shortDescription\x18\x02 \x01(\t\x12/\n\x04type\x18\x03 \x01(\x0b\x32!.CoreML.Specification.FeatureType\"\xd6\x01\n\x08Metadata\x12\x18\n\x10shortDescription\x18\x01 \x01(\t\x12\x15\n\rversionString\x18\x02 \x01(\t\x12\x0e\n\x06\x61uthor\x18\x03 \x01(\t\x12\x0f\n\x07license\x18\x04 \x01(\t\x12\x44\n\x0buserDefined\x18\x64 \x03(\x0b\x32/.CoreML.Specification.Metadata.UserDefinedEntry\x1a\x32\n\x10UserDefinedEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xba\x02\n\x10ModelDescription\x12\x37\n\x05input\x18\x01 \x03(\x0b\x32(.CoreML.Specification.FeatureDescription\x12\x38\n\x06output\x18\n \x03(\x0b\x32(.CoreML.Specification.FeatureDescription\x12\x1c\n\x14predictedFeatureName\x18\x0b \x01(\t\x12\"\n\x1apredictedProbabilitiesName\x18\x0c \x01(\t\x12?\n\rtrainingInput\x18\x32 \x03(\x0b\x32(.CoreML.Specification.FeatureDescription\x12\x30\n\x08metadata\x18\x64 \x01(\x0b\x32\x1e.CoreML.Specification.Metadata\"\xc2\x13\n\x05Model\x12\x1c\n\x14specificationVersion\x18\x01 \x01(\x05\x12;\n\x0b\x64\x65scription\x18\x02 \x01(\x0b\x32&.CoreML.Specification.ModelDescription\x12\x13\n\x0bisUpdatable\x18\n \x01(\x08\x12G\n\x12pipelineClassifier\x18\xc8\x01 \x01(\x0b\x32(.CoreML.Specification.PipelineClassifierH\x00\x12\x45\n\x11pipelineRegressor\x18\xc9\x01 \x01(\x0b\x32\'.CoreML.Specification.PipelineRegressorH\x00\x12\x33\n\x08pipeline\x18\xca\x01 \x01(\x0b\x32\x1e.CoreML.Specification.PipelineH\x00\x12;\n\x0cglmRegressor\x18\xac\x02 \x01(\x0b\x32\".CoreML.Specification.GLMRegressorH\x00\x12O\n\x16supportVectorRegressor\x18\xad\x02 \x01(\x0b\x32,.CoreML.Specification.SupportVectorRegressorH\x00\x12M\n\x15treeEnsembleRegressor\x18\xae\x02 \x01(\x0b\x32+.CoreML.Specification.TreeEnsembleRegressorH\x00\x12O\n\x16neuralNetworkRegressor\x18\xaf\x02 \x01(\x0b\x32,.CoreML.Specification.NeuralNetworkRegressorH\x00\x12Q\n\x17\x62\x61yesianProbitRegressor\x18\xb0\x02 \x01(\x0b\x32-.CoreML.Specification.BayesianProbitRegressorH\x00\x12=\n\rglmClassifier\x18\x90\x03 \x01(\x0b\x32#.CoreML.Specification.GLMClassifierH\x00\x12Q\n\x17supportVectorClassifier\x18\x91\x03 \x01(\x0b\x32-.CoreML.Specification.SupportVectorClassifierH\x00\x12O\n\x16treeEnsembleClassifier\x18\x92\x03 
\x01(\x0b\x32,.CoreML.Specification.TreeEnsembleClassifierH\x00\x12Q\n\x17neuralNetworkClassifier\x18\x93\x03 \x01(\x0b\x32-.CoreML.Specification.NeuralNetworkClassifierH\x00\x12Y\n\x1bkNearestNeighborsClassifier\x18\x94\x03 \x01(\x0b\x32\x31.CoreML.Specification.KNearestNeighborsClassifierH\x00\x12=\n\rneuralNetwork\x18\xf4\x03 \x01(\x0b\x32#.CoreML.Specification.NeuralNetworkH\x00\x12U\n\x19itemSimilarityRecommender\x18\xf5\x03 \x01(\x0b\x32/.CoreML.Specification.ItemSimilarityRecommenderH\x00\x12\x39\n\x0b\x63ustomModel\x18\xab\x04 \x01(\x0b\x32!.CoreML.Specification.CustomModelH\x00\x12\x39\n\x0blinkedModel\x18\xac\x04 \x01(\x0b\x32!.CoreML.Specification.LinkedModelH\x00\x12=\n\roneHotEncoder\x18\xd8\x04 \x01(\x0b\x32#.CoreML.Specification.OneHotEncoderH\x00\x12\x31\n\x07imputer\x18\xd9\x04 \x01(\x0b\x32\x1d.CoreML.Specification.ImputerH\x00\x12\x45\n\x11\x66\x65\x61tureVectorizer\x18\xda\x04 \x01(\x0b\x32\'.CoreML.Specification.FeatureVectorizerH\x00\x12?\n\x0e\x64ictVectorizer\x18\xdb\x04 \x01(\x0b\x32$.CoreML.Specification.DictVectorizerH\x00\x12/\n\x06scaler\x18\xdc\x04 \x01(\x0b\x32\x1c.CoreML.Specification.ScalerH\x00\x12G\n\x12\x63\x61tegoricalMapping\x18\xde\x04 \x01(\x0b\x32(.CoreML.Specification.CategoricalMappingH\x00\x12\x37\n\nnormalizer\x18\xdf\x04 \x01(\x0b\x32 .CoreML.Specification.NormalizerH\x00\x12M\n\x15\x61rrayFeatureExtractor\x18\xe1\x04 \x01(\x0b\x32+.CoreML.Specification.ArrayFeatureExtractorH\x00\x12M\n\x15nonMaximumSuppression\x18\xe2\x04 \x01(\x0b\x32+.CoreML.Specification.NonMaximumSuppressionH\x00\x12\x33\n\x08identity\x18\x84\x07 \x01(\x0b\x32\x1e.CoreML.Specification.IdentityH\x00\x12L\n\x0etextClassifier\x18\xd0\x0f \x01(\x0b\x32\x31.CoreML.Specification.CoreMLModels.TextClassifierH\x00\x12\x44\n\nwordTagger\x18\xd1\x0f \x01(\x0b\x32-.CoreML.Specification.CoreMLModels.WordTaggerH\x00\x12T\n\x12visionFeaturePrint\x18\xd2\x0f \x01(\x0b\x32\x35.CoreML.Specification.CoreMLModels.VisionFeaturePrintH\x00\x12\x64\n\x1asoundAnalysisPreprocessing\x18\xd3\x0f \x01(\x0b\x32=.CoreML.Specification.CoreMLModels.SoundAnalysisPreprocessingH\x00\x12\x42\n\tgazetteer\x18\xd4\x0f \x01(\x0b\x32,.CoreML.Specification.CoreMLModels.GazetteerH\x00\x12J\n\rwordEmbedding\x18\xd5\x0f \x01(\x0b\x32\x30.CoreML.Specification.CoreMLModels.WordEmbeddingH\x00\x42\x06\n\x04TypeB\x02H\x03P\x00P\x01P\x02P\x03P\x04P\x05P\x06P\x07P\x08P\tP\nP\x0bP\x0cP\rP\x0eP\x0fP\x10P\x11P\x12P\x13P\x14P\x15P\x16P\x17P\x18P\x19P\x1aP\x1b\x62\x06proto3') + serialized_pb=_b('\n\x0bModel.proto\x12\x14\x43oreML.Specification\x1a\x18VisionFeaturePrint.proto\x1a\x14TextClassifier.proto\x1a\x10WordTagger.proto\x1a\x0fGazetteer.proto\x1a\x13WordEmbedding.proto\x1a\x1b\x41rrayFeatureExtractor.proto\x1a\x1d\x42\x61yesianProbitRegressor.proto\x1a\x18\x43\x61tegoricalMapping.proto\x1a\x11\x43ustomModel.proto\x1a\x14\x44ictVectorizer.proto\x1a\x12\x46\x65\x61tureTypes.proto\x1a\x17\x46\x65\x61tureVectorizer.proto\x1a\x12GLMRegressor.proto\x1a\x13GLMClassifier.proto\x1a\x16NearestNeighbors.proto\x1a\x0eIdentity.proto\x1a\rImputer.proto\x1a\x13NeuralNetwork.proto\x1a\x10Normalizer.proto\x1a\x13OneHotEncoder.proto\x1a\x0cScaler.proto\x1a\x1bNonMaximumSuppression.proto\x1a\tSVM.proto\x1a\x12TreeEnsemble.proto\x1a\x10Parameters.proto\x1a\x1fItemSimilarityRecommender.proto\x1a SoundAnalysisPreprocessing.proto\x1a\x11LinkedModel.proto\"F\n\x08Pipeline\x12+\n\x06models\x18\x01 \x03(\x0b\x32\x1b.CoreML.Specification.Model\x12\r\n\x05names\x18\x02 \x03(\t\"F\n\x12PipelineClassifier\x12\x30\n\x08pipeline\x18\x01 
\x01(\x0b\x32\x1e.CoreML.Specification.Pipeline\"E\n\x11PipelineRegressor\x12\x30\n\x08pipeline\x18\x01 \x01(\x0b\x32\x1e.CoreML.Specification.Pipeline\"m\n\x12\x46\x65\x61tureDescription\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x10shortDescription\x18\x02 \x01(\t\x12/\n\x04type\x18\x03 \x01(\x0b\x32!.CoreML.Specification.FeatureType\"\xd6\x01\n\x08Metadata\x12\x18\n\x10shortDescription\x18\x01 \x01(\t\x12\x15\n\rversionString\x18\x02 \x01(\t\x12\x0e\n\x06\x61uthor\x18\x03 \x01(\t\x12\x0f\n\x07license\x18\x04 \x01(\t\x12\x44\n\x0buserDefined\x18\x64 \x03(\x0b\x32/.CoreML.Specification.Metadata.UserDefinedEntry\x1a\x32\n\x10UserDefinedEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xba\x02\n\x10ModelDescription\x12\x37\n\x05input\x18\x01 \x03(\x0b\x32(.CoreML.Specification.FeatureDescription\x12\x38\n\x06output\x18\n \x03(\x0b\x32(.CoreML.Specification.FeatureDescription\x12\x1c\n\x14predictedFeatureName\x18\x0b \x01(\t\x12\"\n\x1apredictedProbabilitiesName\x18\x0c \x01(\t\x12?\n\rtrainingInput\x18\x32 \x03(\x0b\x32(.CoreML.Specification.FeatureDescription\x12\x30\n\x08metadata\x18\x64 \x01(\x0b\x32\x1e.CoreML.Specification.Metadata\"4\n\x0fSerializedModel\x12\x12\n\nidentifier\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\x0c\"\x85\x14\n\x05Model\x12\x1c\n\x14specificationVersion\x18\x01 \x01(\x05\x12;\n\x0b\x64\x65scription\x18\x02 \x01(\x0b\x32&.CoreML.Specification.ModelDescription\x12\x13\n\x0bisUpdatable\x18\n \x01(\x08\x12G\n\x12pipelineClassifier\x18\xc8\x01 \x01(\x0b\x32(.CoreML.Specification.PipelineClassifierH\x00\x12\x45\n\x11pipelineRegressor\x18\xc9\x01 \x01(\x0b\x32\'.CoreML.Specification.PipelineRegressorH\x00\x12\x33\n\x08pipeline\x18\xca\x01 \x01(\x0b\x32\x1e.CoreML.Specification.PipelineH\x00\x12;\n\x0cglmRegressor\x18\xac\x02 \x01(\x0b\x32\".CoreML.Specification.GLMRegressorH\x00\x12O\n\x16supportVectorRegressor\x18\xad\x02 \x01(\x0b\x32,.CoreML.Specification.SupportVectorRegressorH\x00\x12M\n\x15treeEnsembleRegressor\x18\xae\x02 \x01(\x0b\x32+.CoreML.Specification.TreeEnsembleRegressorH\x00\x12O\n\x16neuralNetworkRegressor\x18\xaf\x02 \x01(\x0b\x32,.CoreML.Specification.NeuralNetworkRegressorH\x00\x12Q\n\x17\x62\x61yesianProbitRegressor\x18\xb0\x02 \x01(\x0b\x32-.CoreML.Specification.BayesianProbitRegressorH\x00\x12=\n\rglmClassifier\x18\x90\x03 \x01(\x0b\x32#.CoreML.Specification.GLMClassifierH\x00\x12Q\n\x17supportVectorClassifier\x18\x91\x03 \x01(\x0b\x32-.CoreML.Specification.SupportVectorClassifierH\x00\x12O\n\x16treeEnsembleClassifier\x18\x92\x03 \x01(\x0b\x32,.CoreML.Specification.TreeEnsembleClassifierH\x00\x12Q\n\x17neuralNetworkClassifier\x18\x93\x03 \x01(\x0b\x32-.CoreML.Specification.NeuralNetworkClassifierH\x00\x12Y\n\x1bkNearestNeighborsClassifier\x18\x94\x03 \x01(\x0b\x32\x31.CoreML.Specification.KNearestNeighborsClassifierH\x00\x12=\n\rneuralNetwork\x18\xf4\x03 \x01(\x0b\x32#.CoreML.Specification.NeuralNetworkH\x00\x12U\n\x19itemSimilarityRecommender\x18\xf5\x03 \x01(\x0b\x32/.CoreML.Specification.ItemSimilarityRecommenderH\x00\x12\x39\n\x0b\x63ustomModel\x18\xab\x04 \x01(\x0b\x32!.CoreML.Specification.CustomModelH\x00\x12\x39\n\x0blinkedModel\x18\xac\x04 \x01(\x0b\x32!.CoreML.Specification.LinkedModelH\x00\x12=\n\roneHotEncoder\x18\xd8\x04 \x01(\x0b\x32#.CoreML.Specification.OneHotEncoderH\x00\x12\x31\n\x07imputer\x18\xd9\x04 \x01(\x0b\x32\x1d.CoreML.Specification.ImputerH\x00\x12\x45\n\x11\x66\x65\x61tureVectorizer\x18\xda\x04 
\x01(\x0b\x32\'.CoreML.Specification.FeatureVectorizerH\x00\x12?\n\x0e\x64ictVectorizer\x18\xdb\x04 \x01(\x0b\x32$.CoreML.Specification.DictVectorizerH\x00\x12/\n\x06scaler\x18\xdc\x04 \x01(\x0b\x32\x1c.CoreML.Specification.ScalerH\x00\x12G\n\x12\x63\x61tegoricalMapping\x18\xde\x04 \x01(\x0b\x32(.CoreML.Specification.CategoricalMappingH\x00\x12\x37\n\nnormalizer\x18\xdf\x04 \x01(\x0b\x32 .CoreML.Specification.NormalizerH\x00\x12M\n\x15\x61rrayFeatureExtractor\x18\xe1\x04 \x01(\x0b\x32+.CoreML.Specification.ArrayFeatureExtractorH\x00\x12M\n\x15nonMaximumSuppression\x18\xe2\x04 \x01(\x0b\x32+.CoreML.Specification.NonMaximumSuppressionH\x00\x12\x33\n\x08identity\x18\x84\x07 \x01(\x0b\x32\x1e.CoreML.Specification.IdentityH\x00\x12L\n\x0etextClassifier\x18\xd0\x0f \x01(\x0b\x32\x31.CoreML.Specification.CoreMLModels.TextClassifierH\x00\x12\x44\n\nwordTagger\x18\xd1\x0f \x01(\x0b\x32-.CoreML.Specification.CoreMLModels.WordTaggerH\x00\x12T\n\x12visionFeaturePrint\x18\xd2\x0f \x01(\x0b\x32\x35.CoreML.Specification.CoreMLModels.VisionFeaturePrintH\x00\x12\x64\n\x1asoundAnalysisPreprocessing\x18\xd3\x0f \x01(\x0b\x32=.CoreML.Specification.CoreMLModels.SoundAnalysisPreprocessingH\x00\x12\x42\n\tgazetteer\x18\xd4\x0f \x01(\x0b\x32,.CoreML.Specification.CoreMLModels.GazetteerH\x00\x12J\n\rwordEmbedding\x18\xd5\x0f \x01(\x0b\x32\x30.CoreML.Specification.CoreMLModels.WordEmbeddingH\x00\x12\x41\n\x0fserializedModel\x18\xb8\x17 \x01(\x0b\x32%.CoreML.Specification.SerializedModelH\x00\x42\x06\n\x04TypeB\x02H\x03P\x00P\x01P\x02P\x03P\x04P\x05P\x06P\x07P\x08P\tP\nP\x0bP\x0cP\rP\x0eP\x0fP\x10P\x11P\x12P\x13P\x14P\x15P\x16P\x17P\x18P\x19P\x1aP\x1b\x62\x06proto3') , dependencies=[VisionFeaturePrint__pb2.DESCRIPTOR,TextClassifier__pb2.DESCRIPTOR,WordTagger__pb2.DESCRIPTOR,Gazetteer__pb2.DESCRIPTOR,WordEmbedding__pb2.DESCRIPTOR,ArrayFeatureExtractor__pb2.DESCRIPTOR,BayesianProbitRegressor__pb2.DESCRIPTOR,CategoricalMapping__pb2.DESCRIPTOR,CustomModel__pb2.DESCRIPTOR,DictVectorizer__pb2.DESCRIPTOR,FeatureTypes__pb2.DESCRIPTOR,FeatureVectorizer__pb2.DESCRIPTOR,GLMRegressor__pb2.DESCRIPTOR,GLMClassifier__pb2.DESCRIPTOR,NearestNeighbors__pb2.DESCRIPTOR,Identity__pb2.DESCRIPTOR,Imputer__pb2.DESCRIPTOR,NeuralNetwork__pb2.DESCRIPTOR,Normalizer__pb2.DESCRIPTOR,OneHotEncoder__pb2.DESCRIPTOR,Scaler__pb2.DESCRIPTOR,NonMaximumSuppression__pb2.DESCRIPTOR,SVM__pb2.DESCRIPTOR,TreeEnsemble__pb2.DESCRIPTOR,Parameters__pb2.DESCRIPTOR,ItemSimilarityRecommender__pb2.DESCRIPTOR,SoundAnalysisPreprocessing__pb2.DESCRIPTOR,LinkedModel__pb2.DESCRIPTOR,], public_dependencies=[VisionFeaturePrint__pb2.DESCRIPTOR,TextClassifier__pb2.DESCRIPTOR,WordTagger__pb2.DESCRIPTOR,Gazetteer__pb2.DESCRIPTOR,WordEmbedding__pb2.DESCRIPTOR,ArrayFeatureExtractor__pb2.DESCRIPTOR,BayesianProbitRegressor__pb2.DESCRIPTOR,CategoricalMapping__pb2.DESCRIPTOR,CustomModel__pb2.DESCRIPTOR,DictVectorizer__pb2.DESCRIPTOR,FeatureTypes__pb2.DESCRIPTOR,FeatureVectorizer__pb2.DESCRIPTOR,GLMRegressor__pb2.DESCRIPTOR,GLMClassifier__pb2.DESCRIPTOR,NearestNeighbors__pb2.DESCRIPTOR,Identity__pb2.DESCRIPTOR,Imputer__pb2.DESCRIPTOR,NeuralNetwork__pb2.DESCRIPTOR,Normalizer__pb2.DESCRIPTOR,OneHotEncoder__pb2.DESCRIPTOR,Scaler__pb2.DESCRIPTOR,NonMaximumSuppression__pb2.DESCRIPTOR,SVM__pb2.DESCRIPTOR,TreeEnsemble__pb2.DESCRIPTOR,Parameters__pb2.DESCRIPTOR,ItemSimilarityRecommender__pb2.DESCRIPTOR,SoundAnalysisPreprocessing__pb2.DESCRIPTOR,LinkedModel__pb2.DESCRIPTOR,]) @@ -554,6 +554,44 @@ ) +_SERIALIZEDMODEL = _descriptor.Descriptor( + name='SerializedModel', + 
full_name='CoreML.Specification.SerializedModel', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='identifier', full_name='CoreML.Specification.SerializedModel.identifier', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='model', full_name='CoreML.Specification.SerializedModel.model', index=1, + number=2, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1507, + serialized_end=1559, +) + + _MODEL = _descriptor.Descriptor( name='Model', full_name='CoreML.Specification.Model', @@ -813,6 +851,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='serializedModel', full_name='CoreML.Specification.Model.serializedModel', index=36, + number=3000, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -828,8 +873,8 @@ name='Type', full_name='CoreML.Specification.Model.Type', index=0, containing_type=None, fields=[]), ], - serialized_start=1508, - serialized_end=4006, + serialized_start=1562, + serialized_end=4127, ) _PIPELINE.fields_by_name['models'].message_type = _MODEL @@ -876,6 +921,7 @@ _MODEL.fields_by_name['soundAnalysisPreprocessing'].message_type = SoundAnalysisPreprocessing__pb2._SOUNDANALYSISPREPROCESSING _MODEL.fields_by_name['gazetteer'].message_type = Gazetteer__pb2._GAZETTEER _MODEL.fields_by_name['wordEmbedding'].message_type = WordEmbedding__pb2._WORDEMBEDDING +_MODEL.fields_by_name['serializedModel'].message_type = _SERIALIZEDMODEL _MODEL.oneofs_by_name['Type'].fields.append( _MODEL.fields_by_name['pipelineClassifier']) _MODEL.fields_by_name['pipelineClassifier'].containing_oneof = _MODEL.oneofs_by_name['Type'] @@ -975,12 +1021,16 @@ _MODEL.oneofs_by_name['Type'].fields.append( _MODEL.fields_by_name['wordEmbedding']) _MODEL.fields_by_name['wordEmbedding'].containing_oneof = _MODEL.oneofs_by_name['Type'] +_MODEL.oneofs_by_name['Type'].fields.append( + _MODEL.fields_by_name['serializedModel']) +_MODEL.fields_by_name['serializedModel'].containing_oneof = _MODEL.oneofs_by_name['Type'] DESCRIPTOR.message_types_by_name['Pipeline'] = _PIPELINE DESCRIPTOR.message_types_by_name['PipelineClassifier'] = _PIPELINECLASSIFIER DESCRIPTOR.message_types_by_name['PipelineRegressor'] = _PIPELINEREGRESSOR DESCRIPTOR.message_types_by_name['FeatureDescription'] = _FEATUREDESCRIPTION DESCRIPTOR.message_types_by_name['Metadata'] = _METADATA DESCRIPTOR.message_types_by_name['ModelDescription'] = _MODELDESCRIPTION +DESCRIPTOR.message_types_by_name['SerializedModel'] = _SERIALIZEDMODEL DESCRIPTOR.message_types_by_name['Model'] = _MODEL _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -1034,6 +1084,13 @@ )) _sym_db.RegisterMessage(ModelDescription) +SerializedModel = _reflection.GeneratedProtocolMessageType('SerializedModel', 
(_message.Message,), dict( + DESCRIPTOR = _SERIALIZEDMODEL, + __module__ = 'Model_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.SerializedModel) + )) +_sym_db.RegisterMessage(SerializedModel) + Model = _reflection.GeneratedProtocolMessageType('Model', (_message.Message,), dict( DESCRIPTOR = _MODEL, __module__ = 'Model_pb2' diff --git a/coremltools/proto/NeuralNetwork_pb2.py b/coremltools/proto/NeuralNetwork_pb2.py index 3b25893ca..98a34ff7c 100644 --- a/coremltools/proto/NeuralNetwork_pb2.py +++ b/coremltools/proto/NeuralNetwork_pb2.py @@ -36,7 +36,7 @@ name='NeuralNetwork.proto', package='CoreML.Specification', syntax='proto3', - serialized_pb=_b('\n\x13NeuralNetwork.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\x1a\x10Parameters.proto\"\x88\x03\n\rNeuralNetwork\x12\x38\n\x06layers\x18\x01 \x03(\x0b\x32(.CoreML.Specification.NeuralNetworkLayer\x12G\n\rpreprocessing\x18\x02 \x03(\x0b\x32\x30.CoreML.Specification.NeuralNetworkPreprocessing\x12Y\n\x16\x61rrayInputShapeMapping\x18\x05 \x01(\x0e\x32\x39.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping\x12T\n\x16imageInputShapeMapping\x18\x06 \x01(\x0e\x32\x34.CoreML.Specification.NeuralNetworkImageShapeMapping\x12\x43\n\x0cupdateParams\x18\n \x01(\x0b\x32-.CoreML.Specification.NetworkUpdateParameters\"x\n\x18NeuralNetworkImageScaler\x12\x14\n\x0c\x63hannelScale\x18\n \x01(\x02\x12\x10\n\x08\x62lueBias\x18\x14 \x01(\x02\x12\x11\n\tgreenBias\x18\x15 \x01(\x02\x12\x0f\n\x07redBias\x18\x16 \x01(\x02\x12\x10\n\x08grayBias\x18\x1e \x01(\x02\"+\n\x16NeuralNetworkMeanImage\x12\x11\n\tmeanImage\x18\x01 \x03(\x02\"\xc6\x01\n\x1aNeuralNetworkPreprocessing\x12\x13\n\x0b\x66\x65\x61tureName\x18\x01 \x01(\t\x12@\n\x06scaler\x18\n \x01(\x0b\x32..CoreML.Specification.NeuralNetworkImageScalerH\x00\x12\x41\n\tmeanImage\x18\x0b \x01(\x0b\x32,.CoreML.Specification.NeuralNetworkMeanImageH\x00\x42\x0e\n\x0cpreprocessor\"\x10\n\x0e\x41\x63tivationReLU\"$\n\x13\x41\x63tivationLeakyReLU\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x10\n\x0e\x41\x63tivationTanh\"3\n\x14\x41\x63tivationScaledTanh\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"\x13\n\x11\x41\x63tivationSigmoid\"/\n\x10\x41\x63tivationLinear\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"4\n\x15\x41\x63tivationSigmoidHard\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"D\n\x0f\x41\x63tivationPReLU\x12\x31\n\x05\x61lpha\x18\x01 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\x1e\n\rActivationELU\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"*\n\x19\x41\x63tivationThresholdedReLU\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x14\n\x12\x41\x63tivationSoftsign\"\x14\n\x12\x41\x63tivationSoftplus\"\x83\x01\n\x1c\x41\x63tivationParametricSoftplus\x12\x31\n\x05\x61lpha\x18\x01 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62\x65ta\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xd4\x06\n\x10\x41\x63tivationParams\x12\x38\n\x06linear\x18\x05 \x01(\x0b\x32&.CoreML.Specification.ActivationLinearH\x00\x12\x34\n\x04ReLU\x18\n \x01(\x0b\x32$.CoreML.Specification.ActivationReLUH\x00\x12>\n\tleakyReLU\x18\x0f \x01(\x0b\x32).CoreML.Specification.ActivationLeakyReLUH\x00\x12J\n\x0fthresholdedReLU\x18\x14 \x01(\x0b\x32/.CoreML.Specification.ActivationThresholdedReLUH\x00\x12\x36\n\x05PReLU\x18\x19 \x01(\x0b\x32%.CoreML.Specification.ActivationPReLUH\x00\x12\x34\n\x04tanh\x18\x1e 
\x01(\x0b\x32$.CoreML.Specification.ActivationTanhH\x00\x12@\n\nscaledTanh\x18\x1f \x01(\x0b\x32*.CoreML.Specification.ActivationScaledTanhH\x00\x12:\n\x07sigmoid\x18( \x01(\x0b\x32\'.CoreML.Specification.ActivationSigmoidH\x00\x12\x42\n\x0bsigmoidHard\x18) \x01(\x0b\x32+.CoreML.Specification.ActivationSigmoidHardH\x00\x12\x32\n\x03\x45LU\x18\x32 \x01(\x0b\x32#.CoreML.Specification.ActivationELUH\x00\x12<\n\x08softsign\x18< \x01(\x0b\x32(.CoreML.Specification.ActivationSoftsignH\x00\x12<\n\x08softplus\x18\x46 \x01(\x0b\x32(.CoreML.Specification.ActivationSoftplusH\x00\x12P\n\x12parametricSoftplus\x18G \x01(\x0b\x32\x32.CoreML.Specification.ActivationParametricSoftplusH\x00\x42\x12\n\x10NonlinearityType\"(\n\x06Tensor\x12\x0c\n\x04rank\x18\x01 \x01(\r\x12\x10\n\x08\x64imValue\x18\x02 \x03(\x03\"\xceQ\n\x12NeuralNetworkLayer\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x02 \x03(\t\x12\x0e\n\x06output\x18\x03 \x03(\t\x12\x31\n\x0binputTensor\x18\x04 \x03(\x0b\x32\x1c.CoreML.Specification.Tensor\x12\x32\n\x0coutputTensor\x18\x05 \x03(\x0b\x32\x1c.CoreML.Specification.Tensor\x12\x13\n\x0bisUpdatable\x18\n \x01(\x08\x12\x43\n\x0b\x63onvolution\x18\x64 \x01(\x0b\x32,.CoreML.Specification.ConvolutionLayerParamsH\x00\x12;\n\x07pooling\x18x \x01(\x0b\x32(.CoreML.Specification.PoolingLayerParamsH\x00\x12=\n\nactivation\x18\x82\x01 \x01(\x0b\x32&.CoreML.Specification.ActivationParamsH\x00\x12\x46\n\x0cinnerProduct\x18\x8c\x01 \x01(\x0b\x32-.CoreML.Specification.InnerProductLayerParamsH\x00\x12@\n\tembedding\x18\x96\x01 \x01(\x0b\x32*.CoreML.Specification.EmbeddingLayerParamsH\x00\x12@\n\tbatchnorm\x18\xa0\x01 \x01(\x0b\x32*.CoreML.Specification.BatchnormLayerParamsH\x00\x12\x46\n\x03mvn\x18\xa5\x01 \x01(\x0b\x32\x36.CoreML.Specification.MeanVarianceNormalizeLayerParamsH\x00\x12\x44\n\x0bl2normalize\x18\xaa\x01 \x01(\x0b\x32,.CoreML.Specification.L2NormalizeLayerParamsH\x00\x12<\n\x07softmax\x18\xaf\x01 \x01(\x0b\x32(.CoreML.Specification.SoftmaxLayerParamsH\x00\x12\x34\n\x03lrn\x18\xb4\x01 \x01(\x0b\x32$.CoreML.Specification.LRNLayerParamsH\x00\x12\x36\n\x04\x63rop\x18\xbe\x01 \x01(\x0b\x32%.CoreML.Specification.CropLayerParamsH\x00\x12<\n\x07padding\x18\xc8\x01 \x01(\x0b\x32(.CoreML.Specification.PaddingLayerParamsH\x00\x12>\n\x08upsample\x18\xd2\x01 \x01(\x0b\x32).CoreML.Specification.UpsampleLayerParamsH\x00\x12J\n\x0eresizeBilinear\x18\xd3\x01 \x01(\x0b\x32/.CoreML.Specification.ResizeBilinearLayerParamsH\x00\x12\x42\n\ncropResize\x18\xd4\x01 \x01(\x0b\x32+.CoreML.Specification.CropResizeLayerParamsH\x00\x12@\n\x05unary\x18\xdc\x01 \x01(\x0b\x32..CoreML.Specification.UnaryFunctionLayerParamsH\x00\x12\x34\n\x03\x61\x64\x64\x18\xe6\x01 \x01(\x0b\x32$.CoreML.Specification.AddLayerParamsH\x00\x12>\n\x08multiply\x18\xe7\x01 \x01(\x0b\x32).CoreML.Specification.MultiplyLayerParamsH\x00\x12<\n\x07\x61verage\x18\xf0\x01 \x01(\x0b\x32(.CoreML.Specification.AverageLayerParamsH\x00\x12\x38\n\x05scale\x18\xf5\x01 \x01(\x0b\x32&.CoreML.Specification.ScaleLayerParamsH\x00\x12\x36\n\x04\x62ias\x18\xfa\x01 \x01(\x0b\x32%.CoreML.Specification.BiasLayerParamsH\x00\x12\x34\n\x03max\x18\x84\x02 \x01(\x0b\x32$.CoreML.Specification.MaxLayerParamsH\x00\x12\x34\n\x03min\x18\x85\x02 \x01(\x0b\x32$.CoreML.Specification.MinLayerParamsH\x00\x12;\n\x03\x64ot\x18\x8e\x02 \x01(\x0b\x32+.CoreML.Specification.DotProductLayerParamsH\x00\x12:\n\x06reduce\x18\x98\x02 \x01(\x0b\x32\'.CoreML.Specification.ReduceLayerParamsH\x00\x12\x46\n\x0cloadConstant\x18\xa2\x02 
\x01(\x0b\x32-.CoreML.Specification.LoadConstantLayerParamsH\x00\x12<\n\x07reshape\x18\xac\x02 \x01(\x0b\x32(.CoreML.Specification.ReshapeLayerParamsH\x00\x12<\n\x07\x66latten\x18\xad\x02 \x01(\x0b\x32(.CoreML.Specification.FlattenLayerParamsH\x00\x12<\n\x07permute\x18\xb6\x02 \x01(\x0b\x32(.CoreML.Specification.PermuteLayerParamsH\x00\x12:\n\x06\x63oncat\x18\xc0\x02 \x01(\x0b\x32\'.CoreML.Specification.ConcatLayerParamsH\x00\x12\x38\n\x05split\x18\xca\x02 \x01(\x0b\x32&.CoreML.Specification.SplitLayerParamsH\x00\x12J\n\x0esequenceRepeat\x18\xd4\x02 \x01(\x0b\x32/.CoreML.Specification.SequenceRepeatLayerParamsH\x00\x12J\n\x0ereorganizeData\x18\xd9\x02 \x01(\x0b\x32/.CoreML.Specification.ReorganizeDataLayerParamsH\x00\x12\x38\n\x05slice\x18\xde\x02 \x01(\x0b\x32&.CoreML.Specification.SliceLayerParamsH\x00\x12L\n\x0fsimpleRecurrent\x18\x90\x03 \x01(\x0b\x32\x30.CoreML.Specification.SimpleRecurrentLayerParamsH\x00\x12\x34\n\x03gru\x18\x9a\x03 \x01(\x0b\x32$.CoreML.Specification.GRULayerParamsH\x00\x12R\n\x12uniDirectionalLSTM\x18\xa4\x03 \x01(\x0b\x32\x33.CoreML.Specification.UniDirectionalLSTMLayerParamsH\x00\x12P\n\x11\x62iDirectionalLSTM\x18\xae\x03 \x01(\x0b\x32\x32.CoreML.Specification.BiDirectionalLSTMLayerParamsH\x00\x12:\n\x06\x63ustom\x18\xf4\x03 \x01(\x0b\x32\'.CoreML.Specification.CustomLayerParamsH\x00\x12\x36\n\x04\x63opy\x18\xd8\x04 \x01(\x0b\x32%.CoreML.Specification.CopyLayerParamsH\x00\x12:\n\x06\x62ranch\x18\xdd\x04 \x01(\x0b\x32\'.CoreML.Specification.BranchLayerParamsH\x00\x12\x36\n\x04loop\x18\xe7\x04 \x01(\x0b\x32%.CoreML.Specification.LoopLayerParamsH\x00\x12@\n\tloopBreak\x18\xec\x04 \x01(\x0b\x32*.CoreML.Specification.LoopBreakLayerParamsH\x00\x12\x46\n\x0cloopContinue\x18\xf1\x04 \x01(\x0b\x32-.CoreML.Specification.LoopContinueLayerParamsH\x00\x12\x44\n\x0brangeStatic\x18\xfb\x04 \x01(\x0b\x32,.CoreML.Specification.RangeStaticLayerParamsH\x00\x12\x46\n\x0crangeDynamic\x18\x80\x05 \x01(\x0b\x32-.CoreML.Specification.RangeDynamicLayerParamsH\x00\x12\x36\n\x04\x63lip\x18\x94\x05 \x01(\x0b\x32%.CoreML.Specification.ClipLayerParamsH\x00\x12\x36\n\x04\x63\x65il\x18\x99\x05 \x01(\x0b\x32%.CoreML.Specification.CeilLayerParamsH\x00\x12\x38\n\x05\x66loor\x18\x9e\x05 \x01(\x0b\x32&.CoreML.Specification.FloorLayerParamsH\x00\x12\x36\n\x04sign\x18\xa8\x05 \x01(\x0b\x32%.CoreML.Specification.SignLayerParamsH\x00\x12\x38\n\x05round\x18\xad\x05 \x01(\x0b\x32&.CoreML.Specification.RoundLayerParamsH\x00\x12\x36\n\x04\x65xp2\x18\xbc\x05 \x01(\x0b\x32%.CoreML.Specification.Exp2LayerParamsH\x00\x12\x34\n\x03sin\x18\xc6\x05 \x01(\x0b\x32$.CoreML.Specification.SinLayerParamsH\x00\x12\x34\n\x03\x63os\x18\xcb\x05 \x01(\x0b\x32$.CoreML.Specification.CosLayerParamsH\x00\x12\x34\n\x03tan\x18\xd0\x05 \x01(\x0b\x32$.CoreML.Specification.TanLayerParamsH\x00\x12\x36\n\x04\x61sin\x18\xda\x05 \x01(\x0b\x32%.CoreML.Specification.AsinLayerParamsH\x00\x12\x36\n\x04\x61\x63os\x18\xdf\x05 \x01(\x0b\x32%.CoreML.Specification.AcosLayerParamsH\x00\x12\x36\n\x04\x61tan\x18\xe4\x05 \x01(\x0b\x32%.CoreML.Specification.AtanLayerParamsH\x00\x12\x36\n\x04sinh\x18\xee\x05 \x01(\x0b\x32%.CoreML.Specification.SinhLayerParamsH\x00\x12\x36\n\x04\x63osh\x18\xf3\x05 \x01(\x0b\x32%.CoreML.Specification.CoshLayerParamsH\x00\x12\x36\n\x04tanh\x18\xf8\x05 \x01(\x0b\x32%.CoreML.Specification.TanhLayerParamsH\x00\x12\x38\n\x05\x61sinh\x18\x82\x06 \x01(\x0b\x32&.CoreML.Specification.AsinhLayerParamsH\x00\x12\x38\n\x05\x61\x63osh\x18\x87\x06 
\x01(\x0b\x32&.CoreML.Specification.AcoshLayerParamsH\x00\x12\x38\n\x05\x61tanh\x18\x8c\x06 \x01(\x0b\x32&.CoreML.Specification.AtanhLayerParamsH\x00\x12\x34\n\x03\x65rf\x18\x96\x06 \x01(\x0b\x32$.CoreML.Specification.ErfLayerParamsH\x00\x12\x36\n\x04gelu\x18\x9b\x06 \x01(\x0b\x32%.CoreML.Specification.GeluLayerParamsH\x00\x12\x38\n\x05\x65qual\x18\xaf\x06 \x01(\x0b\x32&.CoreML.Specification.EqualLayerParamsH\x00\x12>\n\x08notEqual\x18\xb4\x06 \x01(\x0b\x32).CoreML.Specification.NotEqualLayerParamsH\x00\x12>\n\x08lessThan\x18\xb9\x06 \x01(\x0b\x32).CoreML.Specification.LessThanLayerParamsH\x00\x12@\n\tlessEqual\x18\xbb\x06 \x01(\x0b\x32*.CoreML.Specification.LessEqualLayerParamsH\x00\x12\x44\n\x0bgreaterThan\x18\xbe\x06 \x01(\x0b\x32,.CoreML.Specification.GreaterThanLayerParamsH\x00\x12\x46\n\x0cgreaterEqual\x18\xc0\x06 \x01(\x0b\x32-.CoreML.Specification.GreaterEqualLayerParamsH\x00\x12@\n\tlogicalOr\x18\xc8\x06 \x01(\x0b\x32*.CoreML.Specification.LogicalOrLayerParamsH\x00\x12\x42\n\nlogicalXor\x18\xcd\x06 \x01(\x0b\x32+.CoreML.Specification.LogicalXorLayerParamsH\x00\x12\x42\n\nlogicalNot\x18\xd2\x06 \x01(\x0b\x32+.CoreML.Specification.LogicalNotLayerParamsH\x00\x12\x42\n\nlogicalAnd\x18\xd7\x06 \x01(\x0b\x32+.CoreML.Specification.LogicalAndLayerParamsH\x00\x12N\n\x10modBroadcastable\x18\xe1\x06 \x01(\x0b\x32\x31.CoreML.Specification.ModBroadcastableLayerParamsH\x00\x12N\n\x10minBroadcastable\x18\xe6\x06 \x01(\x0b\x32\x31.CoreML.Specification.MinBroadcastableLayerParamsH\x00\x12N\n\x10maxBroadcastable\x18\xeb\x06 \x01(\x0b\x32\x31.CoreML.Specification.MaxBroadcastableLayerParamsH\x00\x12N\n\x10\x61\x64\x64\x42roadcastable\x18\xf0\x06 \x01(\x0b\x32\x31.CoreML.Specification.AddBroadcastableLayerParamsH\x00\x12N\n\x10powBroadcastable\x18\xf5\x06 \x01(\x0b\x32\x31.CoreML.Specification.PowBroadcastableLayerParamsH\x00\x12T\n\x13\x64ivideBroadcastable\x18\xfa\x06 \x01(\x0b\x32\x34.CoreML.Specification.DivideBroadcastableLayerParamsH\x00\x12X\n\x15\x66loorDivBroadcastable\x18\xff\x06 \x01(\x0b\x32\x36.CoreML.Specification.FloorDivBroadcastableLayerParamsH\x00\x12X\n\x15multiplyBroadcastable\x18\x84\x07 \x01(\x0b\x32\x36.CoreML.Specification.MultiplyBroadcastableLayerParamsH\x00\x12X\n\x15subtractBroadcastable\x18\x89\x07 \x01(\x0b\x32\x36.CoreML.Specification.SubtractBroadcastableLayerParamsH\x00\x12\x36\n\x04tile\x18\x98\x07 \x01(\x0b\x32%.CoreML.Specification.TileLayerParamsH\x00\x12\x38\n\x05stack\x18\x9d\x07 \x01(\x0b\x32&.CoreML.Specification.StackLayerParamsH\x00\x12:\n\x06gather\x18\xa2\x07 \x01(\x0b\x32\'.CoreML.Specification.GatherLayerParamsH\x00\x12<\n\x07scatter\x18\xa7\x07 \x01(\x0b\x32(.CoreML.Specification.ScatterLayerParamsH\x00\x12>\n\x08gatherND\x18\xac\x07 \x01(\x0b\x32).CoreML.Specification.GatherNDLayerParamsH\x00\x12@\n\tscatterND\x18\xb1\x07 \x01(\x0b\x32*.CoreML.Specification.ScatterNDLayerParamsH\x00\x12@\n\tsoftmaxND\x18\xb6\x07 \x01(\x0b\x32*.CoreML.Specification.SoftmaxNDLayerParamsH\x00\x12L\n\x0fgatherAlongAxis\x18\xb8\x07 \x01(\x0b\x32\x30.CoreML.Specification.GatherAlongAxisLayerParamsH\x00\x12N\n\x10scatterAlongAxis\x18\xba\x07 \x01(\x0b\x32\x31.CoreML.Specification.ScatterAlongAxisLayerParamsH\x00\x12<\n\x07reverse\x18\xc0\x07 \x01(\x0b\x32(.CoreML.Specification.ReverseLayerParamsH\x00\x12\x42\n\nreverseSeq\x18\xc5\x07 \x01(\x0b\x32+.CoreML.Specification.ReverseSeqLayerParamsH\x00\x12<\n\x07splitND\x18\xcf\x07 \x01(\x0b\x32(.CoreML.Specification.SplitNDLayerParamsH\x00\x12>\n\x08\x63oncatND\x18\xd4\x07 
\x01(\x0b\x32).CoreML.Specification.ConcatNDLayerParamsH\x00\x12@\n\ttranspose\x18\xd9\x07 \x01(\x0b\x32*.CoreML.Specification.TransposeLayerParamsH\x00\x12\x44\n\x0bsliceStatic\x18\xe3\x07 \x01(\x0b\x32,.CoreML.Specification.SliceStaticLayerParamsH\x00\x12\x46\n\x0csliceDynamic\x18\xe8\x07 \x01(\x0b\x32-.CoreML.Specification.SliceDynamicLayerParamsH\x00\x12J\n\x0eslidingWindows\x18\xed\x07 \x01(\x0b\x32/.CoreML.Specification.SlidingWindowsLayerParamsH\x00\x12\x36\n\x04topK\x18\xf7\x07 \x01(\x0b\x32%.CoreML.Specification.TopKLayerParamsH\x00\x12:\n\x06\x61rgMin\x18\xfc\x07 \x01(\x0b\x32\'.CoreML.Specification.ArgMinLayerParamsH\x00\x12:\n\x06\x61rgMax\x18\x81\x08 \x01(\x0b\x32\'.CoreML.Specification.ArgMaxLayerParamsH\x00\x12\x44\n\x0b\x65mbeddingND\x18\x90\x08 \x01(\x0b\x32,.CoreML.Specification.EmbeddingNDLayerParamsH\x00\x12H\n\rbatchedMatmul\x18\x95\x08 \x01(\x0b\x32..CoreML.Specification.BatchedMatMulLayerParamsH\x00\x12>\n\x08getShape\x18\xa9\x08 \x01(\x0b\x32).CoreML.Specification.GetShapeLayerParamsH\x00\x12J\n\x0eloadConstantND\x18\xae\x08 \x01(\x0b\x32/.CoreML.Specification.LoadConstantNDLayerParamsH\x00\x12>\n\x08\x66illLike\x18\xb8\x08 \x01(\x0b\x32).CoreML.Specification.FillLikeLayerParamsH\x00\x12\x42\n\nfillStatic\x18\xbd\x08 \x01(\x0b\x32+.CoreML.Specification.FillStaticLayerParamsH\x00\x12\x44\n\x0b\x66illDynamic\x18\xc2\x08 \x01(\x0b\x32,.CoreML.Specification.FillDynamicLayerParamsH\x00\x12L\n\x0f\x62roadcastToLike\x18\xcc\x08 \x01(\x0b\x32\x30.CoreML.Specification.BroadcastToLikeLayerParamsH\x00\x12P\n\x11\x62roadcastToStatic\x18\xd1\x08 \x01(\x0b\x32\x32.CoreML.Specification.BroadcastToStaticLayerParamsH\x00\x12R\n\x12\x62roadcastToDynamic\x18\xd6\x08 \x01(\x0b\x32\x33.CoreML.Specification.BroadcastToDynamicLayerParamsH\x00\x12<\n\x07squeeze\x18\xe0\x08 \x01(\x0b\x32(.CoreML.Specification.SqueezeLayerParamsH\x00\x12\x42\n\nexpandDims\x18\xe5\x08 \x01(\x0b\x32+.CoreML.Specification.ExpandDimsLayerParamsH\x00\x12\x44\n\x0b\x66lattenTo2D\x18\xea\x08 \x01(\x0b\x32,.CoreML.Specification.FlattenTo2DLayerParamsH\x00\x12\x44\n\x0breshapeLike\x18\xef\x08 \x01(\x0b\x32,.CoreML.Specification.ReshapeLikeLayerParamsH\x00\x12H\n\rreshapeStatic\x18\xf4\x08 \x01(\x0b\x32..CoreML.Specification.ReshapeStaticLayerParamsH\x00\x12J\n\x0ereshapeDynamic\x18\xf9\x08 \x01(\x0b\x32/.CoreML.Specification.ReshapeDynamicLayerParamsH\x00\x12X\n\x15rankPreservingReshape\x18\xfe\x08 \x01(\x0b\x32\x36.CoreML.Specification.RankPreservingReshapeLayerParamsH\x00\x12H\n\x0b\x63onstantPad\x18\x83\t \x01(\x0b\x32\x30.CoreML.Specification.ConstantPaddingLayerParamsH\x00\x12N\n\x10randomNormalLike\x18\x92\t \x01(\x0b\x32\x31.CoreML.Specification.RandomNormalLikeLayerParamsH\x00\x12R\n\x12randomNormalStatic\x18\x97\t \x01(\x0b\x32\x33.CoreML.Specification.RandomNormalStaticLayerParamsH\x00\x12T\n\x13randomNormalDynamic\x18\x9c\t \x01(\x0b\x32\x34.CoreML.Specification.RandomNormalDynamicLayerParamsH\x00\x12P\n\x11randomUniformLike\x18\xa6\t \x01(\x0b\x32\x32.CoreML.Specification.RandomUniformLikeLayerParamsH\x00\x12T\n\x13randomUniformStatic\x18\xab\t \x01(\x0b\x32\x34.CoreML.Specification.RandomUniformStaticLayerParamsH\x00\x12V\n\x14randomUniformDynamic\x18\xb0\t \x01(\x0b\x32\x35.CoreML.Specification.RandomUniformDynamicLayerParamsH\x00\x12T\n\x13randomBernoulliLike\x18\xba\t \x01(\x0b\x32\x34.CoreML.Specification.RandomBernoulliLikeLayerParamsH\x00\x12X\n\x15randomBernoulliStatic\x18\xbf\t \x01(\x0b\x32\x36.CoreML.Specification.RandomBernoulliStaticLayerParamsH\x00\x12Z\n\x16randomBernoulliDynamic\x18\xc4\t 
\x01(\x0b\x32\x37.CoreML.Specification.RandomBernoulliDynamicLayerParamsH\x00\x12\\\n\x17\x63\x61tegoricalDistribution\x18\xce\t \x01(\x0b\x32\x38.CoreML.Specification.CategoricalDistributionLayerParamsH\x00\x12>\n\x08reduceL1\x18\xe2\t \x01(\x0b\x32).CoreML.Specification.ReduceL1LayerParamsH\x00\x12>\n\x08reduceL2\x18\xe7\t \x01(\x0b\x32).CoreML.Specification.ReduceL2LayerParamsH\x00\x12@\n\treduceMax\x18\xec\t \x01(\x0b\x32*.CoreML.Specification.ReduceMaxLayerParamsH\x00\x12@\n\treduceMin\x18\xf1\t \x01(\x0b\x32*.CoreML.Specification.ReduceMinLayerParamsH\x00\x12@\n\treduceSum\x18\xf6\t \x01(\x0b\x32*.CoreML.Specification.ReduceSumLayerParamsH\x00\x12\x42\n\nreduceProd\x18\xfb\t \x01(\x0b\x32+.CoreML.Specification.ReduceProdLayerParamsH\x00\x12\x42\n\nreduceMean\x18\x80\n \x01(\x0b\x32+.CoreML.Specification.ReduceMeanLayerParamsH\x00\x12\x46\n\x0creduceLogSum\x18\x85\n \x01(\x0b\x32-.CoreML.Specification.ReduceLogSumLayerParamsH\x00\x12L\n\x0freduceSumSquare\x18\x8a\n \x01(\x0b\x32\x30.CoreML.Specification.ReduceSumSquareLayerParamsH\x00\x12L\n\x0freduceLogSumExp\x18\x8f\n \x01(\x0b\x32\x30.CoreML.Specification.ReduceLogSumExpLayerParamsH\x00\x12\x46\n\x0cwhereNonZero\x18\xa1\n \x01(\x0b\x32-.CoreML.Specification.WhereNonZeroLayerParamsH\x00\x12J\n\x0ematrixBandPart\x18\xa3\n \x01(\x0b\x32/.CoreML.Specification.MatrixBandPartLayerParamsH\x00\x12L\n\x0flowerTriangular\x18\xa8\n \x01(\x0b\x32\x30.CoreML.Specification.LowerTriangularLayerParamsH\x00\x12L\n\x0fupperTriangular\x18\xad\n \x01(\x0b\x32\x30.CoreML.Specification.UpperTriangularLayerParamsH\x00\x12R\n\x12whereBroadcastable\x18\xb2\n \x01(\x0b\x32\x33.CoreML.Specification.WhereBroadcastableLayerParamsH\x00\x12R\n\x12layerNormalization\x18\xc6\n \x01(\x0b\x32\x33.CoreML.Specification.LayerNormalizationLayerParamsH\x00\x12X\n\x15NonMaximumSuppression\x18\xf8\n \x01(\x0b\x32\x36.CoreML.Specification.NonMaximumSuppressionLayerParamsH\x00\x42\x07\n\x05layer\"\x83\x01\n\x11\x42ranchLayerParams\x12\x35\n\x08ifBranch\x18\x01 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\x12\x37\n\nelseBranch\x18\x02 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\"\xbb\x01\n\x0fLoopLayerParams\x12\x19\n\x11maxLoopIterations\x18\x01 \x01(\x04\x12\x14\n\x0c\x63onditionVar\x18\x02 \x01(\t\x12=\n\x10\x63onditionNetwork\x18\x03 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\x12\x38\n\x0b\x62odyNetwork\x18\x04 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\"\x16\n\x14LoopBreakLayerParams\"\x19\n\x17LoopContinueLayerParams\"\x11\n\x0f\x43opyLayerParams\"\'\n\x16GreaterThanLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"(\n\x17GreaterEqualLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"$\n\x13LessThanLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"%\n\x14LessEqualLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"!\n\x10\x45qualLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"$\n\x13NotEqualLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x17\n\x15LogicalAndLayerParams\"\x16\n\x14LogicalOrLayerParams\"\x17\n\x15LogicalXorLayerParams\"\x17\n\x15LogicalNotLayerParams\"\x8e\x01\n\rBorderAmounts\x12\x44\n\rborderAmounts\x18\n \x03(\x0b\x32-.CoreML.Specification.BorderAmounts.EdgeSizes\x1a\x37\n\tEdgeSizes\x12\x15\n\rstartEdgeSize\x18\x01 \x01(\x04\x12\x13\n\x0b\x65ndEdgeSize\x18\x02 \x01(\x04\"K\n\x0cValidPadding\x12;\n\x0epaddingAmounts\x18\x01 \x01(\x0b\x32#.CoreML.Specification.BorderAmounts\"\x96\x01\n\x0bSamePadding\x12H\n\rasymmetryMode\x18\x01 
\x01(\x0e\x32\x31.CoreML.Specification.SamePadding.SamePaddingMode\"=\n\x0fSamePaddingMode\x12\x16\n\x12\x42OTTOM_RIGHT_HEAVY\x10\x00\x12\x12\n\x0eTOP_LEFT_HEAVY\x10\x01\"\xbd\x01\n\x0cSamplingMode\x12\x41\n\x0esamplingMethod\x18\x01 \x01(\x0e\x32).CoreML.Specification.SamplingMode.Method\"j\n\x06Method\x12\x1f\n\x1bSTRICT_ALIGN_ENDPOINTS_MODE\x10\x00\x12\x18\n\x14\x41LIGN_ENDPOINTS_MODE\x10\x01\x12\x11\n\rUPSAMPLE_MODE\x10\x02\x12\x12\n\x0eROI_ALIGN_MODE\x10\x03\"\xd8\x01\n\x12\x42oxCoordinatesMode\x12\x45\n\x07\x62oxMode\x18\x01 \x01(\x0e\x32\x34.CoreML.Specification.BoxCoordinatesMode.Coordinates\"{\n\x0b\x43oordinates\x12\x18\n\x14\x43ORNERS_HEIGHT_FIRST\x10\x00\x12\x17\n\x13\x43ORNERS_WIDTH_FIRST\x10\x01\x12\x1c\n\x18\x43\x45NTER_SIZE_HEIGHT_FIRST\x10\x02\x12\x1b\n\x17\x43\x45NTER_SIZE_WIDTH_FIRST\x10\x03\"\x9f\x01\n\x0cWeightParams\x12\x12\n\nfloatValue\x18\x01 \x03(\x02\x12\x14\n\x0c\x66loat16Value\x18\x02 \x01(\x0c\x12\x10\n\x08rawValue\x18\x1e \x01(\x0c\x12>\n\x0cquantization\x18( \x01(\x0b\x32(.CoreML.Specification.QuantizationParams\x12\x13\n\x0bisUpdatable\x18\x32 \x01(\x08\"\xe4\x01\n\x12QuantizationParams\x12\x14\n\x0cnumberOfBits\x18\x01 \x01(\x04\x12L\n\x12linearQuantization\x18\x65 \x01(\x0b\x32..CoreML.Specification.LinearQuantizationParamsH\x00\x12V\n\x17lookupTableQuantization\x18\x66 \x01(\x0b\x32\x33.CoreML.Specification.LookUpTableQuantizationParamsH\x00\x42\x12\n\x10QuantizationType\"7\n\x18LinearQuantizationParams\x12\r\n\x05scale\x18\x01 \x03(\x02\x12\x0c\n\x04\x62ias\x18\x02 \x03(\x02\"3\n\x1dLookUpTableQuantizationParams\x12\x12\n\nfloatValue\x18\x01 \x03(\x02\"\xbd\x03\n\x16\x43onvolutionLayerParams\x12\x16\n\x0eoutputChannels\x18\x01 \x01(\x04\x12\x16\n\x0ekernelChannels\x18\x02 \x01(\x04\x12\x0f\n\x07nGroups\x18\n \x01(\x04\x12\x12\n\nkernelSize\x18\x14 \x03(\x04\x12\x0e\n\x06stride\x18\x1e \x03(\x04\x12\x16\n\x0e\x64ilationFactor\x18( \x03(\x04\x12\x33\n\x05valid\x18\x32 \x01(\x0b\x32\".CoreML.Specification.ValidPaddingH\x00\x12\x31\n\x04same\x18\x33 \x01(\x0b\x32!.CoreML.Specification.SamePaddingH\x00\x12\x17\n\x0fisDeconvolution\x18< \x01(\x08\x12\x0f\n\x07hasBias\x18\x46 \x01(\x08\x12\x33\n\x07weights\x18Z \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18[ \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x13\n\x0boutputShape\x18\x64 \x03(\x04\x42\x18\n\x16\x43onvolutionPaddingType\"\xc0\x01\n\x17InnerProductLayerParams\x12\x15\n\rinputChannels\x18\x01 \x01(\x04\x12\x16\n\x0eoutputChannels\x18\x02 \x01(\x04\x12\x0f\n\x07hasBias\x18\n \x01(\x08\x12\x33\n\x07weights\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xb8\x01\n\x14\x45mbeddingLayerParams\x12\x10\n\x08inputDim\x18\x01 \x01(\x04\x12\x16\n\x0eoutputChannels\x18\x02 \x01(\x04\x12\x0f\n\x07hasBias\x18\n \x01(\x08\x12\x33\n\x07weights\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xba\x01\n\x16\x45mbeddingNDLayerParams\x12\x11\n\tvocabSize\x18\x01 \x01(\x04\x12\x15\n\rembeddingSize\x18\x02 \x01(\x04\x12\x0f\n\x07hasBias\x18\x03 \x01(\x08\x12\x33\n\x07weights\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xbd\x02\n\x14\x42\x61tchnormLayerParams\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x04\x12\x16\n\x0e\x63omputeMeanVar\x18\x05 \x01(\x08\x12\x1d\n\x15instanceNormalization\x18\x06 
\x01(\x08\x12\x0f\n\x07\x65psilon\x18\n \x01(\x02\x12\x31\n\x05gamma\x18\x0f \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62\x65ta\x18\x10 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04mean\x18\x11 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x34\n\x08variance\x18\x12 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xe8\x03\n\x12PoolingLayerParams\x12\x42\n\x04type\x18\x01 \x01(\x0e\x32\x34.CoreML.Specification.PoolingLayerParams.PoolingType\x12\x12\n\nkernelSize\x18\n \x03(\x04\x12\x0e\n\x06stride\x18\x14 \x03(\x04\x12\x33\n\x05valid\x18\x1e \x01(\x0b\x32\".CoreML.Specification.ValidPaddingH\x00\x12\x31\n\x04same\x18\x1f \x01(\x0b\x32!.CoreML.Specification.SamePaddingH\x00\x12Y\n\x10includeLastPixel\x18 \x01(\x0b\x32=.CoreML.Specification.PoolingLayerParams.ValidCompletePaddingH\x00\x12\x1d\n\x15\x61vgPoolExcludePadding\x18\x32 \x01(\x08\x12\x15\n\rglobalPooling\x18< \x01(\x08\x1a.\n\x14ValidCompletePadding\x12\x16\n\x0epaddingAmounts\x18\n \x03(\x04\"+\n\x0bPoolingType\x12\x07\n\x03MAX\x10\x00\x12\x0b\n\x07\x41VERAGE\x10\x01\x12\x06\n\x02L2\x10\x02\x42\x14\n\x12PoolingPaddingType\"\xa1\x03\n\x12PaddingLayerParams\x12L\n\x08\x63onstant\x18\x01 \x01(\x0b\x32\x38.CoreML.Specification.PaddingLayerParams.PaddingConstantH\x00\x12P\n\nreflection\x18\x02 \x01(\x0b\x32:.CoreML.Specification.PaddingLayerParams.PaddingReflectionH\x00\x12R\n\x0breplication\x18\x03 \x01(\x0b\x32;.CoreML.Specification.PaddingLayerParams.PaddingReplicationH\x00\x12;\n\x0epaddingAmounts\x18\n \x01(\x0b\x32#.CoreML.Specification.BorderAmounts\x1a \n\x0fPaddingConstant\x12\r\n\x05value\x18\x01 \x01(\x02\x1a\x13\n\x11PaddingReflection\x1a\x14\n\x12PaddingReplicationB\r\n\x0bPaddingType\"+\n\x11\x43oncatLayerParams\x12\x16\n\x0esequenceConcat\x18\x64 \x01(\x08\"K\n\x0eLRNLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\x12\x11\n\tlocalSize\x18\x03 \x01(\x04\x12\t\n\x01k\x18\x04 \x01(\x02\"\x14\n\x12SoftmaxLayerParams\"$\n\x10SplitLayerParams\x12\x10\n\x08nOutputs\x18\x01 \x01(\x04\"\x1f\n\x0e\x41\x64\x64LayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"$\n\x13MultiplyLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x84\x02\n\x18UnaryFunctionLayerParams\x12\x46\n\x04type\x18\x01 \x01(\x0e\x32\x38.CoreML.Specification.UnaryFunctionLayerParams.Operation\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\x12\x0f\n\x07\x65psilon\x18\x03 \x01(\x02\x12\r\n\x05shift\x18\x04 \x01(\x02\x12\r\n\x05scale\x18\x05 \x01(\x02\"b\n\tOperation\x12\x08\n\x04SQRT\x10\x00\x12\t\n\x05RSQRT\x10\x01\x12\x0b\n\x07INVERSE\x10\x02\x12\t\n\x05POWER\x10\x03\x12\x07\n\x03\x45XP\x10\x04\x12\x07\n\x03LOG\x10\x05\x12\x07\n\x03\x41\x42S\x10\x06\x12\r\n\tTHRESHOLD\x10\x07\"\xa2\x01\n\x13UpsampleLayerParams\x12\x15\n\rscalingFactor\x18\x01 \x03(\x04\x12I\n\x04mode\x18\x05 \x01(\x0e\x32;.CoreML.Specification.UpsampleLayerParams.InterpolationMode\")\n\x11InterpolationMode\x12\x06\n\x02NN\x10\x00\x12\x0c\n\x08\x42ILINEAR\x10\x01\"a\n\x19ResizeBilinearLayerParams\x12\x12\n\ntargetSize\x18\x01 \x03(\x04\x12\x30\n\x04mode\x18\x02 \x01(\x0b\x32\".CoreML.Specification.SamplingMode\"\xd4\x01\n\x15\x43ropResizeLayerParams\x12\x12\n\ntargetSize\x18\x01 \x03(\x04\x12\x1d\n\x15normalizedCoordinates\x18\x02 \x01(\x08\x12\x30\n\x04mode\x18\x03 \x01(\x0b\x32\".CoreML.Specification.SamplingMode\x12@\n\x0e\x62oxIndicesMode\x18\x04 \x01(\x0b\x32(.CoreML.Specification.BoxCoordinatesMode\x12\x14\n\x0cspatialScale\x18\x05 \x01(\x02\"R\n\x0f\x42iasLayerParams\x12\r\n\x05shape\x18\x01 
\x03(\x04\x12\x30\n\x04\x62ias\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xaf\x01\n\x10ScaleLayerParams\x12\x12\n\nshapeScale\x18\x01 \x03(\x04\x12\x31\n\x05scale\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x0f\n\x07hasBias\x18\x03 \x01(\x08\x12\x11\n\tshapeBias\x18\x04 \x03(\x04\x12\x30\n\x04\x62ias\x18\x05 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"Z\n\x17LoadConstantLayerParams\x12\r\n\x05shape\x18\x01 \x03(\x04\x12\x30\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\")\n\x16L2NormalizeLayerParams\x12\x0f\n\x07\x65psilon\x18\x01 \x01(\x02\"\x8e\x01\n\x12\x46lattenLayerParams\x12\x43\n\x04mode\x18\x01 \x01(\x0e\x32\x35.CoreML.Specification.FlattenLayerParams.FlattenOrder\"3\n\x0c\x46lattenOrder\x12\x11\n\rCHANNEL_FIRST\x10\x00\x12\x10\n\x0c\x43HANNEL_LAST\x10\x01\"\xa3\x01\n\x12ReshapeLayerParams\x12\x13\n\x0btargetShape\x18\x01 \x03(\x03\x12\x43\n\x04mode\x18\x02 \x01(\x0e\x32\x35.CoreML.Specification.ReshapeLayerParams.ReshapeOrder\"3\n\x0cReshapeOrder\x12\x11\n\rCHANNEL_FIRST\x10\x00\x12\x10\n\x0c\x43HANNEL_LAST\x10\x01\"\"\n\x12PermuteLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x03(\x04\"\xbe\x01\n\x19ReorganizeDataLayerParams\x12P\n\x04mode\x18\x01 \x01(\x0e\x32\x42.CoreML.Specification.ReorganizeDataLayerParams.ReorganizationType\x12\x11\n\tblockSize\x18\x02 \x01(\x04\"<\n\x12ReorganizationType\x12\x12\n\x0eSPACE_TO_DEPTH\x10\x00\x12\x12\n\x0e\x44\x45PTH_TO_SPACE\x10\x01\"\xc8\x01\n\x10SliceLayerParams\x12\x12\n\nstartIndex\x18\x01 \x01(\x03\x12\x10\n\x08\x65ndIndex\x18\x02 \x01(\x03\x12\x0e\n\x06stride\x18\x03 \x01(\x04\x12>\n\x04\x61xis\x18\x04 \x01(\x0e\x32\x30.CoreML.Specification.SliceLayerParams.SliceAxis\">\n\tSliceAxis\x12\x10\n\x0c\x43HANNEL_AXIS\x10\x00\x12\x0f\n\x0bHEIGHT_AXIS\x10\x01\x12\x0e\n\nWIDTH_AXIS\x10\x02\"\xd9\x02\n\x11ReduceLayerParams\x12\x45\n\x04mode\x18\x01 \x01(\x0e\x32\x37.CoreML.Specification.ReduceLayerParams.ReduceOperation\x12\x0f\n\x07\x65psilon\x18\x02 \x01(\x02\x12@\n\x04\x61xis\x18\x03 \x01(\x0e\x32\x32.CoreML.Specification.ReduceLayerParams.ReduceAxis\"v\n\x0fReduceOperation\x12\x07\n\x03SUM\x10\x00\x12\x07\n\x03\x41VG\x10\x01\x12\x08\n\x04PROD\x10\x02\x12\n\n\x06LOGSUM\x10\x03\x12\r\n\tSUMSQUARE\x10\x04\x12\x06\n\x02L1\x10\x05\x12\x06\n\x02L2\x10\x06\x12\x07\n\x03MAX\x10\x07\x12\x07\n\x03MIN\x10\x08\x12\n\n\x06\x41RGMAX\x10\t\"2\n\nReduceAxis\x12\x07\n\x03\x43HW\x10\x00\x12\x06\n\x02HW\x10\x01\x12\x05\n\x01\x43\x10\x02\x12\x05\n\x01H\x10\x03\x12\x05\n\x01W\x10\x04\"[\n\x0f\x43ropLayerParams\x12\x38\n\x0b\x63ropAmounts\x18\x01 \x01(\x0b\x32#.CoreML.Specification.BorderAmounts\x12\x0e\n\x06offset\x18\x05 \x03(\x04\"\x14\n\x12\x41verageLayerParams\"\x10\n\x0eMaxLayerParams\"\x10\n\x0eMinLayerParams\"1\n\x15\x44otProductLayerParams\x12\x18\n\x10\x63osineSimilarity\x18\x01 \x01(\x08\"f\n MeanVarianceNormalizeLayerParams\x12\x16\n\x0e\x61\x63rossChannels\x18\x01 \x01(\x08\x12\x19\n\x11normalizeVariance\x18\x02 \x01(\x08\x12\x0f\n\x07\x65psilon\x18\x03 \x01(\x02\"1\n\x19SequenceRepeatLayerParams\x12\x14\n\x0cnRepetitions\x18\x01 \x01(\x04\"\xff\x02\n\x1aSimpleRecurrentLayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12:\n\nactivation\x18\n \x01(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x16\n\x0esequenceOutput\x18\x0f \x01(\x08\x12\x15\n\rhasBiasVector\x18\x14 \x01(\x08\x12\x38\n\x0cweightMatrix\x18\x1e \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12;\n\x0frecursionMatrix\x18\x1f 
\x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x36\n\nbiasVector\x18 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x14\n\x0creverseInput\x18\x64 \x01(\x08\"\xaa\x06\n\x0eGRULayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12;\n\x0b\x61\x63tivations\x18\n \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x16\n\x0esequenceOutput\x18\x0f \x01(\x08\x12\x16\n\x0ehasBiasVectors\x18\x14 \x01(\x08\x12\x42\n\x16updateGateWeightMatrix\x18\x1e \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x41\n\x15resetGateWeightMatrix\x18\x1f \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16outputGateWeightMatrix\x18 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19updateGateRecursionMatrix\x18\x32 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18resetGateRecursionMatrix\x18\x33 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19outputGateRecursionMatrix\x18\x34 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14updateGateBiasVector\x18\x46 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12?\n\x13resetGateBiasVector\x18G \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14outputGateBiasVector\x18H \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x14\n\x0creverseInput\x18\x64 \x01(\x08\"\xaa\x01\n\nLSTMParams\x12\x16\n\x0esequenceOutput\x18\n \x01(\x08\x12\x16\n\x0ehasBiasVectors\x18\x14 \x01(\x08\x12\x12\n\nforgetBias\x18\x1e \x01(\x08\x12\x1a\n\x12hasPeepholeVectors\x18( \x01(\x08\x12!\n\x19\x63oupledInputAndForgetGate\x18\x32 \x01(\x08\x12\x19\n\x11\x63\x65llClipThreshold\x18< \x01(\x02\"\x94\x08\n\x10LSTMWeightParams\x12\x41\n\x15inputGateWeightMatrix\x18\x01 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16\x66orgetGateWeightMatrix\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16\x62lockInputWeightMatrix\x18\x03 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16outputGateWeightMatrix\x18\x04 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18inputGateRecursionMatrix\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19\x66orgetGateRecursionMatrix\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19\x62lockInputRecursionMatrix\x18\x16 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19outputGateRecursionMatrix\x18\x17 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12?\n\x13inputGateBiasVector\x18( \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14\x66orgetGateBiasVector\x18) \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14\x62lockInputBiasVector\x18* \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14outputGateBiasVector\x18+ \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x43\n\x17inputGatePeepholeVector\x18< \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18\x66orgetGatePeepholeVector\x18= \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18outputGatePeepholeVector\x18> \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\x95\x02\n\x1dUniDirectionalLSTMLayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12;\n\x0b\x61\x63tivations\x18\n \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x30\n\x06params\x18\x0f \x01(\x0b\x32 .CoreML.Specification.LSTMParams\x12<\n\x0cweightParams\x18\x14 \x01(\x0b\x32&.CoreML.Specification.LSTMWeightParams\x12\x14\n\x0creverseInput\x18\x64 
\x01(\x08\"\xd2\x02\n\x1c\x42iDirectionalLSTMLayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12\x46\n\x16\x61\x63tivationsForwardLSTM\x18\n \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12G\n\x17\x61\x63tivationsBackwardLSTM\x18\x0b \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x30\n\x06params\x18\x0f \x01(\x0b\x32 .CoreML.Specification.LSTMParams\x12<\n\x0cweightParams\x18\x14 \x03(\x0b\x32&.CoreML.Specification.LSTMWeightParams\"\xbe\x03\n\x11\x43ustomLayerParams\x12\x11\n\tclassName\x18\n \x01(\t\x12\x33\n\x07weights\x18\x14 \x03(\x0b\x32\".CoreML.Specification.WeightParams\x12K\n\nparameters\x18\x1e \x03(\x0b\x32\x37.CoreML.Specification.CustomLayerParams.ParametersEntry\x12\x13\n\x0b\x64\x65scription\x18( \x01(\t\x1a\x8c\x01\n\x15\x43ustomLayerParamValue\x12\x15\n\x0b\x64oubleValue\x18\n \x01(\x01H\x00\x12\x15\n\x0bstringValue\x18\x14 \x01(\tH\x00\x12\x12\n\x08intValue\x18\x1e \x01(\x05H\x00\x12\x13\n\tlongValue\x18( \x01(\x03H\x00\x12\x13\n\tboolValue\x18\x32 \x01(\x08H\x00\x42\x07\n\x05value\x1ap\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12L\n\x05value\x18\x02 \x01(\x0b\x32=.CoreML.Specification.CustomLayerParams.CustomLayerParamValue:\x02\x38\x01\"$\n\x14TransposeLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x04\"\x83\x02\n\x18\x42\x61tchedMatMulLayerParams\x12\x12\n\ntransposeA\x18\x01 \x01(\x08\x12\x12\n\ntransposeB\x18\x02 \x01(\x08\x12\"\n\x1aweightMatrixFirstDimension\x18\x05 \x01(\x04\x12#\n\x1bweightMatrixSecondDimension\x18\x06 \x01(\x04\x12\x0f\n\x07hasBias\x18\x07 \x01(\x08\x12\x33\n\x07weights\x18\x08 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\t \x01(\x0b\x32\".CoreML.Specification.WeightParams\"#\n\x13\x43oncatNDLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"$\n\x14SoftmaxNDLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"(\n\x12ReverseLayerParams\x12\x12\n\nreverseDim\x18\x01 \x03(\x08\"@\n\x15ReverseSeqLayerParams\x12\x11\n\tbatchAxis\x18\x01 \x01(\x03\x12\x14\n\x0csequenceAxis\x18\x02 \x01(\x03\"\\\n\x19LoadConstantNDLayerParams\x12\r\n\x05shape\x18\x01 \x03(\x04\x12\x30\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"$\n\x13\x46illLikeLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\";\n\x15\x46illStaticLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\x12\x13\n\x0btargetShape\x18\x02 \x03(\x04\"\'\n\x16\x46illDynamicLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\"\x1f\n\x1dWhereBroadcastableLayerParams\"\x10\n\x0eSinLayerParams\"\x10\n\x0e\x43osLayerParams\"\x10\n\x0eTanLayerParams\"\x11\n\x0f\x41sinLayerParams\"\x11\n\x0f\x41\x63osLayerParams\"\x11\n\x0f\x41tanLayerParams\"\x11\n\x0fSinhLayerParams\"\x11\n\x0f\x43oshLayerParams\"\x11\n\x0fTanhLayerParams\"\x12\n\x10\x41sinhLayerParams\"\x12\n\x10\x41\x63oshLayerParams\"\x12\n\x10\x41tanhLayerParams\"\x1d\n\x1bPowBroadcastableLayerParams\"\x11\n\x0f\x45xp2LayerParams\"\x19\n\x17WhereNonZeroLayerParams\"?\n\x19MatrixBandPartLayerParams\x12\x10\n\x08numLower\x18\x01 \x01(\x03\x12\x10\n\x08numUpper\x18\x02 \x01(\x03\"\'\n\x1aUpperTriangularLayerParams\x12\t\n\x01k\x18\x01 \x01(\x03\"\'\n\x1aLowerTriangularLayerParams\x12\t\n\x01k\x18\x01 \x01(\x03\"\x1c\n\x1a\x42roadcastToLikeLayerParams\"3\n\x1c\x42roadcastToStaticLayerParams\x12\x13\n\x0btargetShape\x18\x01 
\x03(\x04\"\x1f\n\x1d\x42roadcastToDynamicLayerParams\"\x1d\n\x1b\x41\x64\x64\x42roadcastableLayerParams\"\x1d\n\x1bMaxBroadcastableLayerParams\"\x1d\n\x1bMinBroadcastableLayerParams\"\x1d\n\x1bModBroadcastableLayerParams\"\"\n FloorDivBroadcastableLayerParams\"\"\n SubtractBroadcastableLayerParams\"\"\n MultiplyBroadcastableLayerParams\" \n\x1e\x44ivideBroadcastableLayerParams\"!\n\x11GatherLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"S\n\x12ScatterLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12/\n\x04mode\x18\x02 \x01(\x0e\x32!.CoreML.Specification.ScatterMode\"\x15\n\x13GatherNDLayerParams\"G\n\x14ScatterNDLayerParams\x12/\n\x04mode\x18\x01 \x01(\x0e\x32!.CoreML.Specification.ScatterMode\"*\n\x1aGatherAlongAxisLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"\\\n\x1bScatterAlongAxisLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12/\n\x04mode\x18\x02 \x01(\x0e\x32!.CoreML.Specification.ScatterMode\" \n\x10StackLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"7\n RankPreservingReshapeLayerParams\x12\x13\n\x0btargetShape\x18\x01 \x03(\x03\"a\n\x1a\x43onstantPaddingLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\x12\x12\n\npadAmounts\x18\x02 \x03(\x04\x12 \n\x18padToGivenOutputSizeMode\x18\x03 \x01(\x08\"I\n\x1bRandomNormalLikeLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04mean\x18\x02 \x01(\x02\x12\x0e\n\x06stdDev\x18\x03 \x01(\x02\"`\n\x1dRandomNormalStaticLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04mean\x18\x02 \x01(\x02\x12\x0e\n\x06stdDev\x18\x03 \x01(\x02\x12\x13\n\x0boutputShape\x18\x04 \x03(\x04\"L\n\x1eRandomNormalDynamicLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04mean\x18\x02 \x01(\x02\x12\x0e\n\x06stdDev\x18\x03 \x01(\x02\"L\n\x1cRandomUniformLikeLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0e\n\x06minVal\x18\x02 \x01(\x02\x12\x0e\n\x06maxVal\x18\x03 \x01(\x02\"c\n\x1eRandomUniformStaticLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0e\n\x06minVal\x18\x02 \x01(\x02\x12\x0e\n\x06maxVal\x18\x03 \x01(\x02\x12\x13\n\x0boutputShape\x18\x04 \x03(\x04\"O\n\x1fRandomUniformDynamicLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0e\n\x06minVal\x18\x02 \x01(\x02\x12\x0e\n\x06maxVal\x18\x03 \x01(\x02\"<\n\x1eRandomBernoulliLikeLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04prob\x18\x02 \x01(\x02\"S\n RandomBernoulliStaticLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04prob\x18\x02 \x01(\x02\x12\x13\n\x0boutputShape\x18\x03 \x03(\x04\"?\n!RandomBernoulliDynamicLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04prob\x18\x02 \x01(\x02\"z\n\"CategoricalDistributionLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x12\n\nnumSamples\x18\x02 \x01(\x03\x12\x10\n\x08isLogits\x18\x03 \x01(\x08\x12\x0b\n\x03\x65ps\x18\x04 \x01(\x02\x12\x13\n\x0btemperature\x18\x05 \x01(\x02\"H\n\x13ReduceL1LayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"H\n\x13ReduceL2LayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"I\n\x14ReduceMaxLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"I\n\x14ReduceMinLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"I\n\x14ReduceSumLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 
\x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"J\n\x15ReduceProdLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"J\n\x15ReduceMeanLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"L\n\x17ReduceLogSumLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"O\n\x1aReduceSumSquareLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"O\n\x1aReduceLogSumExpLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"%\n\x15\x45xpandDimsLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\"&\n\x16\x46lattenTo2DLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"/\n\x18ReshapeStaticLayerParams\x12\x13\n\x0btargetShape\x18\x01 \x03(\x03\"\x18\n\x16ReshapeLikeLayerParams\"\x1b\n\x19ReshapeDynamicLayerParams\"6\n\x12SqueezeLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x12\n\nsqueezeAll\x18\x02 \x01(\x08\">\n\x0fTopKLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\t\n\x01K\x18\x02 \x01(\x04\x12\x12\n\nuseBottomK\x18\x03 \x01(\x08\"4\n\x11\x41rgMaxLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x11\n\tremoveDim\x18\x02 \x01(\x08\"4\n\x11\x41rgMinLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x11\n\tremoveDim\x18\x02 \x01(\x08\"I\n\x12SplitNDLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x11\n\tnumSplits\x18\x02 \x01(\x04\x12\x12\n\nsplitSizes\x18\x03 \x03(\x04\"\x11\n\x0f\x43\x65ilLayerParams\"\x12\n\x10RoundLayerParams\"\x12\n\x10\x46loorLayerParams\"\x11\n\x0fSignLayerParams\"1\n\x0f\x43lipLayerParams\x12\x0e\n\x06minVal\x18\x01 \x01(\x02\x12\x0e\n\x06maxVal\x18\x02 \x01(\x02\"q\n\x16SliceStaticLayerParams\x12\x10\n\x08\x62\x65ginIds\x18\x01 \x03(\x03\x12\x12\n\nbeginMasks\x18\x02 \x03(\x08\x12\x0e\n\x06\x65ndIds\x18\x03 \x03(\x03\x12\x10\n\x08\x65ndMasks\x18\x04 \x03(\x08\x12\x0f\n\x07strides\x18\x05 \x03(\x03\"`\n\x17SliceDynamicLayerParams\x12\x12\n\nbeginMasks\x18\x02 \x03(\x08\x12\x0e\n\x06\x65ndIds\x18\x03 \x03(\x03\x12\x10\n\x08\x65ndMasks\x18\x04 \x03(\x08\x12\x0f\n\x07strides\x18\x05 \x03(\x03\"\x1f\n\x0fTileLayerParams\x12\x0c\n\x04reps\x18\x01 \x03(\x04\"\x15\n\x13GetShapeLayerParams\"\x10\n\x0e\x45rfLayerParams\"\x99\x01\n\x0fGeluLayerParams\x12<\n\x04mode\x18\x01 \x01(\x0e\x32..CoreML.Specification.GeluLayerParams.GeluMode\"H\n\x08GeluMode\x12\t\n\x05\x45XACT\x10\x00\x12\x16\n\x12TANH_APPROXIMATION\x10\x01\x12\x19\n\x15SIGMOID_APPROXIMATION\x10\x02\"U\n\x16RangeStaticLayerParams\x12\x10\n\x08\x65ndValue\x18\x01 \x01(\x02\x12\x12\n\nstartValue\x18\x02 \x01(\x02\x12\x15\n\rstepSizeValue\x18\x03 \x01(\x02\"D\n\x17RangeDynamicLayerParams\x12\x12\n\nstartValue\x18\x02 \x01(\x02\x12\x15\n\rstepSizeValue\x18\x03 \x01(\x02\"K\n\x19SlidingWindowsLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x12\n\nwindowSize\x18\x02 \x01(\x04\x12\x0c\n\x04step\x18\x03 \x01(\x04\"\xaa\x01\n\x1dLayerNormalizationLayerParams\x12\x17\n\x0fnormalizedShape\x18\x01 \x03(\x03\x12\x0b\n\x03\x65ps\x18\x02 \x01(\x02\x12\x31\n\x05gamma\x18\x03 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62\x65ta\x18\x04 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\x7f\n NonMaximumSuppressionLayerParams\x12\x14\n\x0ciouThreshold\x18\x01 \x01(\x02\x12\x16\n\x0escoreThreshold\x18\x02 
\x01(\x02\x12\x10\n\x08maxBoxes\x18\x03 \x01(\x04\x12\x1b\n\x13perClassSuppression\x18\x04 \x01(\x08\"\xc5\x04\n\x17NeuralNetworkClassifier\x12\x38\n\x06layers\x18\x01 \x03(\x0b\x32(.CoreML.Specification.NeuralNetworkLayer\x12G\n\rpreprocessing\x18\x02 \x03(\x0b\x32\x30.CoreML.Specification.NeuralNetworkPreprocessing\x12Y\n\x16\x61rrayInputShapeMapping\x18\x05 \x01(\x0e\x32\x39.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping\x12T\n\x16imageInputShapeMapping\x18\x06 \x01(\x0e\x32\x34.CoreML.Specification.NeuralNetworkImageShapeMapping\x12\x43\n\x0cupdateParams\x18\n \x01(\x0b\x32-.CoreML.Specification.NetworkUpdateParameters\x12?\n\x11stringClassLabels\x18\x64 \x01(\x0b\x32\".CoreML.Specification.StringVectorH\x00\x12=\n\x10int64ClassLabels\x18\x65 \x01(\x0b\x32!.CoreML.Specification.Int64VectorH\x00\x12\"\n\x19labelProbabilityLayerName\x18\xc8\x01 \x01(\tB\r\n\x0b\x43lassLabels\"\x91\x03\n\x16NeuralNetworkRegressor\x12\x38\n\x06layers\x18\x01 \x03(\x0b\x32(.CoreML.Specification.NeuralNetworkLayer\x12G\n\rpreprocessing\x18\x02 \x03(\x0b\x32\x30.CoreML.Specification.NeuralNetworkPreprocessing\x12Y\n\x16\x61rrayInputShapeMapping\x18\x05 \x01(\x0e\x32\x39.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping\x12T\n\x16imageInputShapeMapping\x18\x06 \x01(\x0e\x32\x34.CoreML.Specification.NeuralNetworkImageShapeMapping\x12\x43\n\x0cupdateParams\x18\n \x01(\x0b\x32-.CoreML.Specification.NetworkUpdateParameters\"\xa2\x02\n\x17NetworkUpdateParameters\x12\x33\n\nlossLayers\x18\x01 \x03(\x0b\x32\x1f.CoreML.Specification.LossLayer\x12\x32\n\toptimizer\x18\x02 \x01(\x0b\x32\x1f.CoreML.Specification.Optimizer\x12\x34\n\x06\x65pochs\x18\x03 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\x12\x34\n\x07shuffle\x18\n \x01(\x0b\x32#.CoreML.Specification.BoolParameter\x12\x32\n\x04seed\x18\x14 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\"\xe4\x01\n\tLossLayer\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x62\n categoricalCrossEntropyLossLayer\x18\n \x01(\x0b\x32\x36.CoreML.Specification.CategoricalCrossEntropyLossLayerH\x00\x12T\n\x19meanSquaredErrorLossLayer\x18\x0b \x01(\x0b\x32/.CoreML.Specification.MeanSquaredErrorLossLayerH\x00\x42\x0f\n\rLossLayerType\"A\n CategoricalCrossEntropyLossLayer\x12\r\n\x05input\x18\x01 \x01(\t\x12\x0e\n\x06target\x18\x02 \x01(\t\":\n\x19MeanSquaredErrorLossLayer\x12\r\n\x05input\x18\x01 \x01(\t\x12\x0e\n\x06target\x18\x02 \x01(\t\"\x96\x01\n\tOptimizer\x12:\n\x0csgdOptimizer\x18\n \x01(\x0b\x32\".CoreML.Specification.SGDOptimizerH\x00\x12<\n\radamOptimizer\x18\x0b \x01(\x0b\x32#.CoreML.Specification.AdamOptimizerH\x00\x42\x0f\n\rOptimizerType\"\xc1\x01\n\x0cSGDOptimizer\x12;\n\x0clearningRate\x18\x01 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12;\n\rminiBatchSize\x18\x02 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\x12\x37\n\x08momentum\x18\x03 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\"\xa9\x02\n\rAdamOptimizer\x12;\n\x0clearningRate\x18\x01 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12;\n\rminiBatchSize\x18\x02 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\x12\x34\n\x05\x62\x65ta1\x18\x03 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12\x34\n\x05\x62\x65ta2\x18\x04 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12\x32\n\x03\x65ps\x18\x05 
\x01(\x0b\x32%.CoreML.Specification.DoubleParameter*W\n#NeuralNetworkMultiArrayShapeMapping\x12\x17\n\x13RANK5_ARRAY_MAPPING\x10\x00\x12\x17\n\x13\x45XACT_ARRAY_MAPPING\x10\x01*R\n\x1eNeuralNetworkImageShapeMapping\x12\x17\n\x13RANK5_IMAGE_MAPPING\x10\x00\x12\x17\n\x13RANK4_IMAGE_MAPPING\x10\x01*\x87\x01\n\x0bScatterMode\x12\x12\n\x0eSCATTER_UPDATE\x10\x00\x12\x0f\n\x0bSCATTER_ADD\x10\x01\x12\x0f\n\x0bSCATTER_SUB\x10\x02\x12\x0f\n\x0bSCATTER_MUL\x10\x03\x12\x0f\n\x0bSCATTER_DIV\x10\x04\x12\x0f\n\x0bSCATTER_MAX\x10\x05\x12\x0f\n\x0bSCATTER_MIN\x10\x06\x42\x02H\x03P\x00P\x01\x62\x06proto3') + serialized_pb=_b('\n\x13NeuralNetwork.proto\x12\x14\x43oreML.Specification\x1a\x14\x44\x61taStructures.proto\x1a\x10Parameters.proto\"\x88\x03\n\rNeuralNetwork\x12\x38\n\x06layers\x18\x01 \x03(\x0b\x32(.CoreML.Specification.NeuralNetworkLayer\x12G\n\rpreprocessing\x18\x02 \x03(\x0b\x32\x30.CoreML.Specification.NeuralNetworkPreprocessing\x12Y\n\x16\x61rrayInputShapeMapping\x18\x05 \x01(\x0e\x32\x39.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping\x12T\n\x16imageInputShapeMapping\x18\x06 \x01(\x0e\x32\x34.CoreML.Specification.NeuralNetworkImageShapeMapping\x12\x43\n\x0cupdateParams\x18\n \x01(\x0b\x32-.CoreML.Specification.NetworkUpdateParameters\"x\n\x18NeuralNetworkImageScaler\x12\x14\n\x0c\x63hannelScale\x18\n \x01(\x02\x12\x10\n\x08\x62lueBias\x18\x14 \x01(\x02\x12\x11\n\tgreenBias\x18\x15 \x01(\x02\x12\x0f\n\x07redBias\x18\x16 \x01(\x02\x12\x10\n\x08grayBias\x18\x1e \x01(\x02\"+\n\x16NeuralNetworkMeanImage\x12\x11\n\tmeanImage\x18\x01 \x03(\x02\"\xc6\x01\n\x1aNeuralNetworkPreprocessing\x12\x13\n\x0b\x66\x65\x61tureName\x18\x01 \x01(\t\x12@\n\x06scaler\x18\n \x01(\x0b\x32..CoreML.Specification.NeuralNetworkImageScalerH\x00\x12\x41\n\tmeanImage\x18\x0b \x01(\x0b\x32,.CoreML.Specification.NeuralNetworkMeanImageH\x00\x42\x0e\n\x0cpreprocessor\"\x10\n\x0e\x41\x63tivationReLU\"$\n\x13\x41\x63tivationLeakyReLU\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x10\n\x0e\x41\x63tivationTanh\"3\n\x14\x41\x63tivationScaledTanh\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"\x13\n\x11\x41\x63tivationSigmoid\"/\n\x10\x41\x63tivationLinear\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"4\n\x15\x41\x63tivationSigmoidHard\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"D\n\x0f\x41\x63tivationPReLU\x12\x31\n\x05\x61lpha\x18\x01 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\x1e\n\rActivationELU\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"*\n\x19\x41\x63tivationThresholdedReLU\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x14\n\x12\x41\x63tivationSoftsign\"\x14\n\x12\x41\x63tivationSoftplus\"\x83\x01\n\x1c\x41\x63tivationParametricSoftplus\x12\x31\n\x05\x61lpha\x18\x01 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62\x65ta\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xd4\x06\n\x10\x41\x63tivationParams\x12\x38\n\x06linear\x18\x05 \x01(\x0b\x32&.CoreML.Specification.ActivationLinearH\x00\x12\x34\n\x04ReLU\x18\n \x01(\x0b\x32$.CoreML.Specification.ActivationReLUH\x00\x12>\n\tleakyReLU\x18\x0f \x01(\x0b\x32).CoreML.Specification.ActivationLeakyReLUH\x00\x12J\n\x0fthresholdedReLU\x18\x14 \x01(\x0b\x32/.CoreML.Specification.ActivationThresholdedReLUH\x00\x12\x36\n\x05PReLU\x18\x19 \x01(\x0b\x32%.CoreML.Specification.ActivationPReLUH\x00\x12\x34\n\x04tanh\x18\x1e \x01(\x0b\x32$.CoreML.Specification.ActivationTanhH\x00\x12@\n\nscaledTanh\x18\x1f 
\x01(\x0b\x32*.CoreML.Specification.ActivationScaledTanhH\x00\x12:\n\x07sigmoid\x18( \x01(\x0b\x32\'.CoreML.Specification.ActivationSigmoidH\x00\x12\x42\n\x0bsigmoidHard\x18) \x01(\x0b\x32+.CoreML.Specification.ActivationSigmoidHardH\x00\x12\x32\n\x03\x45LU\x18\x32 \x01(\x0b\x32#.CoreML.Specification.ActivationELUH\x00\x12<\n\x08softsign\x18< \x01(\x0b\x32(.CoreML.Specification.ActivationSoftsignH\x00\x12<\n\x08softplus\x18\x46 \x01(\x0b\x32(.CoreML.Specification.ActivationSoftplusH\x00\x12P\n\x12parametricSoftplus\x18G \x01(\x0b\x32\x32.CoreML.Specification.ActivationParametricSoftplusH\x00\x42\x12\n\x10NonlinearityType\"(\n\x06Tensor\x12\x0c\n\x04rank\x18\x01 \x01(\r\x12\x10\n\x08\x64imValue\x18\x02 \x03(\x03\"\xeaU\n\x12NeuralNetworkLayer\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x02 \x03(\t\x12\x0e\n\x06output\x18\x03 \x03(\t\x12\x31\n\x0binputTensor\x18\x04 \x03(\x0b\x32\x1c.CoreML.Specification.Tensor\x12\x32\n\x0coutputTensor\x18\x05 \x03(\x0b\x32\x1c.CoreML.Specification.Tensor\x12\x13\n\x0bisUpdatable\x18\n \x01(\x08\x12\x43\n\x0b\x63onvolution\x18\x64 \x01(\x0b\x32,.CoreML.Specification.ConvolutionLayerParamsH\x00\x12;\n\x07pooling\x18x \x01(\x0b\x32(.CoreML.Specification.PoolingLayerParamsH\x00\x12=\n\nactivation\x18\x82\x01 \x01(\x0b\x32&.CoreML.Specification.ActivationParamsH\x00\x12\x46\n\x0cinnerProduct\x18\x8c\x01 \x01(\x0b\x32-.CoreML.Specification.InnerProductLayerParamsH\x00\x12@\n\tembedding\x18\x96\x01 \x01(\x0b\x32*.CoreML.Specification.EmbeddingLayerParamsH\x00\x12@\n\tbatchnorm\x18\xa0\x01 \x01(\x0b\x32*.CoreML.Specification.BatchnormLayerParamsH\x00\x12\x46\n\x03mvn\x18\xa5\x01 \x01(\x0b\x32\x36.CoreML.Specification.MeanVarianceNormalizeLayerParamsH\x00\x12\x44\n\x0bl2normalize\x18\xaa\x01 \x01(\x0b\x32,.CoreML.Specification.L2NormalizeLayerParamsH\x00\x12<\n\x07softmax\x18\xaf\x01 \x01(\x0b\x32(.CoreML.Specification.SoftmaxLayerParamsH\x00\x12\x34\n\x03lrn\x18\xb4\x01 \x01(\x0b\x32$.CoreML.Specification.LRNLayerParamsH\x00\x12\x36\n\x04\x63rop\x18\xbe\x01 \x01(\x0b\x32%.CoreML.Specification.CropLayerParamsH\x00\x12<\n\x07padding\x18\xc8\x01 \x01(\x0b\x32(.CoreML.Specification.PaddingLayerParamsH\x00\x12>\n\x08upsample\x18\xd2\x01 \x01(\x0b\x32).CoreML.Specification.UpsampleLayerParamsH\x00\x12J\n\x0eresizeBilinear\x18\xd3\x01 \x01(\x0b\x32/.CoreML.Specification.ResizeBilinearLayerParamsH\x00\x12\x42\n\ncropResize\x18\xd4\x01 \x01(\x0b\x32+.CoreML.Specification.CropResizeLayerParamsH\x00\x12@\n\x05unary\x18\xdc\x01 \x01(\x0b\x32..CoreML.Specification.UnaryFunctionLayerParamsH\x00\x12\x34\n\x03\x61\x64\x64\x18\xe6\x01 \x01(\x0b\x32$.CoreML.Specification.AddLayerParamsH\x00\x12>\n\x08multiply\x18\xe7\x01 \x01(\x0b\x32).CoreML.Specification.MultiplyLayerParamsH\x00\x12<\n\x07\x61verage\x18\xf0\x01 \x01(\x0b\x32(.CoreML.Specification.AverageLayerParamsH\x00\x12\x38\n\x05scale\x18\xf5\x01 \x01(\x0b\x32&.CoreML.Specification.ScaleLayerParamsH\x00\x12\x36\n\x04\x62ias\x18\xfa\x01 \x01(\x0b\x32%.CoreML.Specification.BiasLayerParamsH\x00\x12\x34\n\x03max\x18\x84\x02 \x01(\x0b\x32$.CoreML.Specification.MaxLayerParamsH\x00\x12\x34\n\x03min\x18\x85\x02 \x01(\x0b\x32$.CoreML.Specification.MinLayerParamsH\x00\x12;\n\x03\x64ot\x18\x8e\x02 \x01(\x0b\x32+.CoreML.Specification.DotProductLayerParamsH\x00\x12:\n\x06reduce\x18\x98\x02 \x01(\x0b\x32\'.CoreML.Specification.ReduceLayerParamsH\x00\x12\x46\n\x0cloadConstant\x18\xa2\x02 \x01(\x0b\x32-.CoreML.Specification.LoadConstantLayerParamsH\x00\x12<\n\x07reshape\x18\xac\x02 
\x01(\x0b\x32(.CoreML.Specification.ReshapeLayerParamsH\x00\x12<\n\x07\x66latten\x18\xad\x02 \x01(\x0b\x32(.CoreML.Specification.FlattenLayerParamsH\x00\x12<\n\x07permute\x18\xb6\x02 \x01(\x0b\x32(.CoreML.Specification.PermuteLayerParamsH\x00\x12:\n\x06\x63oncat\x18\xc0\x02 \x01(\x0b\x32\'.CoreML.Specification.ConcatLayerParamsH\x00\x12\x38\n\x05split\x18\xca\x02 \x01(\x0b\x32&.CoreML.Specification.SplitLayerParamsH\x00\x12J\n\x0esequenceRepeat\x18\xd4\x02 \x01(\x0b\x32/.CoreML.Specification.SequenceRepeatLayerParamsH\x00\x12J\n\x0ereorganizeData\x18\xd9\x02 \x01(\x0b\x32/.CoreML.Specification.ReorganizeDataLayerParamsH\x00\x12\x38\n\x05slice\x18\xde\x02 \x01(\x0b\x32&.CoreML.Specification.SliceLayerParamsH\x00\x12L\n\x0fsimpleRecurrent\x18\x90\x03 \x01(\x0b\x32\x30.CoreML.Specification.SimpleRecurrentLayerParamsH\x00\x12\x34\n\x03gru\x18\x9a\x03 \x01(\x0b\x32$.CoreML.Specification.GRULayerParamsH\x00\x12R\n\x12uniDirectionalLSTM\x18\xa4\x03 \x01(\x0b\x32\x33.CoreML.Specification.UniDirectionalLSTMLayerParamsH\x00\x12P\n\x11\x62iDirectionalLSTM\x18\xae\x03 \x01(\x0b\x32\x32.CoreML.Specification.BiDirectionalLSTMLayerParamsH\x00\x12:\n\x06\x63ustom\x18\xf4\x03 \x01(\x0b\x32\'.CoreML.Specification.CustomLayerParamsH\x00\x12\x36\n\x04\x63opy\x18\xd8\x04 \x01(\x0b\x32%.CoreML.Specification.CopyLayerParamsH\x00\x12:\n\x06\x62ranch\x18\xdd\x04 \x01(\x0b\x32\'.CoreML.Specification.BranchLayerParamsH\x00\x12\x36\n\x04loop\x18\xe7\x04 \x01(\x0b\x32%.CoreML.Specification.LoopLayerParamsH\x00\x12@\n\tloopBreak\x18\xec\x04 \x01(\x0b\x32*.CoreML.Specification.LoopBreakLayerParamsH\x00\x12\x46\n\x0cloopContinue\x18\xf1\x04 \x01(\x0b\x32-.CoreML.Specification.LoopContinueLayerParamsH\x00\x12\x44\n\x0brangeStatic\x18\xfb\x04 \x01(\x0b\x32,.CoreML.Specification.RangeStaticLayerParamsH\x00\x12\x46\n\x0crangeDynamic\x18\x80\x05 \x01(\x0b\x32-.CoreML.Specification.RangeDynamicLayerParamsH\x00\x12\x36\n\x04\x63lip\x18\x94\x05 \x01(\x0b\x32%.CoreML.Specification.ClipLayerParamsH\x00\x12\x36\n\x04\x63\x65il\x18\x99\x05 \x01(\x0b\x32%.CoreML.Specification.CeilLayerParamsH\x00\x12\x38\n\x05\x66loor\x18\x9e\x05 \x01(\x0b\x32&.CoreML.Specification.FloorLayerParamsH\x00\x12\x36\n\x04sign\x18\xa8\x05 \x01(\x0b\x32%.CoreML.Specification.SignLayerParamsH\x00\x12\x38\n\x05round\x18\xad\x05 \x01(\x0b\x32&.CoreML.Specification.RoundLayerParamsH\x00\x12\x36\n\x04\x65xp2\x18\xbc\x05 \x01(\x0b\x32%.CoreML.Specification.Exp2LayerParamsH\x00\x12\x34\n\x03sin\x18\xc6\x05 \x01(\x0b\x32$.CoreML.Specification.SinLayerParamsH\x00\x12\x34\n\x03\x63os\x18\xcb\x05 \x01(\x0b\x32$.CoreML.Specification.CosLayerParamsH\x00\x12\x34\n\x03tan\x18\xd0\x05 \x01(\x0b\x32$.CoreML.Specification.TanLayerParamsH\x00\x12\x36\n\x04\x61sin\x18\xda\x05 \x01(\x0b\x32%.CoreML.Specification.AsinLayerParamsH\x00\x12\x36\n\x04\x61\x63os\x18\xdf\x05 \x01(\x0b\x32%.CoreML.Specification.AcosLayerParamsH\x00\x12\x36\n\x04\x61tan\x18\xe4\x05 \x01(\x0b\x32%.CoreML.Specification.AtanLayerParamsH\x00\x12\x36\n\x04sinh\x18\xee\x05 \x01(\x0b\x32%.CoreML.Specification.SinhLayerParamsH\x00\x12\x36\n\x04\x63osh\x18\xf3\x05 \x01(\x0b\x32%.CoreML.Specification.CoshLayerParamsH\x00\x12\x36\n\x04tanh\x18\xf8\x05 \x01(\x0b\x32%.CoreML.Specification.TanhLayerParamsH\x00\x12\x38\n\x05\x61sinh\x18\x82\x06 \x01(\x0b\x32&.CoreML.Specification.AsinhLayerParamsH\x00\x12\x38\n\x05\x61\x63osh\x18\x87\x06 \x01(\x0b\x32&.CoreML.Specification.AcoshLayerParamsH\x00\x12\x38\n\x05\x61tanh\x18\x8c\x06 \x01(\x0b\x32&.CoreML.Specification.AtanhLayerParamsH\x00\x12\x34\n\x03\x65rf\x18\x96\x06 
\x01(\x0b\x32$.CoreML.Specification.ErfLayerParamsH\x00\x12\x36\n\x04gelu\x18\x9b\x06 \x01(\x0b\x32%.CoreML.Specification.GeluLayerParamsH\x00\x12\x38\n\x05\x65qual\x18\xaf\x06 \x01(\x0b\x32&.CoreML.Specification.EqualLayerParamsH\x00\x12>\n\x08notEqual\x18\xb4\x06 \x01(\x0b\x32).CoreML.Specification.NotEqualLayerParamsH\x00\x12>\n\x08lessThan\x18\xb9\x06 \x01(\x0b\x32).CoreML.Specification.LessThanLayerParamsH\x00\x12@\n\tlessEqual\x18\xbb\x06 \x01(\x0b\x32*.CoreML.Specification.LessEqualLayerParamsH\x00\x12\x44\n\x0bgreaterThan\x18\xbe\x06 \x01(\x0b\x32,.CoreML.Specification.GreaterThanLayerParamsH\x00\x12\x46\n\x0cgreaterEqual\x18\xc0\x06 \x01(\x0b\x32-.CoreML.Specification.GreaterEqualLayerParamsH\x00\x12@\n\tlogicalOr\x18\xc8\x06 \x01(\x0b\x32*.CoreML.Specification.LogicalOrLayerParamsH\x00\x12\x42\n\nlogicalXor\x18\xcd\x06 \x01(\x0b\x32+.CoreML.Specification.LogicalXorLayerParamsH\x00\x12\x42\n\nlogicalNot\x18\xd2\x06 \x01(\x0b\x32+.CoreML.Specification.LogicalNotLayerParamsH\x00\x12\x42\n\nlogicalAnd\x18\xd7\x06 \x01(\x0b\x32+.CoreML.Specification.LogicalAndLayerParamsH\x00\x12N\n\x10modBroadcastable\x18\xe1\x06 \x01(\x0b\x32\x31.CoreML.Specification.ModBroadcastableLayerParamsH\x00\x12N\n\x10minBroadcastable\x18\xe6\x06 \x01(\x0b\x32\x31.CoreML.Specification.MinBroadcastableLayerParamsH\x00\x12N\n\x10maxBroadcastable\x18\xeb\x06 \x01(\x0b\x32\x31.CoreML.Specification.MaxBroadcastableLayerParamsH\x00\x12N\n\x10\x61\x64\x64\x42roadcastable\x18\xf0\x06 \x01(\x0b\x32\x31.CoreML.Specification.AddBroadcastableLayerParamsH\x00\x12N\n\x10powBroadcastable\x18\xf5\x06 \x01(\x0b\x32\x31.CoreML.Specification.PowBroadcastableLayerParamsH\x00\x12T\n\x13\x64ivideBroadcastable\x18\xfa\x06 \x01(\x0b\x32\x34.CoreML.Specification.DivideBroadcastableLayerParamsH\x00\x12X\n\x15\x66loorDivBroadcastable\x18\xff\x06 \x01(\x0b\x32\x36.CoreML.Specification.FloorDivBroadcastableLayerParamsH\x00\x12X\n\x15multiplyBroadcastable\x18\x84\x07 \x01(\x0b\x32\x36.CoreML.Specification.MultiplyBroadcastableLayerParamsH\x00\x12X\n\x15subtractBroadcastable\x18\x89\x07 \x01(\x0b\x32\x36.CoreML.Specification.SubtractBroadcastableLayerParamsH\x00\x12\x36\n\x04tile\x18\x98\x07 \x01(\x0b\x32%.CoreML.Specification.TileLayerParamsH\x00\x12\x38\n\x05stack\x18\x9d\x07 \x01(\x0b\x32&.CoreML.Specification.StackLayerParamsH\x00\x12:\n\x06gather\x18\xa2\x07 \x01(\x0b\x32\'.CoreML.Specification.GatherLayerParamsH\x00\x12<\n\x07scatter\x18\xa7\x07 \x01(\x0b\x32(.CoreML.Specification.ScatterLayerParamsH\x00\x12>\n\x08gatherND\x18\xac\x07 \x01(\x0b\x32).CoreML.Specification.GatherNDLayerParamsH\x00\x12@\n\tscatterND\x18\xb1\x07 \x01(\x0b\x32*.CoreML.Specification.ScatterNDLayerParamsH\x00\x12@\n\tsoftmaxND\x18\xb6\x07 \x01(\x0b\x32*.CoreML.Specification.SoftmaxNDLayerParamsH\x00\x12L\n\x0fgatherAlongAxis\x18\xb8\x07 \x01(\x0b\x32\x30.CoreML.Specification.GatherAlongAxisLayerParamsH\x00\x12N\n\x10scatterAlongAxis\x18\xba\x07 \x01(\x0b\x32\x31.CoreML.Specification.ScatterAlongAxisLayerParamsH\x00\x12<\n\x07reverse\x18\xc0\x07 \x01(\x0b\x32(.CoreML.Specification.ReverseLayerParamsH\x00\x12\x42\n\nreverseSeq\x18\xc5\x07 \x01(\x0b\x32+.CoreML.Specification.ReverseSeqLayerParamsH\x00\x12<\n\x07splitND\x18\xcf\x07 \x01(\x0b\x32(.CoreML.Specification.SplitNDLayerParamsH\x00\x12>\n\x08\x63oncatND\x18\xd4\x07 \x01(\x0b\x32).CoreML.Specification.ConcatNDLayerParamsH\x00\x12@\n\ttranspose\x18\xd9\x07 \x01(\x0b\x32*.CoreML.Specification.TransposeLayerParamsH\x00\x12\x44\n\x0bsliceStatic\x18\xe3\x07 
\x01(\x0b\x32,.CoreML.Specification.SliceStaticLayerParamsH\x00\x12\x46\n\x0csliceDynamic\x18\xe8\x07 \x01(\x0b\x32-.CoreML.Specification.SliceDynamicLayerParamsH\x00\x12J\n\x0eslidingWindows\x18\xed\x07 \x01(\x0b\x32/.CoreML.Specification.SlidingWindowsLayerParamsH\x00\x12\x36\n\x04topK\x18\xf7\x07 \x01(\x0b\x32%.CoreML.Specification.TopKLayerParamsH\x00\x12:\n\x06\x61rgMin\x18\xfc\x07 \x01(\x0b\x32\'.CoreML.Specification.ArgMinLayerParamsH\x00\x12:\n\x06\x61rgMax\x18\x81\x08 \x01(\x0b\x32\'.CoreML.Specification.ArgMaxLayerParamsH\x00\x12\x44\n\x0b\x65mbeddingND\x18\x90\x08 \x01(\x0b\x32,.CoreML.Specification.EmbeddingNDLayerParamsH\x00\x12H\n\rbatchedMatmul\x18\x95\x08 \x01(\x0b\x32..CoreML.Specification.BatchedMatMulLayerParamsH\x00\x12>\n\x08getShape\x18\xa9\x08 \x01(\x0b\x32).CoreML.Specification.GetShapeLayerParamsH\x00\x12J\n\x0eloadConstantND\x18\xae\x08 \x01(\x0b\x32/.CoreML.Specification.LoadConstantNDLayerParamsH\x00\x12>\n\x08\x66illLike\x18\xb8\x08 \x01(\x0b\x32).CoreML.Specification.FillLikeLayerParamsH\x00\x12\x42\n\nfillStatic\x18\xbd\x08 \x01(\x0b\x32+.CoreML.Specification.FillStaticLayerParamsH\x00\x12\x44\n\x0b\x66illDynamic\x18\xc2\x08 \x01(\x0b\x32,.CoreML.Specification.FillDynamicLayerParamsH\x00\x12L\n\x0f\x62roadcastToLike\x18\xcc\x08 \x01(\x0b\x32\x30.CoreML.Specification.BroadcastToLikeLayerParamsH\x00\x12P\n\x11\x62roadcastToStatic\x18\xd1\x08 \x01(\x0b\x32\x32.CoreML.Specification.BroadcastToStaticLayerParamsH\x00\x12R\n\x12\x62roadcastToDynamic\x18\xd6\x08 \x01(\x0b\x32\x33.CoreML.Specification.BroadcastToDynamicLayerParamsH\x00\x12<\n\x07squeeze\x18\xe0\x08 \x01(\x0b\x32(.CoreML.Specification.SqueezeLayerParamsH\x00\x12\x42\n\nexpandDims\x18\xe5\x08 \x01(\x0b\x32+.CoreML.Specification.ExpandDimsLayerParamsH\x00\x12\x44\n\x0b\x66lattenTo2D\x18\xea\x08 \x01(\x0b\x32,.CoreML.Specification.FlattenTo2DLayerParamsH\x00\x12\x44\n\x0breshapeLike\x18\xef\x08 \x01(\x0b\x32,.CoreML.Specification.ReshapeLikeLayerParamsH\x00\x12H\n\rreshapeStatic\x18\xf4\x08 \x01(\x0b\x32..CoreML.Specification.ReshapeStaticLayerParamsH\x00\x12J\n\x0ereshapeDynamic\x18\xf9\x08 \x01(\x0b\x32/.CoreML.Specification.ReshapeDynamicLayerParamsH\x00\x12X\n\x15rankPreservingReshape\x18\xfe\x08 \x01(\x0b\x32\x36.CoreML.Specification.RankPreservingReshapeLayerParamsH\x00\x12H\n\x0b\x63onstantPad\x18\x83\t \x01(\x0b\x32\x30.CoreML.Specification.ConstantPaddingLayerParamsH\x00\x12N\n\x10randomNormalLike\x18\x92\t \x01(\x0b\x32\x31.CoreML.Specification.RandomNormalLikeLayerParamsH\x00\x12R\n\x12randomNormalStatic\x18\x97\t \x01(\x0b\x32\x33.CoreML.Specification.RandomNormalStaticLayerParamsH\x00\x12T\n\x13randomNormalDynamic\x18\x9c\t \x01(\x0b\x32\x34.CoreML.Specification.RandomNormalDynamicLayerParamsH\x00\x12P\n\x11randomUniformLike\x18\xa6\t \x01(\x0b\x32\x32.CoreML.Specification.RandomUniformLikeLayerParamsH\x00\x12T\n\x13randomUniformStatic\x18\xab\t \x01(\x0b\x32\x34.CoreML.Specification.RandomUniformStaticLayerParamsH\x00\x12V\n\x14randomUniformDynamic\x18\xb0\t \x01(\x0b\x32\x35.CoreML.Specification.RandomUniformDynamicLayerParamsH\x00\x12T\n\x13randomBernoulliLike\x18\xba\t \x01(\x0b\x32\x34.CoreML.Specification.RandomBernoulliLikeLayerParamsH\x00\x12X\n\x15randomBernoulliStatic\x18\xbf\t \x01(\x0b\x32\x36.CoreML.Specification.RandomBernoulliStaticLayerParamsH\x00\x12Z\n\x16randomBernoulliDynamic\x18\xc4\t \x01(\x0b\x32\x37.CoreML.Specification.RandomBernoulliDynamicLayerParamsH\x00\x12\\\n\x17\x63\x61tegoricalDistribution\x18\xce\t 
\x01(\x0b\x32\x38.CoreML.Specification.CategoricalDistributionLayerParamsH\x00\x12>\n\x08reduceL1\x18\xe2\t \x01(\x0b\x32).CoreML.Specification.ReduceL1LayerParamsH\x00\x12>\n\x08reduceL2\x18\xe7\t \x01(\x0b\x32).CoreML.Specification.ReduceL2LayerParamsH\x00\x12@\n\treduceMax\x18\xec\t \x01(\x0b\x32*.CoreML.Specification.ReduceMaxLayerParamsH\x00\x12@\n\treduceMin\x18\xf1\t \x01(\x0b\x32*.CoreML.Specification.ReduceMinLayerParamsH\x00\x12@\n\treduceSum\x18\xf6\t \x01(\x0b\x32*.CoreML.Specification.ReduceSumLayerParamsH\x00\x12\x42\n\nreduceProd\x18\xfb\t \x01(\x0b\x32+.CoreML.Specification.ReduceProdLayerParamsH\x00\x12\x42\n\nreduceMean\x18\x80\n \x01(\x0b\x32+.CoreML.Specification.ReduceMeanLayerParamsH\x00\x12\x46\n\x0creduceLogSum\x18\x85\n \x01(\x0b\x32-.CoreML.Specification.ReduceLogSumLayerParamsH\x00\x12L\n\x0freduceSumSquare\x18\x8a\n \x01(\x0b\x32\x30.CoreML.Specification.ReduceSumSquareLayerParamsH\x00\x12L\n\x0freduceLogSumExp\x18\x8f\n \x01(\x0b\x32\x30.CoreML.Specification.ReduceLogSumExpLayerParamsH\x00\x12\x46\n\x0cwhereNonZero\x18\xa1\n \x01(\x0b\x32-.CoreML.Specification.WhereNonZeroLayerParamsH\x00\x12J\n\x0ematrixBandPart\x18\xa3\n \x01(\x0b\x32/.CoreML.Specification.MatrixBandPartLayerParamsH\x00\x12L\n\x0flowerTriangular\x18\xa8\n \x01(\x0b\x32\x30.CoreML.Specification.LowerTriangularLayerParamsH\x00\x12L\n\x0fupperTriangular\x18\xad\n \x01(\x0b\x32\x30.CoreML.Specification.UpperTriangularLayerParamsH\x00\x12R\n\x12whereBroadcastable\x18\xb2\n \x01(\x0b\x32\x33.CoreML.Specification.WhereBroadcastableLayerParamsH\x00\x12R\n\x12layerNormalization\x18\xc6\n \x01(\x0b\x32\x33.CoreML.Specification.LayerNormalizationLayerParamsH\x00\x12X\n\x15NonMaximumSuppression\x18\xf8\n \x01(\x0b\x32\x36.CoreML.Specification.NonMaximumSuppressionLayerParamsH\x00\x12:\n\x06oneHot\x18\xaa\x0b \x01(\x0b\x32\'.CoreML.Specification.OneHotLayerParamsH\x00\x12:\n\x06\x63umSum\x18\xaf\x0b \x01(\x0b\x32\'.CoreML.Specification.CumSumLayerParamsH\x00\x12\x44\n\x0b\x63lampedReLU\x18\xb4\x0b \x01(\x0b\x32,.CoreML.Specification.ClampedReLULayerParamsH\x00\x12<\n\x07\x61rgSort\x18\xb5\x0b \x01(\x0b\x32(.CoreML.Specification.ArgSortLayerParamsH\x00\x12@\n\tpooling3d\x18\xb9\x0b \x01(\x0b\x32*.CoreML.Specification.Pooling3DLayerParamsH\x00\x12L\n\x0fglobalPooling3d\x18\xba\x0b \x01(\x0b\x32\x30.CoreML.Specification.GlobalPooling3DLayerParamsH\x00\x12\x44\n\x0bsliceBySize\x18\xbe\x0b \x01(\x0b\x32,.CoreML.Specification.SliceBySizeLayerParamsH\x00\x12H\n\rconvolution3d\x18\xbf\x0b \x01(\x0b\x32..CoreML.Specification.Convolution3DLayerParamsH\x00\x42\x07\n\x05layer\"\x83\x01\n\x11\x42ranchLayerParams\x12\x35\n\x08ifBranch\x18\x01 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\x12\x37\n\nelseBranch\x18\x02 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\"\xbb\x01\n\x0fLoopLayerParams\x12\x19\n\x11maxLoopIterations\x18\x01 \x01(\x04\x12\x14\n\x0c\x63onditionVar\x18\x02 \x01(\t\x12=\n\x10\x63onditionNetwork\x18\x03 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\x12\x38\n\x0b\x62odyNetwork\x18\x04 \x01(\x0b\x32#.CoreML.Specification.NeuralNetwork\"\x16\n\x14LoopBreakLayerParams\"\x19\n\x17LoopContinueLayerParams\"\x11\n\x0f\x43opyLayerParams\"\'\n\x16GreaterThanLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"(\n\x17GreaterEqualLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"$\n\x13LessThanLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"%\n\x14LessEqualLayerParams\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\"!\n\x10\x45qualLayerParams\x12\r\n\x05\x61lpha\x18\x01 
\x01(\x02\"$\n\x13NotEqualLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x17\n\x15LogicalAndLayerParams\"\x16\n\x14LogicalOrLayerParams\"\x17\n\x15LogicalXorLayerParams\"\x17\n\x15LogicalNotLayerParams\"\x8e\x01\n\rBorderAmounts\x12\x44\n\rborderAmounts\x18\n \x03(\x0b\x32-.CoreML.Specification.BorderAmounts.EdgeSizes\x1a\x37\n\tEdgeSizes\x12\x15\n\rstartEdgeSize\x18\x01 \x01(\x04\x12\x13\n\x0b\x65ndEdgeSize\x18\x02 \x01(\x04\"K\n\x0cValidPadding\x12;\n\x0epaddingAmounts\x18\x01 \x01(\x0b\x32#.CoreML.Specification.BorderAmounts\"\x96\x01\n\x0bSamePadding\x12H\n\rasymmetryMode\x18\x01 \x01(\x0e\x32\x31.CoreML.Specification.SamePadding.SamePaddingMode\"=\n\x0fSamePaddingMode\x12\x16\n\x12\x42OTTOM_RIGHT_HEAVY\x10\x00\x12\x12\n\x0eTOP_LEFT_HEAVY\x10\x01\"\xbd\x01\n\x0cSamplingMode\x12\x41\n\x0esamplingMethod\x18\x01 \x01(\x0e\x32).CoreML.Specification.SamplingMode.Method\"j\n\x06Method\x12\x1f\n\x1bSTRICT_ALIGN_ENDPOINTS_MODE\x10\x00\x12\x18\n\x14\x41LIGN_ENDPOINTS_MODE\x10\x01\x12\x11\n\rUPSAMPLE_MODE\x10\x02\x12\x12\n\x0eROI_ALIGN_MODE\x10\x03\"\xd8\x01\n\x12\x42oxCoordinatesMode\x12\x45\n\x07\x62oxMode\x18\x01 \x01(\x0e\x32\x34.CoreML.Specification.BoxCoordinatesMode.Coordinates\"{\n\x0b\x43oordinates\x12\x18\n\x14\x43ORNERS_HEIGHT_FIRST\x10\x00\x12\x17\n\x13\x43ORNERS_WIDTH_FIRST\x10\x01\x12\x1c\n\x18\x43\x45NTER_SIZE_HEIGHT_FIRST\x10\x02\x12\x1b\n\x17\x43\x45NTER_SIZE_WIDTH_FIRST\x10\x03\"\xb5\x01\n\x0cWeightParams\x12\x12\n\nfloatValue\x18\x01 \x03(\x02\x12\x14\n\x0c\x66loat16Value\x18\x02 \x01(\x0c\x12\x10\n\x08rawValue\x18\x1e \x01(\x0c\x12\x14\n\x0cint8RawValue\x18\x1f \x01(\x0c\x12>\n\x0cquantization\x18( \x01(\x0b\x32(.CoreML.Specification.QuantizationParams\x12\x13\n\x0bisUpdatable\x18\x32 \x01(\x08\"\xe4\x01\n\x12QuantizationParams\x12\x14\n\x0cnumberOfBits\x18\x01 \x01(\x04\x12L\n\x12linearQuantization\x18\x65 \x01(\x0b\x32..CoreML.Specification.LinearQuantizationParamsH\x00\x12V\n\x17lookupTableQuantization\x18\x66 \x01(\x0b\x32\x33.CoreML.Specification.LookUpTableQuantizationParamsH\x00\x42\x12\n\x10QuantizationType\"7\n\x18LinearQuantizationParams\x12\r\n\x05scale\x18\x01 \x03(\x02\x12\x0c\n\x04\x62ias\x18\x02 \x03(\x02\"3\n\x1dLookUpTableQuantizationParams\x12\x12\n\nfloatValue\x18\x01 \x03(\x02\"\xbd\x03\n\x16\x43onvolutionLayerParams\x12\x16\n\x0eoutputChannels\x18\x01 \x01(\x04\x12\x16\n\x0ekernelChannels\x18\x02 \x01(\x04\x12\x0f\n\x07nGroups\x18\n \x01(\x04\x12\x12\n\nkernelSize\x18\x14 \x03(\x04\x12\x0e\n\x06stride\x18\x1e \x03(\x04\x12\x16\n\x0e\x64ilationFactor\x18( \x03(\x04\x12\x33\n\x05valid\x18\x32 \x01(\x0b\x32\".CoreML.Specification.ValidPaddingH\x00\x12\x31\n\x04same\x18\x33 \x01(\x0b\x32!.CoreML.Specification.SamePaddingH\x00\x12\x17\n\x0fisDeconvolution\x18< \x01(\x08\x12\x0f\n\x07hasBias\x18\x46 \x01(\x08\x12\x33\n\x07weights\x18Z \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18[ \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x13\n\x0boutputShape\x18\x64 \x03(\x04\x42\x18\n\x16\x43onvolutionPaddingType\"\xec\x05\n\x18\x43onvolution3DLayerParams\x12\x16\n\x0eoutputChannels\x18\x01 \x01(\x05\x12\x15\n\rinputChannels\x18\x02 \x01(\x05\x12\x0f\n\x07nGroups\x18\n \x01(\x05\x12\x13\n\x0bkernelDepth\x18\x14 \x01(\x05\x12\x14\n\x0ckernelHeight\x18\x15 \x01(\x05\x12\x13\n\x0bkernelWidth\x18\x16 \x01(\x05\x12\x13\n\x0bstrideDepth\x18\x1f \x01(\x05\x12\x14\n\x0cstrideHeight\x18 \x01(\x05\x12\x13\n\x0bstrideWidth\x18! 
\x01(\x05\x12\x15\n\rdilationDepth\x18( \x01(\x05\x12\x16\n\x0e\x64ilationHeight\x18) \x01(\x05\x12\x15\n\rdilationWidth\x18* \x01(\x05\x12\x0f\n\x07hasBias\x18\x32 \x01(\x08\x12\x33\n\x07weights\x18< \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18= \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12O\n\x0bpaddingType\x18\x46 \x01(\x0e\x32:.CoreML.Specification.Convolution3DLayerParams.PaddingType\x12\x1a\n\x12\x63ustomPaddingFront\x18P \x01(\x05\x12\x19\n\x11\x63ustomPaddingBack\x18Q \x01(\x05\x12\x18\n\x10\x63ustomPaddingTop\x18R \x01(\x05\x12\x1b\n\x13\x63ustomPaddingBottom\x18S \x01(\x05\x12\x19\n\x11\x63ustomPaddingLeft\x18T \x01(\x05\x12\x1a\n\x12\x63ustomPaddingRight\x18U \x01(\x05\x12\x17\n\x0fisDeconvolution\x18V \x01(\x08\x12\x13\n\x0boutputShape\x18W \x03(\x04\".\n\x0bPaddingType\x12\n\n\x06\x43USTOM\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x08\n\x04SAME\x10\x02\"\xdd\x01\n\x17InnerProductLayerParams\x12\x15\n\rinputChannels\x18\x01 \x01(\x04\x12\x16\n\x0eoutputChannels\x18\x02 \x01(\x04\x12\x0f\n\x07hasBias\x18\n \x01(\x08\x12\x33\n\x07weights\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x1b\n\x13int8DynamicQuantize\x18\x16 \x01(\x08\"\xb8\x01\n\x14\x45mbeddingLayerParams\x12\x10\n\x08inputDim\x18\x01 \x01(\x04\x12\x16\n\x0eoutputChannels\x18\x02 \x01(\x04\x12\x0f\n\x07hasBias\x18\n \x01(\x08\x12\x33\n\x07weights\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xba\x01\n\x16\x45mbeddingNDLayerParams\x12\x11\n\tvocabSize\x18\x01 \x01(\x04\x12\x15\n\rembeddingSize\x18\x02 \x01(\x04\x12\x0f\n\x07hasBias\x18\x03 \x01(\x08\x12\x33\n\x07weights\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xbd\x02\n\x14\x42\x61tchnormLayerParams\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x04\x12\x16\n\x0e\x63omputeMeanVar\x18\x05 \x01(\x08\x12\x1d\n\x15instanceNormalization\x18\x06 \x01(\x08\x12\x0f\n\x07\x65psilon\x18\n \x01(\x02\x12\x31\n\x05gamma\x18\x0f \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62\x65ta\x18\x10 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04mean\x18\x11 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x34\n\x08variance\x18\x12 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xe8\x03\n\x12PoolingLayerParams\x12\x42\n\x04type\x18\x01 \x01(\x0e\x32\x34.CoreML.Specification.PoolingLayerParams.PoolingType\x12\x12\n\nkernelSize\x18\n \x03(\x04\x12\x0e\n\x06stride\x18\x14 \x03(\x04\x12\x33\n\x05valid\x18\x1e \x01(\x0b\x32\".CoreML.Specification.ValidPaddingH\x00\x12\x31\n\x04same\x18\x1f \x01(\x0b\x32!.CoreML.Specification.SamePaddingH\x00\x12Y\n\x10includeLastPixel\x18 \x01(\x0b\x32=.CoreML.Specification.PoolingLayerParams.ValidCompletePaddingH\x00\x12\x1d\n\x15\x61vgPoolExcludePadding\x18\x32 \x01(\x08\x12\x15\n\rglobalPooling\x18< \x01(\x08\x1a.\n\x14ValidCompletePadding\x12\x16\n\x0epaddingAmounts\x18\n \x03(\x04\"+\n\x0bPoolingType\x12\x07\n\x03MAX\x10\x00\x12\x0b\n\x07\x41VERAGE\x10\x01\x12\x06\n\x02L2\x10\x02\x42\x14\n\x12PoolingPaddingType\"\xd6\x04\n\x14Pooling3DLayerParams\x12\x46\n\x04type\x18\x01 \x01(\x0e\x32\x38.CoreML.Specification.Pooling3DLayerParams.PoolingType3D\x12\x13\n\x0bkernelDepth\x18\x02 \x01(\x05\x12\x14\n\x0ckernelHeight\x18\x03 \x01(\x05\x12\x13\n\x0bkernelWidth\x18\x04 \x01(\x05\x12\x13\n\x0bstrideDepth\x18\x05 
\x01(\x05\x12\x14\n\x0cstrideHeight\x18\x06 \x01(\x05\x12\x13\n\x0bstrideWidth\x18\x07 \x01(\x05\x12T\n\x0bpaddingType\x18\x0f \x01(\x0e\x32?.CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType\x12\x1a\n\x12\x63ustomPaddingFront\x18\x08 \x01(\x05\x12\x19\n\x11\x63ustomPaddingBack\x18\t \x01(\x05\x12\x18\n\x10\x63ustomPaddingTop\x18\n \x01(\x05\x12\x1b\n\x13\x63ustomPaddingBottom\x18\x0b \x01(\x05\x12\x19\n\x11\x63ustomPaddingLeft\x18\x0c \x01(\x05\x12\x1a\n\x12\x63ustomPaddingRight\x18\r \x01(\x05\x12\x1b\n\x13\x63ountExcludePadding\x18\x0e \x01(\x08\"%\n\rPoolingType3D\x12\x07\n\x03MAX\x10\x00\x12\x0b\n\x07\x41VERAGE\x10\x01\"7\n\x14Pooling3DPaddingType\x12\n\n\x06\x43USTOM\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x08\n\x04SAME\x10\x02\"\x9d\x01\n\x1aGlobalPooling3DLayerParams\x12R\n\x04type\x18\x01 \x01(\x0e\x32\x44.CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D\"+\n\x13GlobalPoolingType3D\x12\x07\n\x03MAX\x10\x00\x12\x0b\n\x07\x41VERAGE\x10\x01\"\xa1\x03\n\x12PaddingLayerParams\x12L\n\x08\x63onstant\x18\x01 \x01(\x0b\x32\x38.CoreML.Specification.PaddingLayerParams.PaddingConstantH\x00\x12P\n\nreflection\x18\x02 \x01(\x0b\x32:.CoreML.Specification.PaddingLayerParams.PaddingReflectionH\x00\x12R\n\x0breplication\x18\x03 \x01(\x0b\x32;.CoreML.Specification.PaddingLayerParams.PaddingReplicationH\x00\x12;\n\x0epaddingAmounts\x18\n \x01(\x0b\x32#.CoreML.Specification.BorderAmounts\x1a \n\x0fPaddingConstant\x12\r\n\x05value\x18\x01 \x01(\x02\x1a\x13\n\x11PaddingReflection\x1a\x14\n\x12PaddingReplicationB\r\n\x0bPaddingType\"+\n\x11\x43oncatLayerParams\x12\x16\n\x0esequenceConcat\x18\x64 \x01(\x08\"K\n\x0eLRNLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\x12\x11\n\tlocalSize\x18\x03 \x01(\x04\x12\t\n\x01k\x18\x04 \x01(\x02\"\x14\n\x12SoftmaxLayerParams\"$\n\x10SplitLayerParams\x12\x10\n\x08nOutputs\x18\x01 \x01(\x04\"\x1f\n\x0e\x41\x64\x64LayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"$\n\x13MultiplyLayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\"\x84\x02\n\x18UnaryFunctionLayerParams\x12\x46\n\x04type\x18\x01 \x01(\x0e\x32\x38.CoreML.Specification.UnaryFunctionLayerParams.Operation\x12\r\n\x05\x61lpha\x18\x02 \x01(\x02\x12\x0f\n\x07\x65psilon\x18\x03 \x01(\x02\x12\r\n\x05shift\x18\x04 \x01(\x02\x12\r\n\x05scale\x18\x05 \x01(\x02\"b\n\tOperation\x12\x08\n\x04SQRT\x10\x00\x12\t\n\x05RSQRT\x10\x01\x12\x0b\n\x07INVERSE\x10\x02\x12\t\n\x05POWER\x10\x03\x12\x07\n\x03\x45XP\x10\x04\x12\x07\n\x03LOG\x10\x05\x12\x07\n\x03\x41\x42S\x10\x06\x12\r\n\tTHRESHOLD\x10\x07\"\xf1\x02\n\x13UpsampleLayerParams\x12\x15\n\rscalingFactor\x18\x01 \x03(\x04\x12\x1f\n\x17\x66ractionalScalingFactor\x18\x07 \x03(\x02\x12I\n\x04mode\x18\x05 \x01(\x0e\x32;.CoreML.Specification.UpsampleLayerParams.InterpolationMode\x12X\n\x12linearUpsampleMode\x18\x06 \x01(\x0e\x32<.CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode\")\n\x11InterpolationMode\x12\x06\n\x02NN\x10\x00\x12\x0c\n\x08\x42ILINEAR\x10\x01\"R\n\x12LinearUpsampleMode\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x16\n\x12\x41LIGN_CORNERS_TRUE\x10\x01\x12\x17\n\x13\x41LIGN_CORNERS_FALSE\x10\x02\"a\n\x19ResizeBilinearLayerParams\x12\x12\n\ntargetSize\x18\x01 \x03(\x04\x12\x30\n\x04mode\x18\x02 \x01(\x0b\x32\".CoreML.Specification.SamplingMode\"\xd4\x01\n\x15\x43ropResizeLayerParams\x12\x12\n\ntargetSize\x18\x01 \x03(\x04\x12\x1d\n\x15normalizedCoordinates\x18\x02 \x01(\x08\x12\x30\n\x04mode\x18\x03 \x01(\x0b\x32\".CoreML.Specification.SamplingMode\x12@\n\x0e\x62oxIndicesMode\x18\x04 
\x01(\x0b\x32(.CoreML.Specification.BoxCoordinatesMode\x12\x14\n\x0cspatialScale\x18\x05 \x01(\x02\"R\n\x0f\x42iasLayerParams\x12\r\n\x05shape\x18\x01 \x03(\x04\x12\x30\n\x04\x62ias\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\xaf\x01\n\x10ScaleLayerParams\x12\x12\n\nshapeScale\x18\x01 \x03(\x04\x12\x31\n\x05scale\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x0f\n\x07hasBias\x18\x03 \x01(\x08\x12\x11\n\tshapeBias\x18\x04 \x03(\x04\x12\x30\n\x04\x62ias\x18\x05 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"Z\n\x17LoadConstantLayerParams\x12\r\n\x05shape\x18\x01 \x03(\x04\x12\x30\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\")\n\x16L2NormalizeLayerParams\x12\x0f\n\x07\x65psilon\x18\x01 \x01(\x02\"\x8e\x01\n\x12\x46lattenLayerParams\x12\x43\n\x04mode\x18\x01 \x01(\x0e\x32\x35.CoreML.Specification.FlattenLayerParams.FlattenOrder\"3\n\x0c\x46lattenOrder\x12\x11\n\rCHANNEL_FIRST\x10\x00\x12\x10\n\x0c\x43HANNEL_LAST\x10\x01\"\xa3\x01\n\x12ReshapeLayerParams\x12\x13\n\x0btargetShape\x18\x01 \x03(\x03\x12\x43\n\x04mode\x18\x02 \x01(\x0e\x32\x35.CoreML.Specification.ReshapeLayerParams.ReshapeOrder\"3\n\x0cReshapeOrder\x12\x11\n\rCHANNEL_FIRST\x10\x00\x12\x10\n\x0c\x43HANNEL_LAST\x10\x01\"\"\n\x12PermuteLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x03(\x04\"\xd1\x01\n\x19ReorganizeDataLayerParams\x12P\n\x04mode\x18\x01 \x01(\x0e\x32\x42.CoreML.Specification.ReorganizeDataLayerParams.ReorganizationType\x12\x11\n\tblockSize\x18\x02 \x01(\x04\"O\n\x12ReorganizationType\x12\x12\n\x0eSPACE_TO_DEPTH\x10\x00\x12\x12\n\x0e\x44\x45PTH_TO_SPACE\x10\x01\x12\x11\n\rPIXEL_SHUFFLE\x10\x02\"\xc8\x01\n\x10SliceLayerParams\x12\x12\n\nstartIndex\x18\x01 \x01(\x03\x12\x10\n\x08\x65ndIndex\x18\x02 \x01(\x03\x12\x0e\n\x06stride\x18\x03 \x01(\x04\x12>\n\x04\x61xis\x18\x04 \x01(\x0e\x32\x30.CoreML.Specification.SliceLayerParams.SliceAxis\">\n\tSliceAxis\x12\x10\n\x0c\x43HANNEL_AXIS\x10\x00\x12\x0f\n\x0bHEIGHT_AXIS\x10\x01\x12\x0e\n\nWIDTH_AXIS\x10\x02\"\xd9\x02\n\x11ReduceLayerParams\x12\x45\n\x04mode\x18\x01 \x01(\x0e\x32\x37.CoreML.Specification.ReduceLayerParams.ReduceOperation\x12\x0f\n\x07\x65psilon\x18\x02 \x01(\x02\x12@\n\x04\x61xis\x18\x03 \x01(\x0e\x32\x32.CoreML.Specification.ReduceLayerParams.ReduceAxis\"v\n\x0fReduceOperation\x12\x07\n\x03SUM\x10\x00\x12\x07\n\x03\x41VG\x10\x01\x12\x08\n\x04PROD\x10\x02\x12\n\n\x06LOGSUM\x10\x03\x12\r\n\tSUMSQUARE\x10\x04\x12\x06\n\x02L1\x10\x05\x12\x06\n\x02L2\x10\x06\x12\x07\n\x03MAX\x10\x07\x12\x07\n\x03MIN\x10\x08\x12\n\n\x06\x41RGMAX\x10\t\"2\n\nReduceAxis\x12\x07\n\x03\x43HW\x10\x00\x12\x06\n\x02HW\x10\x01\x12\x05\n\x01\x43\x10\x02\x12\x05\n\x01H\x10\x03\x12\x05\n\x01W\x10\x04\"[\n\x0f\x43ropLayerParams\x12\x38\n\x0b\x63ropAmounts\x18\x01 \x01(\x0b\x32#.CoreML.Specification.BorderAmounts\x12\x0e\n\x06offset\x18\x05 \x03(\x04\"\x14\n\x12\x41verageLayerParams\"\x10\n\x0eMaxLayerParams\"\x10\n\x0eMinLayerParams\"1\n\x15\x44otProductLayerParams\x12\x18\n\x10\x63osineSimilarity\x18\x01 \x01(\x08\"f\n MeanVarianceNormalizeLayerParams\x12\x16\n\x0e\x61\x63rossChannels\x18\x01 \x01(\x08\x12\x19\n\x11normalizeVariance\x18\x02 \x01(\x08\x12\x0f\n\x07\x65psilon\x18\x03 \x01(\x02\"1\n\x19SequenceRepeatLayerParams\x12\x14\n\x0cnRepetitions\x18\x01 \x01(\x04\"\xff\x02\n\x1aSimpleRecurrentLayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12:\n\nactivation\x18\n \x01(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x16\n\x0esequenceOutput\x18\x0f 
\x01(\x08\x12\x15\n\rhasBiasVector\x18\x14 \x01(\x08\x12\x38\n\x0cweightMatrix\x18\x1e \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12;\n\x0frecursionMatrix\x18\x1f \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x36\n\nbiasVector\x18 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x14\n\x0creverseInput\x18\x64 \x01(\x08\"\xaa\x06\n\x0eGRULayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12;\n\x0b\x61\x63tivations\x18\n \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x16\n\x0esequenceOutput\x18\x0f \x01(\x08\x12\x16\n\x0ehasBiasVectors\x18\x14 \x01(\x08\x12\x42\n\x16updateGateWeightMatrix\x18\x1e \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x41\n\x15resetGateWeightMatrix\x18\x1f \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16outputGateWeightMatrix\x18 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19updateGateRecursionMatrix\x18\x32 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18resetGateRecursionMatrix\x18\x33 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19outputGateRecursionMatrix\x18\x34 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14updateGateBiasVector\x18\x46 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12?\n\x13resetGateBiasVector\x18G \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14outputGateBiasVector\x18H \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x14\n\x0creverseInput\x18\x64 \x01(\x08\"\xaa\x01\n\nLSTMParams\x12\x16\n\x0esequenceOutput\x18\n \x01(\x08\x12\x16\n\x0ehasBiasVectors\x18\x14 \x01(\x08\x12\x12\n\nforgetBias\x18\x1e \x01(\x08\x12\x1a\n\x12hasPeepholeVectors\x18( \x01(\x08\x12!\n\x19\x63oupledInputAndForgetGate\x18\x32 \x01(\x08\x12\x19\n\x11\x63\x65llClipThreshold\x18< \x01(\x02\"\x94\x08\n\x10LSTMWeightParams\x12\x41\n\x15inputGateWeightMatrix\x18\x01 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16\x66orgetGateWeightMatrix\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16\x62lockInputWeightMatrix\x18\x03 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x42\n\x16outputGateWeightMatrix\x18\x04 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18inputGateRecursionMatrix\x18\x14 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19\x66orgetGateRecursionMatrix\x18\x15 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19\x62lockInputRecursionMatrix\x18\x16 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x45\n\x19outputGateRecursionMatrix\x18\x17 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12?\n\x13inputGateBiasVector\x18( \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14\x66orgetGateBiasVector\x18) \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14\x62lockInputBiasVector\x18* \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12@\n\x14outputGateBiasVector\x18+ \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x43\n\x17inputGatePeepholeVector\x18< \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18\x66orgetGatePeepholeVector\x18= \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x44\n\x18outputGatePeepholeVector\x18> \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\x95\x02\n\x1dUniDirectionalLSTMLayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12;\n\x0b\x61\x63tivations\x18\n \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x30\n\x06params\x18\x0f \x01(\x0b\x32 
.CoreML.Specification.LSTMParams\x12<\n\x0cweightParams\x18\x14 \x01(\x0b\x32&.CoreML.Specification.LSTMWeightParams\x12\x14\n\x0creverseInput\x18\x64 \x01(\x08\"\xd2\x02\n\x1c\x42iDirectionalLSTMLayerParams\x12\x17\n\x0finputVectorSize\x18\x01 \x01(\x04\x12\x18\n\x10outputVectorSize\x18\x02 \x01(\x04\x12\x46\n\x16\x61\x63tivationsForwardLSTM\x18\n \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12G\n\x17\x61\x63tivationsBackwardLSTM\x18\x0b \x03(\x0b\x32&.CoreML.Specification.ActivationParams\x12\x30\n\x06params\x18\x0f \x01(\x0b\x32 .CoreML.Specification.LSTMParams\x12<\n\x0cweightParams\x18\x14 \x03(\x0b\x32&.CoreML.Specification.LSTMWeightParams\"\xbe\x03\n\x11\x43ustomLayerParams\x12\x11\n\tclassName\x18\n \x01(\t\x12\x33\n\x07weights\x18\x14 \x03(\x0b\x32\".CoreML.Specification.WeightParams\x12K\n\nparameters\x18\x1e \x03(\x0b\x32\x37.CoreML.Specification.CustomLayerParams.ParametersEntry\x12\x13\n\x0b\x64\x65scription\x18( \x01(\t\x1a\x8c\x01\n\x15\x43ustomLayerParamValue\x12\x15\n\x0b\x64oubleValue\x18\n \x01(\x01H\x00\x12\x15\n\x0bstringValue\x18\x14 \x01(\tH\x00\x12\x12\n\x08intValue\x18\x1e \x01(\x05H\x00\x12\x13\n\tlongValue\x18( \x01(\x03H\x00\x12\x13\n\tboolValue\x18\x32 \x01(\x08H\x00\x42\x07\n\x05value\x1ap\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12L\n\x05value\x18\x02 \x01(\x0b\x32=.CoreML.Specification.CustomLayerParams.CustomLayerParamValue:\x02\x38\x01\"$\n\x14TransposeLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x04\"\xa0\x02\n\x18\x42\x61tchedMatMulLayerParams\x12\x12\n\ntransposeA\x18\x01 \x01(\x08\x12\x12\n\ntransposeB\x18\x02 \x01(\x08\x12\"\n\x1aweightMatrixFirstDimension\x18\x05 \x01(\x04\x12#\n\x1bweightMatrixSecondDimension\x18\x06 \x01(\x04\x12\x0f\n\x07hasBias\x18\x07 \x01(\x08\x12\x33\n\x07weights\x18\x08 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62ias\x18\t \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x1b\n\x13int8DynamicQuantize\x18\n \x01(\x08\"#\n\x13\x43oncatNDLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"$\n\x14SoftmaxNDLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"(\n\x12ReverseLayerParams\x12\x12\n\nreverseDim\x18\x01 \x03(\x08\"@\n\x15ReverseSeqLayerParams\x12\x11\n\tbatchAxis\x18\x01 \x01(\x03\x12\x14\n\x0csequenceAxis\x18\x02 \x01(\x03\"\\\n\x19LoadConstantNDLayerParams\x12\r\n\x05shape\x18\x01 \x03(\x04\x12\x30\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"$\n\x13\x46illLikeLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\";\n\x15\x46illStaticLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\x12\x13\n\x0btargetShape\x18\x02 \x03(\x04\"\'\n\x16\x46illDynamicLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\"\x1f\n\x1dWhereBroadcastableLayerParams\"\x10\n\x0eSinLayerParams\"\x10\n\x0e\x43osLayerParams\"\x10\n\x0eTanLayerParams\"\x11\n\x0f\x41sinLayerParams\"\x11\n\x0f\x41\x63osLayerParams\"\x11\n\x0f\x41tanLayerParams\"\x11\n\x0fSinhLayerParams\"\x11\n\x0f\x43oshLayerParams\"\x11\n\x0fTanhLayerParams\"\x12\n\x10\x41sinhLayerParams\"\x12\n\x10\x41\x63oshLayerParams\"\x12\n\x10\x41tanhLayerParams\"\x1d\n\x1bPowBroadcastableLayerParams\"\x11\n\x0f\x45xp2LayerParams\"\x19\n\x17WhereNonZeroLayerParams\"?\n\x19MatrixBandPartLayerParams\x12\x10\n\x08numLower\x18\x01 \x01(\x03\x12\x10\n\x08numUpper\x18\x02 \x01(\x03\"\'\n\x1aUpperTriangularLayerParams\x12\t\n\x01k\x18\x01 \x01(\x03\"\'\n\x1aLowerTriangularLayerParams\x12\t\n\x01k\x18\x01 \x01(\x03\"\x1c\n\x1a\x42roadcastToLikeLayerParams\"3\n\x1c\x42roadcastToStaticLayerParams\x12\x13\n\x0btargetShape\x18\x01 
\x03(\x04\"\x1f\n\x1d\x42roadcastToDynamicLayerParams\"\x1d\n\x1b\x41\x64\x64\x42roadcastableLayerParams\"\x1d\n\x1bMaxBroadcastableLayerParams\"\x1d\n\x1bMinBroadcastableLayerParams\"\x1d\n\x1bModBroadcastableLayerParams\"\"\n FloorDivBroadcastableLayerParams\"\"\n SubtractBroadcastableLayerParams\"\"\n MultiplyBroadcastableLayerParams\" \n\x1e\x44ivideBroadcastableLayerParams\"!\n\x11GatherLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"S\n\x12ScatterLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12/\n\x04mode\x18\x02 \x01(\x0e\x32!.CoreML.Specification.ScatterMode\"\x15\n\x13GatherNDLayerParams\"G\n\x14ScatterNDLayerParams\x12/\n\x04mode\x18\x01 \x01(\x0e\x32!.CoreML.Specification.ScatterMode\"*\n\x1aGatherAlongAxisLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"\\\n\x1bScatterAlongAxisLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12/\n\x04mode\x18\x02 \x01(\x0e\x32!.CoreML.Specification.ScatterMode\" \n\x10StackLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"7\n RankPreservingReshapeLayerParams\x12\x13\n\x0btargetShape\x18\x01 \x03(\x03\"a\n\x1a\x43onstantPaddingLayerParams\x12\r\n\x05value\x18\x01 \x01(\x02\x12\x12\n\npadAmounts\x18\x02 \x03(\x04\x12 \n\x18padToGivenOutputSizeMode\x18\x03 \x01(\x08\"I\n\x1bRandomNormalLikeLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04mean\x18\x02 \x01(\x02\x12\x0e\n\x06stdDev\x18\x03 \x01(\x02\"`\n\x1dRandomNormalStaticLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04mean\x18\x02 \x01(\x02\x12\x0e\n\x06stdDev\x18\x03 \x01(\x02\x12\x13\n\x0boutputShape\x18\x04 \x03(\x04\"L\n\x1eRandomNormalDynamicLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04mean\x18\x02 \x01(\x02\x12\x0e\n\x06stdDev\x18\x03 \x01(\x02\"L\n\x1cRandomUniformLikeLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0e\n\x06minVal\x18\x02 \x01(\x02\x12\x0e\n\x06maxVal\x18\x03 \x01(\x02\"c\n\x1eRandomUniformStaticLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0e\n\x06minVal\x18\x02 \x01(\x02\x12\x0e\n\x06maxVal\x18\x03 \x01(\x02\x12\x13\n\x0boutputShape\x18\x04 \x03(\x04\"O\n\x1fRandomUniformDynamicLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0e\n\x06minVal\x18\x02 \x01(\x02\x12\x0e\n\x06maxVal\x18\x03 \x01(\x02\"<\n\x1eRandomBernoulliLikeLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04prob\x18\x02 \x01(\x02\"S\n RandomBernoulliStaticLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04prob\x18\x02 \x01(\x02\x12\x13\n\x0boutputShape\x18\x03 \x03(\x04\"?\n!RandomBernoulliDynamicLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x0c\n\x04prob\x18\x02 \x01(\x02\"z\n\"CategoricalDistributionLayerParams\x12\x0c\n\x04seed\x18\x01 \x01(\x03\x12\x12\n\nnumSamples\x18\x02 \x01(\x03\x12\x10\n\x08isLogits\x18\x03 \x01(\x08\x12\x0b\n\x03\x65ps\x18\x04 \x01(\x02\x12\x13\n\x0btemperature\x18\x05 \x01(\x02\"H\n\x13ReduceL1LayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"H\n\x13ReduceL2LayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"I\n\x14ReduceMaxLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"I\n\x14ReduceMinLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"I\n\x14ReduceSumLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 
\x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"J\n\x15ReduceProdLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"J\n\x15ReduceMeanLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"L\n\x17ReduceLogSumLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"O\n\x1aReduceSumSquareLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"O\n\x1aReduceLogSumExpLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x10\n\x08keepDims\x18\x02 \x01(\x08\x12\x11\n\treduceAll\x18\x03 \x01(\x08\"%\n\x15\x45xpandDimsLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\"&\n\x16\x46lattenTo2DLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\"/\n\x18ReshapeStaticLayerParams\x12\x13\n\x0btargetShape\x18\x01 \x03(\x03\"\x18\n\x16ReshapeLikeLayerParams\"\x1b\n\x19ReshapeDynamicLayerParams\"6\n\x12SqueezeLayerParams\x12\x0c\n\x04\x61xes\x18\x01 \x03(\x03\x12\x12\n\nsqueezeAll\x18\x02 \x01(\x08\">\n\x0fTopKLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\t\n\x01K\x18\x02 \x01(\x04\x12\x12\n\nuseBottomK\x18\x03 \x01(\x08\"4\n\x11\x41rgMaxLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x11\n\tremoveDim\x18\x02 \x01(\x08\"4\n\x11\x41rgMinLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x11\n\tremoveDim\x18\x02 \x01(\x08\"I\n\x12SplitNDLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x11\n\tnumSplits\x18\x02 \x01(\x04\x12\x12\n\nsplitSizes\x18\x03 \x03(\x04\"\x11\n\x0f\x43\x65ilLayerParams\"\x12\n\x10RoundLayerParams\"\x12\n\x10\x46loorLayerParams\"\x11\n\x0fSignLayerParams\"1\n\x0f\x43lipLayerParams\x12\x0e\n\x06minVal\x18\x01 \x01(\x02\x12\x0e\n\x06maxVal\x18\x02 \x01(\x02\"\x87\x01\n\x16SliceStaticLayerParams\x12\x10\n\x08\x62\x65ginIds\x18\x01 \x03(\x03\x12\x12\n\nbeginMasks\x18\x02 \x03(\x08\x12\x0e\n\x06\x65ndIds\x18\x03 \x03(\x03\x12\x10\n\x08\x65ndMasks\x18\x04 \x03(\x08\x12\x0f\n\x07strides\x18\x05 \x03(\x03\x12\x14\n\x0csqueezeMasks\x18\x06 \x03(\x08\"v\n\x17SliceDynamicLayerParams\x12\x12\n\nbeginMasks\x18\x02 \x03(\x08\x12\x0e\n\x06\x65ndIds\x18\x03 \x03(\x03\x12\x10\n\x08\x65ndMasks\x18\x04 \x03(\x08\x12\x0f\n\x07strides\x18\x05 \x03(\x03\x12\x14\n\x0csqueezeMasks\x18\x06 \x03(\x08\"\x1f\n\x0fTileLayerParams\x12\x0c\n\x04reps\x18\x01 \x03(\x04\"\x15\n\x13GetShapeLayerParams\"\x10\n\x0e\x45rfLayerParams\"\x99\x01\n\x0fGeluLayerParams\x12<\n\x04mode\x18\x01 \x01(\x0e\x32..CoreML.Specification.GeluLayerParams.GeluMode\"H\n\x08GeluMode\x12\t\n\x05\x45XACT\x10\x00\x12\x16\n\x12TANH_APPROXIMATION\x10\x01\x12\x19\n\x15SIGMOID_APPROXIMATION\x10\x02\"U\n\x16RangeStaticLayerParams\x12\x10\n\x08\x65ndValue\x18\x01 \x01(\x02\x12\x12\n\nstartValue\x18\x02 \x01(\x02\x12\x15\n\rstepSizeValue\x18\x03 \x01(\x02\"D\n\x17RangeDynamicLayerParams\x12\x12\n\nstartValue\x18\x02 \x01(\x02\x12\x15\n\rstepSizeValue\x18\x03 \x01(\x02\"K\n\x19SlidingWindowsLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x12\n\nwindowSize\x18\x02 \x01(\x04\x12\x0c\n\x04step\x18\x03 \x01(\x04\"\xaa\x01\n\x1dLayerNormalizationLayerParams\x12\x17\n\x0fnormalizedShape\x18\x01 \x03(\x03\x12\x0b\n\x03\x65ps\x18\x02 \x01(\x02\x12\x31\n\x05gamma\x18\x03 \x01(\x0b\x32\".CoreML.Specification.WeightParams\x12\x30\n\x04\x62\x65ta\x18\x04 \x01(\x0b\x32\".CoreML.Specification.WeightParams\"\x7f\n 
NonMaximumSuppressionLayerParams\x12\x14\n\x0ciouThreshold\x18\x01 \x01(\x02\x12\x16\n\x0escoreThreshold\x18\x02 \x01(\x02\x12\x10\n\x08maxBoxes\x18\x03 \x01(\x04\x12\x1b\n\x13perClassSuppression\x18\x04 \x01(\x08\"5\n\x16\x43lampedReLULayerParams\x12\r\n\x05\x61lpha\x18\x01 \x01(\x02\x12\x0c\n\x04\x62\x65ta\x18\x02 \x01(\x02\"6\n\x12\x41rgSortLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x12\n\ndescending\x18\x02 \x01(\x08\"4\n\x16SliceBySizeLayerParams\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x03\"\xc5\x04\n\x17NeuralNetworkClassifier\x12\x38\n\x06layers\x18\x01 \x03(\x0b\x32(.CoreML.Specification.NeuralNetworkLayer\x12G\n\rpreprocessing\x18\x02 \x03(\x0b\x32\x30.CoreML.Specification.NeuralNetworkPreprocessing\x12Y\n\x16\x61rrayInputShapeMapping\x18\x05 \x01(\x0e\x32\x39.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping\x12T\n\x16imageInputShapeMapping\x18\x06 \x01(\x0e\x32\x34.CoreML.Specification.NeuralNetworkImageShapeMapping\x12\x43\n\x0cupdateParams\x18\n \x01(\x0b\x32-.CoreML.Specification.NetworkUpdateParameters\x12?\n\x11stringClassLabels\x18\x64 \x01(\x0b\x32\".CoreML.Specification.StringVectorH\x00\x12=\n\x10int64ClassLabels\x18\x65 \x01(\x0b\x32!.CoreML.Specification.Int64VectorH\x00\x12\"\n\x19labelProbabilityLayerName\x18\xc8\x01 \x01(\tB\r\n\x0b\x43lassLabels\"^\n\x11OneHotLayerParams\x12\x18\n\x10oneHotVectorSize\x18\x01 \x01(\x04\x12\x0c\n\x04\x61xis\x18\x02 \x01(\x03\x12\x0f\n\x07onValue\x18\x03 \x01(\x02\x12\x10\n\x08offValue\x18\x04 \x01(\x02\"K\n\x11\x43umSumLayerParams\x12\x0c\n\x04\x61xis\x18\x01 \x01(\x03\x12\x17\n\x0f\x65xcludeFinalSum\x18\x02 \x01(\x08\x12\x0f\n\x07reverse\x18\x03 \x01(\x08\"\x91\x03\n\x16NeuralNetworkRegressor\x12\x38\n\x06layers\x18\x01 \x03(\x0b\x32(.CoreML.Specification.NeuralNetworkLayer\x12G\n\rpreprocessing\x18\x02 \x03(\x0b\x32\x30.CoreML.Specification.NeuralNetworkPreprocessing\x12Y\n\x16\x61rrayInputShapeMapping\x18\x05 \x01(\x0e\x32\x39.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping\x12T\n\x16imageInputShapeMapping\x18\x06 \x01(\x0e\x32\x34.CoreML.Specification.NeuralNetworkImageShapeMapping\x12\x43\n\x0cupdateParams\x18\n \x01(\x0b\x32-.CoreML.Specification.NetworkUpdateParameters\"\xa2\x02\n\x17NetworkUpdateParameters\x12\x33\n\nlossLayers\x18\x01 \x03(\x0b\x32\x1f.CoreML.Specification.LossLayer\x12\x32\n\toptimizer\x18\x02 \x01(\x0b\x32\x1f.CoreML.Specification.Optimizer\x12\x34\n\x06\x65pochs\x18\x03 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\x12\x34\n\x07shuffle\x18\n \x01(\x0b\x32#.CoreML.Specification.BoolParameter\x12\x32\n\x04seed\x18\x14 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\"\xe4\x01\n\tLossLayer\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x62\n categoricalCrossEntropyLossLayer\x18\n \x01(\x0b\x32\x36.CoreML.Specification.CategoricalCrossEntropyLossLayerH\x00\x12T\n\x19meanSquaredErrorLossLayer\x18\x0b \x01(\x0b\x32/.CoreML.Specification.MeanSquaredErrorLossLayerH\x00\x42\x0f\n\rLossLayerType\"A\n CategoricalCrossEntropyLossLayer\x12\r\n\x05input\x18\x01 \x01(\t\x12\x0e\n\x06target\x18\x02 \x01(\t\":\n\x19MeanSquaredErrorLossLayer\x12\r\n\x05input\x18\x01 \x01(\t\x12\x0e\n\x06target\x18\x02 \x01(\t\"\x96\x01\n\tOptimizer\x12:\n\x0csgdOptimizer\x18\n \x01(\x0b\x32\".CoreML.Specification.SGDOptimizerH\x00\x12<\n\radamOptimizer\x18\x0b \x01(\x0b\x32#.CoreML.Specification.AdamOptimizerH\x00\x42\x0f\n\rOptimizerType\"\xc1\x01\n\x0cSGDOptimizer\x12;\n\x0clearningRate\x18\x01 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12;\n\rminiBatchSize\x18\x02 
\x01(\x0b\x32$.CoreML.Specification.Int64Parameter\x12\x37\n\x08momentum\x18\x03 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\"\xa9\x02\n\rAdamOptimizer\x12;\n\x0clearningRate\x18\x01 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12;\n\rminiBatchSize\x18\x02 \x01(\x0b\x32$.CoreML.Specification.Int64Parameter\x12\x34\n\x05\x62\x65ta1\x18\x03 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12\x34\n\x05\x62\x65ta2\x18\x04 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter\x12\x32\n\x03\x65ps\x18\x05 \x01(\x0b\x32%.CoreML.Specification.DoubleParameter*W\n#NeuralNetworkMultiArrayShapeMapping\x12\x17\n\x13RANK5_ARRAY_MAPPING\x10\x00\x12\x17\n\x13\x45XACT_ARRAY_MAPPING\x10\x01*R\n\x1eNeuralNetworkImageShapeMapping\x12\x17\n\x13RANK5_IMAGE_MAPPING\x10\x00\x12\x17\n\x13RANK4_IMAGE_MAPPING\x10\x01*\x87\x01\n\x0bScatterMode\x12\x12\n\x0eSCATTER_UPDATE\x10\x00\x12\x0f\n\x0bSCATTER_ADD\x10\x01\x12\x0f\n\x0bSCATTER_SUB\x10\x02\x12\x0f\n\x0bSCATTER_MUL\x10\x03\x12\x0f\n\x0bSCATTER_DIV\x10\x04\x12\x0f\n\x0bSCATTER_MAX\x10\x05\x12\x0f\n\x0bSCATTER_MIN\x10\x06\x42\x02H\x03P\x00P\x01\x62\x06proto3') , dependencies=[DataStructures__pb2.DESCRIPTOR,Parameters__pb2.DESCRIPTOR,], public_dependencies=[DataStructures__pb2.DESCRIPTOR,Parameters__pb2.DESCRIPTOR,]) @@ -58,8 +58,8 @@ ], containing_type=None, options=None, - serialized_start=30985, - serialized_end=31072, + serialized_start=33726, + serialized_end=33813, ) _sym_db.RegisterEnumDescriptor(_NEURALNETWORKMULTIARRAYSHAPEMAPPING) @@ -81,8 +81,8 @@ ], containing_type=None, options=None, - serialized_start=31074, - serialized_end=31156, + serialized_start=33815, + serialized_end=33897, ) _sym_db.RegisterEnumDescriptor(_NEURALNETWORKIMAGESHAPEMAPPING) @@ -124,8 +124,8 @@ ], containing_type=None, options=None, - serialized_start=31159, - serialized_end=31294, + serialized_start=33900, + serialized_end=34035, ) _sym_db.RegisterEnumDescriptor(_SCATTERMODE) @@ -160,8 +160,8 @@ ], containing_type=None, options=None, - serialized_start=13807, - serialized_end=13868, + serialized_start=14347, + serialized_end=14408, ) _sym_db.RegisterEnumDescriptor(_SAMEPADDING_SAMEPADDINGMODE) @@ -190,8 +190,8 @@ ], containing_type=None, options=None, - serialized_start=13954, - serialized_end=14060, + serialized_start=14494, + serialized_end=14600, ) _sym_db.RegisterEnumDescriptor(_SAMPLINGMODE_METHOD) @@ -220,11 +220,37 @@ ], containing_type=None, options=None, - serialized_start=14156, - serialized_end=14279, + serialized_start=14696, + serialized_end=14819, ) _sym_db.RegisterEnumDescriptor(_BOXCOORDINATESMODE_COORDINATES) +_CONVOLUTION3DLAYERPARAMS_PADDINGTYPE = _descriptor.EnumDescriptor( + name='PaddingType', + full_name='CoreML.Specification.Convolution3DLayerParams.PaddingType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='CUSTOM', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='VALID', index=1, number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='SAME', index=2, number=2, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=16497, + serialized_end=16543, +) +_sym_db.RegisterEnumDescriptor(_CONVOLUTION3DLAYERPARAMS_PADDINGTYPE) + _POOLINGLAYERPARAMS_POOLINGTYPE = _descriptor.EnumDescriptor( name='PoolingType', full_name='CoreML.Specification.PoolingLayerParams.PoolingType', @@ -246,11 +272,81 @@ ], containing_type=None, options=None, - serialized_start=16547, - serialized_end=16590, + 
serialized_start=17889, + serialized_end=17932, ) _sym_db.RegisterEnumDescriptor(_POOLINGLAYERPARAMS_POOLINGTYPE) +_POOLING3DLAYERPARAMS_POOLINGTYPE3D = _descriptor.EnumDescriptor( + name='PoolingType3D', + full_name='CoreML.Specification.Pooling3DLayerParams.PoolingType3D', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='MAX', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='AVERAGE', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=18461, + serialized_end=18498, +) +_sym_db.RegisterEnumDescriptor(_POOLING3DLAYERPARAMS_POOLINGTYPE3D) + +_POOLING3DLAYERPARAMS_POOLING3DPADDINGTYPE = _descriptor.EnumDescriptor( + name='Pooling3DPaddingType', + full_name='CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='CUSTOM', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='VALID', index=1, number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='SAME', index=2, number=2, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=18500, + serialized_end=18555, +) +_sym_db.RegisterEnumDescriptor(_POOLING3DLAYERPARAMS_POOLING3DPADDINGTYPE) + +_GLOBALPOOLING3DLAYERPARAMS_GLOBALPOOLINGTYPE3D = _descriptor.EnumDescriptor( + name='GlobalPoolingType3D', + full_name='CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='MAX', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='AVERAGE', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=18672, + serialized_end=18715, +) +_sym_db.RegisterEnumDescriptor(_GLOBALPOOLING3DLAYERPARAMS_GLOBALPOOLINGTYPE3D) + _UNARYFUNCTIONLAYERPARAMS_OPERATION = _descriptor.EnumDescriptor( name='Operation', full_name='CoreML.Specification.UnaryFunctionLayerParams.Operation', @@ -292,8 +388,8 @@ ], containing_type=None, options=None, - serialized_start=17450, - serialized_end=17548, + serialized_start=19553, + serialized_end=19651, ) _sym_db.RegisterEnumDescriptor(_UNARYFUNCTIONLAYERPARAMS_OPERATION) @@ -314,11 +410,37 @@ ], containing_type=None, options=None, - serialized_start=17672, - serialized_end=17713, + serialized_start=19898, + serialized_end=19939, ) _sym_db.RegisterEnumDescriptor(_UPSAMPLELAYERPARAMS_INTERPOLATIONMODE) +_UPSAMPLELAYERPARAMS_LINEARUPSAMPLEMODE = _descriptor.EnumDescriptor( + name='LinearUpsampleMode', + full_name='CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='DEFAULT', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='ALIGN_CORNERS_TRUE', index=1, number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='ALIGN_CORNERS_FALSE', index=2, number=2, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=19941, + serialized_end=20023, +) +_sym_db.RegisterEnumDescriptor(_UPSAMPLELAYERPARAMS_LINEARUPSAMPLEMODE) + _FLATTENLAYERPARAMS_FLATTENORDER = _descriptor.EnumDescriptor( name='FlattenOrder', full_name='CoreML.Specification.FlattenLayerParams.FlattenOrder', @@ -336,8 +458,8 @@ ], containing_type=None, 
options=None, - serialized_start=18518, - serialized_end=18569, + serialized_start=20828, + serialized_end=20879, ) _sym_db.RegisterEnumDescriptor(_FLATTENLAYERPARAMS_FLATTENORDER) @@ -358,8 +480,8 @@ ], containing_type=None, options=None, - serialized_start=18684, - serialized_end=18735, + serialized_start=20994, + serialized_end=21045, ) _sym_db.RegisterEnumDescriptor(_RESHAPELAYERPARAMS_RESHAPEORDER) @@ -377,11 +499,15 @@ name='DEPTH_TO_SPACE', index=1, number=1, options=None, type=None), + _descriptor.EnumValueDescriptor( + name='PIXEL_SHUFFLE', index=2, number=2, + options=None, + type=None), ], containing_type=None, options=None, - serialized_start=18904, - serialized_end=18964, + serialized_start=21214, + serialized_end=21293, ) _sym_db.RegisterEnumDescriptor(_REORGANIZEDATALAYERPARAMS_REORGANIZATIONTYPE) @@ -406,8 +532,8 @@ ], containing_type=None, options=None, - serialized_start=19105, - serialized_end=19167, + serialized_start=21434, + serialized_end=21496, ) _sym_db.RegisterEnumDescriptor(_SLICELAYERPARAMS_SLICEAXIS) @@ -460,8 +586,8 @@ ], containing_type=None, options=None, - serialized_start=19345, - serialized_end=19463, + serialized_start=21674, + serialized_end=21792, ) _sym_db.RegisterEnumDescriptor(_REDUCELAYERPARAMS_REDUCEOPERATION) @@ -494,8 +620,8 @@ ], containing_type=None, options=None, - serialized_start=19465, - serialized_end=19515, + serialized_start=21794, + serialized_end=21844, ) _sym_db.RegisterEnumDescriptor(_REDUCELAYERPARAMS_REDUCEAXIS) @@ -520,8 +646,8 @@ ], containing_type=None, options=None, - serialized_start=28087, - serialized_end=28159, + serialized_start=30490, + serialized_end=30562, ) _sym_db.RegisterEnumDescriptor(_GELULAYERPARAMS_GELUMODE) @@ -2374,6 +2500,62 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='oneHot', full_name='CoreML.Specification.NeuralNetworkLayer.oneHot', index=156, + number=1450, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='cumSum', full_name='CoreML.Specification.NeuralNetworkLayer.cumSum', index=157, + number=1455, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='clampedReLU', full_name='CoreML.Specification.NeuralNetworkLayer.clampedReLU', index=158, + number=1460, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='argSort', full_name='CoreML.Specification.NeuralNetworkLayer.argSort', index=159, + number=1461, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='pooling3d', full_name='CoreML.Specification.NeuralNetworkLayer.pooling3d', index=160, + number=1465, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + 
_descriptor.FieldDescriptor( + name='globalPooling3d', full_name='CoreML.Specification.NeuralNetworkLayer.globalPooling3d', index=161, + number=1466, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sliceBySize', full_name='CoreML.Specification.NeuralNetworkLayer.sliceBySize', index=162, + number=1470, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='convolution3d', full_name='CoreML.Specification.NeuralNetworkLayer.convolution3d', index=163, + number=1471, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -2390,7 +2572,7 @@ index=0, containing_type=None, fields=[]), ], serialized_start=2321, - serialized_end=12767, + serialized_end=13307, ) @@ -2427,8 +2609,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=12770, - serialized_end=12901, + serialized_start=13310, + serialized_end=13441, ) @@ -2479,8 +2661,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=12904, - serialized_end=13091, + serialized_start=13444, + serialized_end=13631, ) @@ -2503,8 +2685,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13093, - serialized_end=13115, + serialized_start=13633, + serialized_end=13655, ) @@ -2527,8 +2709,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13117, - serialized_end=13142, + serialized_start=13657, + serialized_end=13682, ) @@ -2551,8 +2733,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13144, - serialized_end=13161, + serialized_start=13684, + serialized_end=13701, ) @@ -2582,8 +2764,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13163, - serialized_end=13202, + serialized_start=13703, + serialized_end=13742, ) @@ -2613,8 +2795,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13204, - serialized_end=13244, + serialized_start=13744, + serialized_end=13784, ) @@ -2644,8 +2826,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13246, - serialized_end=13282, + serialized_start=13786, + serialized_end=13822, ) @@ -2675,8 +2857,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13284, - serialized_end=13321, + serialized_start=13824, + serialized_end=13861, ) @@ -2706,8 +2888,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13323, - serialized_end=13356, + serialized_start=13863, + serialized_end=13896, ) @@ -2737,8 +2919,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13358, - serialized_end=13394, + serialized_start=13898, + serialized_end=13934, ) @@ -2761,8 +2943,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13396, - serialized_end=13419, + serialized_start=13936, + serialized_end=13959, ) @@ -2785,8 +2967,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13421, - serialized_end=13443, + serialized_start=13961, + serialized_end=13983, ) @@ -2809,8 +2991,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13445, - serialized_end=13468, + serialized_start=13985, + serialized_end=14008, ) @@ -2833,8 +3015,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13470, - serialized_end=13493, + serialized_start=14010, + serialized_end=14033, ) @@ 
-2871,8 +3053,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13583, - serialized_end=13638, + serialized_start=14123, + serialized_end=14178, ) _BORDERAMOUNTS = _descriptor.Descriptor( @@ -2901,8 +3083,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13496, - serialized_end=13638, + serialized_start=14036, + serialized_end=14178, ) @@ -2932,8 +3114,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13640, - serialized_end=13715, + serialized_start=14180, + serialized_end=14255, ) @@ -2964,8 +3146,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13718, - serialized_end=13868, + serialized_start=14258, + serialized_end=14408, ) @@ -2996,8 +3178,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=13871, - serialized_end=14060, + serialized_start=14411, + serialized_end=14600, ) @@ -3028,8 +3210,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=14063, - serialized_end=14279, + serialized_start=14603, + serialized_end=14819, ) @@ -3062,14 +3244,21 @@ is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( - name='quantization', full_name='CoreML.Specification.WeightParams.quantization', index=3, + name='int8RawValue', full_name='CoreML.Specification.WeightParams.int8RawValue', index=3, + number=31, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='quantization', full_name='CoreML.Specification.WeightParams.quantization', index=4, number=40, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( - name='isUpdatable', full_name='CoreML.Specification.WeightParams.isUpdatable', index=4, + name='isUpdatable', full_name='CoreML.Specification.WeightParams.isUpdatable', index=5, number=50, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, @@ -3087,8 +3276,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=14282, - serialized_end=14441, + serialized_start=14822, + serialized_end=15003, ) @@ -3135,8 +3324,8 @@ name='QuantizationType', full_name='CoreML.Specification.QuantizationParams.QuantizationType', index=0, containing_type=None, fields=[]), ], - serialized_start=14444, - serialized_end=14672, + serialized_start=15006, + serialized_end=15234, ) @@ -3173,8 +3362,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=14674, - serialized_end=14729, + serialized_start=15236, + serialized_end=15291, ) @@ -3204,8 +3393,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=14731, - serialized_end=14782, + serialized_start=15293, + serialized_end=15344, ) @@ -3322,8 +3511,201 @@ name='ConvolutionPaddingType', full_name='CoreML.Specification.ConvolutionLayerParams.ConvolutionPaddingType', index=0, containing_type=None, fields=[]), ], - serialized_start=14785, - serialized_end=15230, + serialized_start=15347, + serialized_end=15792, +) + + +_CONVOLUTION3DLAYERPARAMS = _descriptor.Descriptor( + name='Convolution3DLayerParams', + full_name='CoreML.Specification.Convolution3DLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='outputChannels', full_name='CoreML.Specification.Convolution3DLayerParams.outputChannels', index=0, + 
number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='inputChannels', full_name='CoreML.Specification.Convolution3DLayerParams.inputChannels', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='nGroups', full_name='CoreML.Specification.Convolution3DLayerParams.nGroups', index=2, + number=10, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernelDepth', full_name='CoreML.Specification.Convolution3DLayerParams.kernelDepth', index=3, + number=20, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernelHeight', full_name='CoreML.Specification.Convolution3DLayerParams.kernelHeight', index=4, + number=21, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernelWidth', full_name='CoreML.Specification.Convolution3DLayerParams.kernelWidth', index=5, + number=22, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='strideDepth', full_name='CoreML.Specification.Convolution3DLayerParams.strideDepth', index=6, + number=31, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='strideHeight', full_name='CoreML.Specification.Convolution3DLayerParams.strideHeight', index=7, + number=32, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='strideWidth', full_name='CoreML.Specification.Convolution3DLayerParams.strideWidth', index=8, + number=33, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dilationDepth', full_name='CoreML.Specification.Convolution3DLayerParams.dilationDepth', index=9, + number=40, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dilationHeight', full_name='CoreML.Specification.Convolution3DLayerParams.dilationHeight', index=10, + number=41, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + 
is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dilationWidth', full_name='CoreML.Specification.Convolution3DLayerParams.dilationWidth', index=11, + number=42, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='hasBias', full_name='CoreML.Specification.Convolution3DLayerParams.hasBias', index=12, + number=50, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='weights', full_name='CoreML.Specification.Convolution3DLayerParams.weights', index=13, + number=60, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='bias', full_name='CoreML.Specification.Convolution3DLayerParams.bias', index=14, + number=61, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='paddingType', full_name='CoreML.Specification.Convolution3DLayerParams.paddingType', index=15, + number=70, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingFront', full_name='CoreML.Specification.Convolution3DLayerParams.customPaddingFront', index=16, + number=80, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingBack', full_name='CoreML.Specification.Convolution3DLayerParams.customPaddingBack', index=17, + number=81, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingTop', full_name='CoreML.Specification.Convolution3DLayerParams.customPaddingTop', index=18, + number=82, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingBottom', full_name='CoreML.Specification.Convolution3DLayerParams.customPaddingBottom', index=19, + number=83, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingLeft', full_name='CoreML.Specification.Convolution3DLayerParams.customPaddingLeft', index=20, + number=84, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + 
name='customPaddingRight', full_name='CoreML.Specification.Convolution3DLayerParams.customPaddingRight', index=21, + number=85, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='isDeconvolution', full_name='CoreML.Specification.Convolution3DLayerParams.isDeconvolution', index=22, + number=86, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='outputShape', full_name='CoreML.Specification.Convolution3DLayerParams.outputShape', index=23, + number=87, type=4, cpp_type=4, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _CONVOLUTION3DLAYERPARAMS_PADDINGTYPE, + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=15795, + serialized_end=16543, ) @@ -3369,6 +3751,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='int8DynamicQuantize', full_name='CoreML.Specification.InnerProductLayerParams.int8DynamicQuantize', index=5, + number=22, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -3381,8 +3770,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=15233, - serialized_end=15425, + serialized_start=16546, + serialized_end=16767, ) @@ -3440,8 +3829,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=15428, - serialized_end=15612, + serialized_start=16770, + serialized_end=16954, ) @@ -3499,8 +3888,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=15615, - serialized_end=15801, + serialized_start=16957, + serialized_end=17143, ) @@ -3579,8 +3968,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=15804, - serialized_end=16121, + serialized_start=17146, + serialized_end=17463, ) @@ -3610,8 +3999,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=16499, - serialized_end=16545, + serialized_start=17841, + serialized_end=17887, ) _POOLINGLAYERPARAMS = _descriptor.Descriptor( @@ -3671,30 +4060,193 @@ is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( - name='globalPooling', full_name='CoreML.Specification.PoolingLayerParams.globalPooling', index=7, - number=60, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, + name='globalPooling', full_name='CoreML.Specification.PoolingLayerParams.globalPooling', index=7, + number=60, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[_POOLINGLAYERPARAMS_VALIDCOMPLETEPADDING, ], + enum_types=[ + _POOLINGLAYERPARAMS_POOLINGTYPE, + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name='PoolingPaddingType', 
full_name='CoreML.Specification.PoolingLayerParams.PoolingPaddingType', + index=0, containing_type=None, fields=[]), + ], + serialized_start=17466, + serialized_end=17954, +) + + +_POOLING3DLAYERPARAMS = _descriptor.Descriptor( + name='Pooling3DLayerParams', + full_name='CoreML.Specification.Pooling3DLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='type', full_name='CoreML.Specification.Pooling3DLayerParams.type', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernelDepth', full_name='CoreML.Specification.Pooling3DLayerParams.kernelDepth', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernelHeight', full_name='CoreML.Specification.Pooling3DLayerParams.kernelHeight', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='kernelWidth', full_name='CoreML.Specification.Pooling3DLayerParams.kernelWidth', index=3, + number=4, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='strideDepth', full_name='CoreML.Specification.Pooling3DLayerParams.strideDepth', index=4, + number=5, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='strideHeight', full_name='CoreML.Specification.Pooling3DLayerParams.strideHeight', index=5, + number=6, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='strideWidth', full_name='CoreML.Specification.Pooling3DLayerParams.strideWidth', index=6, + number=7, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='paddingType', full_name='CoreML.Specification.Pooling3DLayerParams.paddingType', index=7, + number=15, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingFront', full_name='CoreML.Specification.Pooling3DLayerParams.customPaddingFront', index=8, + number=8, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingBack', full_name='CoreML.Specification.Pooling3DLayerParams.customPaddingBack', index=9, + number=9, 
type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingTop', full_name='CoreML.Specification.Pooling3DLayerParams.customPaddingTop', index=10, + number=10, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingBottom', full_name='CoreML.Specification.Pooling3DLayerParams.customPaddingBottom', index=11, + number=11, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingLeft', full_name='CoreML.Specification.Pooling3DLayerParams.customPaddingLeft', index=12, + number=12, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='customPaddingRight', full_name='CoreML.Specification.Pooling3DLayerParams.customPaddingRight', index=13, + number=13, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='countExcludePadding', full_name='CoreML.Specification.Pooling3DLayerParams.countExcludePadding', index=14, + number=14, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _POOLING3DLAYERPARAMS_POOLINGTYPE3D, + _POOLING3DLAYERPARAMS_POOLING3DPADDINGTYPE, + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=17957, + serialized_end=18555, +) + + +_GLOBALPOOLING3DLAYERPARAMS = _descriptor.Descriptor( + name='GlobalPooling3DLayerParams', + full_name='CoreML.Specification.GlobalPooling3DLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='type', full_name='CoreML.Specification.GlobalPooling3DLayerParams.type', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), ], extensions=[ ], - nested_types=[_POOLINGLAYERPARAMS_VALIDCOMPLETEPADDING, ], + nested_types=[], enum_types=[ - _POOLINGLAYERPARAMS_POOLINGTYPE, + _GLOBALPOOLING3DLAYERPARAMS_GLOBALPOOLINGTYPE3D, ], options=None, is_extendable=False, syntax='proto3', extension_ranges=[], oneofs=[ - _descriptor.OneofDescriptor( - name='PoolingPaddingType', full_name='CoreML.Specification.PoolingLayerParams.PoolingPaddingType', - index=0, containing_type=None, fields=[]), ], - serialized_start=16124, - serialized_end=16612, + serialized_start=18558, + serialized_end=18715, ) @@ -3724,8 +4276,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=16942, - serialized_end=16974, + serialized_start=19045, + serialized_end=19077, ) _PADDINGLAYERPARAMS_PADDINGREFLECTION 
= _descriptor.Descriptor( @@ -3747,8 +4299,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=16976, - serialized_end=16995, + serialized_start=19079, + serialized_end=19098, ) _PADDINGLAYERPARAMS_PADDINGREPLICATION = _descriptor.Descriptor( @@ -3770,8 +4322,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=16997, - serialized_end=17017, + serialized_start=19100, + serialized_end=19120, ) _PADDINGLAYERPARAMS = _descriptor.Descriptor( @@ -3824,8 +4376,8 @@ name='PaddingType', full_name='CoreML.Specification.PaddingLayerParams.PaddingType', index=0, containing_type=None, fields=[]), ], - serialized_start=16615, - serialized_end=17032, + serialized_start=18718, + serialized_end=19135, ) @@ -3855,8 +4407,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17034, - serialized_end=17077, + serialized_start=19137, + serialized_end=19180, ) @@ -3907,8 +4459,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17079, - serialized_end=17154, + serialized_start=19182, + serialized_end=19257, ) @@ -3931,8 +4483,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17156, - serialized_end=17176, + serialized_start=19259, + serialized_end=19279, ) @@ -3962,8 +4514,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17178, - serialized_end=17214, + serialized_start=19281, + serialized_end=19317, ) @@ -3993,8 +4545,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17216, - serialized_end=17247, + serialized_start=19319, + serialized_end=19350, ) @@ -4024,8 +4576,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17249, - serialized_end=17285, + serialized_start=19352, + serialized_end=19388, ) @@ -4084,8 +4636,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17288, - serialized_end=17548, + serialized_start=19391, + serialized_end=19651, ) @@ -4104,18 +4656,33 @@ is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( - name='mode', full_name='CoreML.Specification.UpsampleLayerParams.mode', index=1, + name='fractionalScalingFactor', full_name='CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor', index=1, + number=7, type=2, cpp_type=6, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mode', full_name='CoreML.Specification.UpsampleLayerParams.mode', index=2, number=5, type=14, cpp_type=8, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='linearUpsampleMode', full_name='CoreML.Specification.UpsampleLayerParams.linearUpsampleMode', index=3, + number=6, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], nested_types=[], enum_types=[ _UPSAMPLELAYERPARAMS_INTERPOLATIONMODE, + _UPSAMPLELAYERPARAMS_LINEARUPSAMPLEMODE, ], options=None, is_extendable=False, @@ -4123,8 +4690,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17551, - serialized_end=17713, + serialized_start=19654, + serialized_end=20023, ) @@ -4161,8 +4728,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17715, - serialized_end=17812, + serialized_start=20025, + serialized_end=20122, ) @@ -4220,8 +4787,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=17815, - 
serialized_end=18027, + serialized_start=20125, + serialized_end=20337, ) @@ -4258,8 +4825,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18029, - serialized_end=18111, + serialized_start=20339, + serialized_end=20421, ) @@ -4317,8 +4884,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18114, - serialized_end=18289, + serialized_start=20424, + serialized_end=20599, ) @@ -4355,8 +4922,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18291, - serialized_end=18381, + serialized_start=20601, + serialized_end=20691, ) @@ -4386,8 +4953,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18383, - serialized_end=18424, + serialized_start=20693, + serialized_end=20734, ) @@ -4418,8 +4985,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18427, - serialized_end=18569, + serialized_start=20737, + serialized_end=20879, ) @@ -4457,8 +5024,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18572, - serialized_end=18735, + serialized_start=20882, + serialized_end=21045, ) @@ -4488,8 +5055,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18737, - serialized_end=18771, + serialized_start=21047, + serialized_end=21081, ) @@ -4527,8 +5094,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18774, - serialized_end=18964, + serialized_start=21084, + serialized_end=21293, ) @@ -4580,8 +5147,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=18967, - serialized_end=19167, + serialized_start=21296, + serialized_end=21496, ) @@ -4627,8 +5194,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19170, - serialized_end=19515, + serialized_start=21499, + serialized_end=21844, ) @@ -4665,8 +5232,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19517, - serialized_end=19608, + serialized_start=21846, + serialized_end=21937, ) @@ -4689,8 +5256,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19610, - serialized_end=19630, + serialized_start=21939, + serialized_end=21959, ) @@ -4713,8 +5280,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19632, - serialized_end=19648, + serialized_start=21961, + serialized_end=21977, ) @@ -4737,8 +5304,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19650, - serialized_end=19666, + serialized_start=21979, + serialized_end=21995, ) @@ -4768,8 +5335,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19668, - serialized_end=19717, + serialized_start=21997, + serialized_end=22046, ) @@ -4813,8 +5380,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19719, - serialized_end=19821, + serialized_start=22048, + serialized_end=22150, ) @@ -4844,8 +5411,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19823, - serialized_end=19872, + serialized_start=22152, + serialized_end=22201, ) @@ -4931,8 +5498,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=19875, - serialized_end=20258, + serialized_start=22204, + serialized_end=22587, ) @@ -5060,8 +5627,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=20261, - serialized_end=21071, + serialized_start=22590, + serialized_end=23400, ) @@ -5126,8 +5693,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=21074, - serialized_end=21244, + serialized_start=23403, + serialized_end=23573, ) @@ -5255,8 +5822,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=21247, - serialized_end=22291, + serialized_start=23576, + serialized_end=24620, ) @@ -5321,8 +5888,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=22294, - serialized_end=22571, + serialized_start=24623, + serialized_end=24900, ) @@ -5387,8 +5954,8 @@ 
extension_ranges=[], oneofs=[ ], - serialized_start=22574, - serialized_end=22912, + serialized_start=24903, + serialized_end=25241, ) @@ -5449,8 +6016,8 @@ name='value', full_name='CoreML.Specification.CustomLayerParams.CustomLayerParamValue.value', index=0, containing_type=None, fields=[]), ], - serialized_start=23107, - serialized_end=23247, + serialized_start=25436, + serialized_end=25576, ) _CUSTOMLAYERPARAMS_PARAMETERSENTRY = _descriptor.Descriptor( @@ -5486,8 +6053,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23249, - serialized_end=23361, + serialized_start=25578, + serialized_end=25690, ) _CUSTOMLAYERPARAMS = _descriptor.Descriptor( @@ -5537,8 +6104,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=22915, - serialized_end=23361, + serialized_start=25244, + serialized_end=25690, ) @@ -5568,8 +6135,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23363, - serialized_end=23399, + serialized_start=25692, + serialized_end=25728, ) @@ -5629,6 +6196,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='int8DynamicQuantize', full_name='CoreML.Specification.BatchedMatMulLayerParams.int8DynamicQuantize', index=7, + number=10, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -5641,8 +6215,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23402, - serialized_end=23661, + serialized_start=25731, + serialized_end=26019, ) @@ -5672,8 +6246,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23663, - serialized_end=23698, + serialized_start=26021, + serialized_end=26056, ) @@ -5703,8 +6277,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23700, - serialized_end=23736, + serialized_start=26058, + serialized_end=26094, ) @@ -5734,8 +6308,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23738, - serialized_end=23778, + serialized_start=26096, + serialized_end=26136, ) @@ -5772,8 +6346,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23780, - serialized_end=23844, + serialized_start=26138, + serialized_end=26202, ) @@ -5810,8 +6384,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23846, - serialized_end=23938, + serialized_start=26204, + serialized_end=26296, ) @@ -5841,8 +6415,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23940, - serialized_end=23976, + serialized_start=26298, + serialized_end=26334, ) @@ -5879,8 +6453,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=23978, - serialized_end=24037, + serialized_start=26336, + serialized_end=26395, ) @@ -5910,8 +6484,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24039, - serialized_end=24078, + serialized_start=26397, + serialized_end=26436, ) @@ -5934,8 +6508,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24080, - serialized_end=24111, + serialized_start=26438, + serialized_end=26469, ) @@ -5958,8 +6532,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24113, - serialized_end=24129, + serialized_start=26471, + serialized_end=26487, ) @@ -5982,8 +6556,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24131, - serialized_end=24147, + serialized_start=26489, + serialized_end=26505, ) @@ -6006,8 +6580,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24149, - serialized_end=24165, + serialized_start=26507, + serialized_end=26523, ) @@ -6030,8 +6604,8 @@ 
extension_ranges=[], oneofs=[ ], - serialized_start=24167, - serialized_end=24184, + serialized_start=26525, + serialized_end=26542, ) @@ -6054,8 +6628,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24186, - serialized_end=24203, + serialized_start=26544, + serialized_end=26561, ) @@ -6078,8 +6652,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24205, - serialized_end=24222, + serialized_start=26563, + serialized_end=26580, ) @@ -6102,8 +6676,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24224, - serialized_end=24241, + serialized_start=26582, + serialized_end=26599, ) @@ -6126,8 +6700,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24243, - serialized_end=24260, + serialized_start=26601, + serialized_end=26618, ) @@ -6150,8 +6724,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24262, - serialized_end=24279, + serialized_start=26620, + serialized_end=26637, ) @@ -6174,8 +6748,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24281, - serialized_end=24299, + serialized_start=26639, + serialized_end=26657, ) @@ -6198,8 +6772,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24301, - serialized_end=24319, + serialized_start=26659, + serialized_end=26677, ) @@ -6222,8 +6796,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24321, - serialized_end=24339, + serialized_start=26679, + serialized_end=26697, ) @@ -6246,8 +6820,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24341, - serialized_end=24370, + serialized_start=26699, + serialized_end=26728, ) @@ -6270,8 +6844,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24372, - serialized_end=24389, + serialized_start=26730, + serialized_end=26747, ) @@ -6294,8 +6868,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24391, - serialized_end=24416, + serialized_start=26749, + serialized_end=26774, ) @@ -6332,8 +6906,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24418, - serialized_end=24481, + serialized_start=26776, + serialized_end=26839, ) @@ -6363,8 +6937,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24483, - serialized_end=24522, + serialized_start=26841, + serialized_end=26880, ) @@ -6394,8 +6968,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24524, - serialized_end=24563, + serialized_start=26882, + serialized_end=26921, ) @@ -6418,8 +6992,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24565, - serialized_end=24593, + serialized_start=26923, + serialized_end=26951, ) @@ -6449,8 +7023,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24595, - serialized_end=24646, + serialized_start=26953, + serialized_end=27004, ) @@ -6473,8 +7047,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24648, - serialized_end=24679, + serialized_start=27006, + serialized_end=27037, ) @@ -6497,8 +7071,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24681, - serialized_end=24710, + serialized_start=27039, + serialized_end=27068, ) @@ -6521,8 +7095,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24712, - serialized_end=24741, + serialized_start=27070, + serialized_end=27099, ) @@ -6545,8 +7119,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24743, - serialized_end=24772, + serialized_start=27101, + serialized_end=27130, ) @@ -6569,8 +7143,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24774, - serialized_end=24803, + serialized_start=27132, + serialized_end=27161, ) @@ -6593,8 +7167,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24805, - serialized_end=24839, + 
serialized_start=27163, + serialized_end=27197, ) @@ -6617,8 +7191,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24841, - serialized_end=24875, + serialized_start=27199, + serialized_end=27233, ) @@ -6641,8 +7215,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24877, - serialized_end=24911, + serialized_start=27235, + serialized_end=27269, ) @@ -6665,8 +7239,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24913, - serialized_end=24945, + serialized_start=27271, + serialized_end=27303, ) @@ -6696,8 +7270,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24947, - serialized_end=24980, + serialized_start=27305, + serialized_end=27338, ) @@ -6734,8 +7308,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=24982, - serialized_end=25065, + serialized_start=27340, + serialized_end=27423, ) @@ -6758,8 +7332,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25067, - serialized_end=25088, + serialized_start=27425, + serialized_end=27446, ) @@ -6789,8 +7363,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25090, - serialized_end=25161, + serialized_start=27448, + serialized_end=27519, ) @@ -6820,8 +7394,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25163, - serialized_end=25205, + serialized_start=27521, + serialized_end=27563, ) @@ -6858,8 +7432,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25207, - serialized_end=25299, + serialized_start=27565, + serialized_end=27657, ) @@ -6889,8 +7463,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25301, - serialized_end=25333, + serialized_start=27659, + serialized_end=27691, ) @@ -6920,8 +7494,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25335, - serialized_end=25390, + serialized_start=27693, + serialized_end=27748, ) @@ -6965,8 +7539,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25392, - serialized_end=25489, + serialized_start=27750, + serialized_end=27847, ) @@ -7010,8 +7584,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25491, - serialized_end=25564, + serialized_start=27849, + serialized_end=27922, ) @@ -7062,8 +7636,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25566, - serialized_end=25662, + serialized_start=27924, + serialized_end=28020, ) @@ -7107,8 +7681,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25664, - serialized_end=25740, + serialized_start=28022, + serialized_end=28098, ) @@ -7152,8 +7726,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25742, - serialized_end=25818, + serialized_start=28100, + serialized_end=28176, ) @@ -7204,8 +7778,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25820, - serialized_end=25919, + serialized_start=28178, + serialized_end=28277, ) @@ -7249,8 +7823,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=25921, - serialized_end=26000, + serialized_start=28279, + serialized_end=28358, ) @@ -7287,8 +7861,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26002, - serialized_end=26062, + serialized_start=28360, + serialized_end=28420, ) @@ -7332,8 +7906,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26064, - serialized_end=26147, + serialized_start=28422, + serialized_end=28505, ) @@ -7370,8 +7944,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26149, - serialized_end=26212, + serialized_start=28507, + serialized_end=28570, ) @@ -7429,8 +8003,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26214, - serialized_end=26336, + serialized_start=28572, + serialized_end=28694, ) @@ -7474,8 +8048,8 @@ extension_ranges=[], 
oneofs=[ ], - serialized_start=26338, - serialized_end=26410, + serialized_start=28696, + serialized_end=28768, ) @@ -7519,8 +8093,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26412, - serialized_end=26484, + serialized_start=28770, + serialized_end=28842, ) @@ -7564,8 +8138,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26486, - serialized_end=26559, + serialized_start=28844, + serialized_end=28917, ) @@ -7609,8 +8183,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26561, - serialized_end=26634, + serialized_start=28919, + serialized_end=28992, ) @@ -7654,8 +8228,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26636, - serialized_end=26709, + serialized_start=28994, + serialized_end=29067, ) @@ -7699,8 +8273,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26711, - serialized_end=26785, + serialized_start=29069, + serialized_end=29143, ) @@ -7744,8 +8318,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26787, - serialized_end=26861, + serialized_start=29145, + serialized_end=29219, ) @@ -7789,8 +8363,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26863, - serialized_end=26939, + serialized_start=29221, + serialized_end=29297, ) @@ -7834,8 +8408,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=26941, - serialized_end=27020, + serialized_start=29299, + serialized_end=29378, ) @@ -7879,8 +8453,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27022, - serialized_end=27101, + serialized_start=29380, + serialized_end=29459, ) @@ -7910,8 +8484,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27103, - serialized_end=27140, + serialized_start=29461, + serialized_end=29498, ) @@ -7941,8 +8515,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27142, - serialized_end=27180, + serialized_start=29500, + serialized_end=29538, ) @@ -7972,8 +8546,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27182, - serialized_end=27229, + serialized_start=29540, + serialized_end=29587, ) @@ -7996,8 +8570,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27231, - serialized_end=27255, + serialized_start=29589, + serialized_end=29613, ) @@ -8020,8 +8594,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27257, - serialized_end=27284, + serialized_start=29615, + serialized_end=29642, ) @@ -8058,8 +8632,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27286, - serialized_end=27340, + serialized_start=29644, + serialized_end=29698, ) @@ -8103,8 +8677,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27342, - serialized_end=27404, + serialized_start=29700, + serialized_end=29762, ) @@ -8141,8 +8715,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27406, - serialized_end=27458, + serialized_start=29764, + serialized_end=29816, ) @@ -8179,8 +8753,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27460, - serialized_end=27512, + serialized_start=29818, + serialized_end=29870, ) @@ -8224,8 +8798,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27514, - serialized_end=27587, + serialized_start=29872, + serialized_end=29945, ) @@ -8248,8 +8822,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27589, - serialized_end=27606, + serialized_start=29947, + serialized_end=29964, ) @@ -8272,8 +8846,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27608, - serialized_end=27626, + serialized_start=29966, + serialized_end=29984, ) @@ -8296,8 +8870,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27628, - serialized_end=27646, + serialized_start=29986, + 
serialized_end=30004, ) @@ -8320,8 +8894,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27648, - serialized_end=27665, + serialized_start=30006, + serialized_end=30023, ) @@ -8358,8 +8932,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27667, - serialized_end=27716, + serialized_start=30025, + serialized_end=30074, ) @@ -8405,6 +8979,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='squeezeMasks', full_name='CoreML.Specification.SliceStaticLayerParams.squeezeMasks', index=5, + number=6, type=8, cpp_type=7, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -8417,8 +8998,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27718, - serialized_end=27831, + serialized_start=30077, + serialized_end=30212, ) @@ -8457,6 +9038,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='squeezeMasks', full_name='CoreML.Specification.SliceDynamicLayerParams.squeezeMasks', index=4, + number=6, type=8, cpp_type=7, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -8469,8 +9057,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27833, - serialized_end=27929, + serialized_start=30214, + serialized_end=30332, ) @@ -8500,8 +9088,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27931, - serialized_end=27962, + serialized_start=30334, + serialized_end=30365, ) @@ -8524,8 +9112,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27964, - serialized_end=27985, + serialized_start=30367, + serialized_end=30388, ) @@ -8548,8 +9136,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=27987, - serialized_end=28003, + serialized_start=30390, + serialized_end=30406, ) @@ -8580,8 +9168,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=28006, - serialized_end=28159, + serialized_start=30409, + serialized_end=30562, ) @@ -8625,8 +9213,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=28161, - serialized_end=28246, + serialized_start=30564, + serialized_end=30649, ) @@ -8663,8 +9251,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=28248, - serialized_end=28316, + serialized_start=30651, + serialized_end=30719, ) @@ -8708,8 +9296,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=28318, - serialized_end=28393, + serialized_start=30721, + serialized_end=30796, ) @@ -8760,8 +9348,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=28396, - serialized_end=28566, + serialized_start=30799, + serialized_end=30969, ) @@ -8812,8 +9400,122 @@ extension_ranges=[], oneofs=[ ], - serialized_start=28568, - serialized_end=28695, + serialized_start=30971, + serialized_end=31098, +) + + +_CLAMPEDRELULAYERPARAMS = _descriptor.Descriptor( + name='ClampedReLULayerParams', + full_name='CoreML.Specification.ClampedReLULayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='alpha', full_name='CoreML.Specification.ClampedReLULayerParams.alpha', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + 
is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='beta', full_name='CoreML.Specification.ClampedReLULayerParams.beta', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=31100, + serialized_end=31153, +) + + +_ARGSORTLAYERPARAMS = _descriptor.Descriptor( + name='ArgSortLayerParams', + full_name='CoreML.Specification.ArgSortLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='axis', full_name='CoreML.Specification.ArgSortLayerParams.axis', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='descending', full_name='CoreML.Specification.ArgSortLayerParams.descending', index=1, + number=2, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=31155, + serialized_end=31209, +) + + +_SLICEBYSIZELAYERPARAMS = _descriptor.Descriptor( + name='SliceBySizeLayerParams', + full_name='CoreML.Specification.SliceBySizeLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='size', full_name='CoreML.Specification.SliceBySizeLayerParams.size', index=0, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='axis', full_name='CoreML.Specification.SliceBySizeLayerParams.axis', index=1, + number=3, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=31211, + serialized_end=31263, ) @@ -8895,8 +9597,105 @@ name='ClassLabels', full_name='CoreML.Specification.NeuralNetworkClassifier.ClassLabels', index=0, containing_type=None, fields=[]), ], - serialized_start=28698, - serialized_end=29279, + serialized_start=31266, + serialized_end=31847, +) + + +_ONEHOTLAYERPARAMS = _descriptor.Descriptor( + name='OneHotLayerParams', + full_name='CoreML.Specification.OneHotLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='oneHotVectorSize', full_name='CoreML.Specification.OneHotLayerParams.oneHotVectorSize', index=0, + number=1, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + 
options=None), + _descriptor.FieldDescriptor( + name='axis', full_name='CoreML.Specification.OneHotLayerParams.axis', index=1, + number=2, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='onValue', full_name='CoreML.Specification.OneHotLayerParams.onValue', index=2, + number=3, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='offValue', full_name='CoreML.Specification.OneHotLayerParams.offValue', index=3, + number=4, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=31849, + serialized_end=31943, +) + + +_CUMSUMLAYERPARAMS = _descriptor.Descriptor( + name='CumSumLayerParams', + full_name='CoreML.Specification.CumSumLayerParams', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='axis', full_name='CoreML.Specification.CumSumLayerParams.axis', index=0, + number=1, type=3, cpp_type=2, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='excludeFinalSum', full_name='CoreML.Specification.CumSumLayerParams.excludeFinalSum', index=1, + number=2, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='reverse', full_name='CoreML.Specification.CumSumLayerParams.reverse', index=2, + number=3, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=31945, + serialized_end=32020, ) @@ -8954,8 +9753,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=29282, - serialized_end=29683, + serialized_start=32023, + serialized_end=32424, ) @@ -9013,8 +9812,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=29686, - serialized_end=29976, + serialized_start=32427, + serialized_end=32717, ) @@ -9061,8 +9860,8 @@ name='LossLayerType', full_name='CoreML.Specification.LossLayer.LossLayerType', index=0, containing_type=None, fields=[]), ], - serialized_start=29979, - serialized_end=30207, + serialized_start=32720, + serialized_end=32948, ) @@ -9099,8 +9898,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=30209, - serialized_end=30274, + serialized_start=32950, + serialized_end=33015, ) @@ -9137,8 +9936,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=30276, - serialized_end=30334, + serialized_start=33017, + serialized_end=33075, ) @@ -9178,8 +9977,8 @@ name='OptimizerType', 
full_name='CoreML.Specification.Optimizer.OptimizerType', index=0, containing_type=None, fields=[]), ], - serialized_start=30337, - serialized_end=30487, + serialized_start=33078, + serialized_end=33228, ) @@ -9223,8 +10022,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=30490, - serialized_end=30683, + serialized_start=33231, + serialized_end=33424, ) @@ -9282,8 +10081,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=30686, - serialized_end=30983, + serialized_start=33427, + serialized_end=33724, ) _NEURALNETWORK.fields_by_name['layers'].message_type = _NEURALNETWORKLAYER @@ -9506,6 +10305,14 @@ _NEURALNETWORKLAYER.fields_by_name['whereBroadcastable'].message_type = _WHEREBROADCASTABLELAYERPARAMS _NEURALNETWORKLAYER.fields_by_name['layerNormalization'].message_type = _LAYERNORMALIZATIONLAYERPARAMS _NEURALNETWORKLAYER.fields_by_name['NonMaximumSuppression'].message_type = _NONMAXIMUMSUPPRESSIONLAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['oneHot'].message_type = _ONEHOTLAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['cumSum'].message_type = _CUMSUMLAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['clampedReLU'].message_type = _CLAMPEDRELULAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['argSort'].message_type = _ARGSORTLAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['pooling3d'].message_type = _POOLING3DLAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['globalPooling3d'].message_type = _GLOBALPOOLING3DLAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['sliceBySize'].message_type = _SLICEBYSIZELAYERPARAMS +_NEURALNETWORKLAYER.fields_by_name['convolution3d'].message_type = _CONVOLUTION3DLAYERPARAMS _NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( _NEURALNETWORKLAYER.fields_by_name['convolution']) _NEURALNETWORKLAYER.fields_by_name['convolution'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] @@ -9956,6 +10763,30 @@ _NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( _NEURALNETWORKLAYER.fields_by_name['NonMaximumSuppression']) _NEURALNETWORKLAYER.fields_by_name['NonMaximumSuppression'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['oneHot']) +_NEURALNETWORKLAYER.fields_by_name['oneHot'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['cumSum']) +_NEURALNETWORKLAYER.fields_by_name['cumSum'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['clampedReLU']) +_NEURALNETWORKLAYER.fields_by_name['clampedReLU'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['argSort']) +_NEURALNETWORKLAYER.fields_by_name['argSort'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['pooling3d']) +_NEURALNETWORKLAYER.fields_by_name['pooling3d'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['globalPooling3d']) +_NEURALNETWORKLAYER.fields_by_name['globalPooling3d'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + 
_NEURALNETWORKLAYER.fields_by_name['sliceBySize']) +_NEURALNETWORKLAYER.fields_by_name['sliceBySize'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] +_NEURALNETWORKLAYER.oneofs_by_name['layer'].fields.append( + _NEURALNETWORKLAYER.fields_by_name['convolution3d']) +_NEURALNETWORKLAYER.fields_by_name['convolution3d'].containing_oneof = _NEURALNETWORKLAYER.oneofs_by_name['layer'] _BRANCHLAYERPARAMS.fields_by_name['ifBranch'].message_type = _NEURALNETWORK _BRANCHLAYERPARAMS.fields_by_name['elseBranch'].message_type = _NEURALNETWORK _LOOPLAYERPARAMS.fields_by_name['conditionNetwork'].message_type = _NEURALNETWORK @@ -9988,6 +10819,10 @@ _CONVOLUTIONLAYERPARAMS.oneofs_by_name['ConvolutionPaddingType'].fields.append( _CONVOLUTIONLAYERPARAMS.fields_by_name['same']) _CONVOLUTIONLAYERPARAMS.fields_by_name['same'].containing_oneof = _CONVOLUTIONLAYERPARAMS.oneofs_by_name['ConvolutionPaddingType'] +_CONVOLUTION3DLAYERPARAMS.fields_by_name['weights'].message_type = _WEIGHTPARAMS +_CONVOLUTION3DLAYERPARAMS.fields_by_name['bias'].message_type = _WEIGHTPARAMS +_CONVOLUTION3DLAYERPARAMS.fields_by_name['paddingType'].enum_type = _CONVOLUTION3DLAYERPARAMS_PADDINGTYPE +_CONVOLUTION3DLAYERPARAMS_PADDINGTYPE.containing_type = _CONVOLUTION3DLAYERPARAMS _INNERPRODUCTLAYERPARAMS.fields_by_name['weights'].message_type = _WEIGHTPARAMS _INNERPRODUCTLAYERPARAMS.fields_by_name['bias'].message_type = _WEIGHTPARAMS _EMBEDDINGLAYERPARAMS.fields_by_name['weights'].message_type = _WEIGHTPARAMS @@ -10013,6 +10848,12 @@ _POOLINGLAYERPARAMS.oneofs_by_name['PoolingPaddingType'].fields.append( _POOLINGLAYERPARAMS.fields_by_name['includeLastPixel']) _POOLINGLAYERPARAMS.fields_by_name['includeLastPixel'].containing_oneof = _POOLINGLAYERPARAMS.oneofs_by_name['PoolingPaddingType'] +_POOLING3DLAYERPARAMS.fields_by_name['type'].enum_type = _POOLING3DLAYERPARAMS_POOLINGTYPE3D +_POOLING3DLAYERPARAMS.fields_by_name['paddingType'].enum_type = _POOLING3DLAYERPARAMS_POOLING3DPADDINGTYPE +_POOLING3DLAYERPARAMS_POOLINGTYPE3D.containing_type = _POOLING3DLAYERPARAMS +_POOLING3DLAYERPARAMS_POOLING3DPADDINGTYPE.containing_type = _POOLING3DLAYERPARAMS +_GLOBALPOOLING3DLAYERPARAMS.fields_by_name['type'].enum_type = _GLOBALPOOLING3DLAYERPARAMS_GLOBALPOOLINGTYPE3D +_GLOBALPOOLING3DLAYERPARAMS_GLOBALPOOLINGTYPE3D.containing_type = _GLOBALPOOLING3DLAYERPARAMS _PADDINGLAYERPARAMS_PADDINGCONSTANT.containing_type = _PADDINGLAYERPARAMS _PADDINGLAYERPARAMS_PADDINGREFLECTION.containing_type = _PADDINGLAYERPARAMS _PADDINGLAYERPARAMS_PADDINGREPLICATION.containing_type = _PADDINGLAYERPARAMS @@ -10032,7 +10873,9 @@ _UNARYFUNCTIONLAYERPARAMS.fields_by_name['type'].enum_type = _UNARYFUNCTIONLAYERPARAMS_OPERATION _UNARYFUNCTIONLAYERPARAMS_OPERATION.containing_type = _UNARYFUNCTIONLAYERPARAMS _UPSAMPLELAYERPARAMS.fields_by_name['mode'].enum_type = _UPSAMPLELAYERPARAMS_INTERPOLATIONMODE +_UPSAMPLELAYERPARAMS.fields_by_name['linearUpsampleMode'].enum_type = _UPSAMPLELAYERPARAMS_LINEARUPSAMPLEMODE _UPSAMPLELAYERPARAMS_INTERPOLATIONMODE.containing_type = _UPSAMPLELAYERPARAMS +_UPSAMPLELAYERPARAMS_LINEARUPSAMPLEMODE.containing_type = _UPSAMPLELAYERPARAMS _RESIZEBILINEARLAYERPARAMS.fields_by_name['mode'].message_type = _SAMPLINGMODE _CROPRESIZELAYERPARAMS.fields_by_name['mode'].message_type = _SAMPLINGMODE _CROPRESIZELAYERPARAMS.fields_by_name['boxIndicesMode'].message_type = _BOXCOORDINATESMODE @@ -10211,11 +11054,14 @@ DESCRIPTOR.message_types_by_name['LinearQuantizationParams'] = _LINEARQUANTIZATIONPARAMS 
DESCRIPTOR.message_types_by_name['LookUpTableQuantizationParams'] = _LOOKUPTABLEQUANTIZATIONPARAMS DESCRIPTOR.message_types_by_name['ConvolutionLayerParams'] = _CONVOLUTIONLAYERPARAMS +DESCRIPTOR.message_types_by_name['Convolution3DLayerParams'] = _CONVOLUTION3DLAYERPARAMS DESCRIPTOR.message_types_by_name['InnerProductLayerParams'] = _INNERPRODUCTLAYERPARAMS DESCRIPTOR.message_types_by_name['EmbeddingLayerParams'] = _EMBEDDINGLAYERPARAMS DESCRIPTOR.message_types_by_name['EmbeddingNDLayerParams'] = _EMBEDDINGNDLAYERPARAMS DESCRIPTOR.message_types_by_name['BatchnormLayerParams'] = _BATCHNORMLAYERPARAMS DESCRIPTOR.message_types_by_name['PoolingLayerParams'] = _POOLINGLAYERPARAMS +DESCRIPTOR.message_types_by_name['Pooling3DLayerParams'] = _POOLING3DLAYERPARAMS +DESCRIPTOR.message_types_by_name['GlobalPooling3DLayerParams'] = _GLOBALPOOLING3DLAYERPARAMS DESCRIPTOR.message_types_by_name['PaddingLayerParams'] = _PADDINGLAYERPARAMS DESCRIPTOR.message_types_by_name['ConcatLayerParams'] = _CONCATLAYERPARAMS DESCRIPTOR.message_types_by_name['LRNLayerParams'] = _LRNLAYERPARAMS @@ -10346,7 +11192,12 @@ DESCRIPTOR.message_types_by_name['SlidingWindowsLayerParams'] = _SLIDINGWINDOWSLAYERPARAMS DESCRIPTOR.message_types_by_name['LayerNormalizationLayerParams'] = _LAYERNORMALIZATIONLAYERPARAMS DESCRIPTOR.message_types_by_name['NonMaximumSuppressionLayerParams'] = _NONMAXIMUMSUPPRESSIONLAYERPARAMS +DESCRIPTOR.message_types_by_name['ClampedReLULayerParams'] = _CLAMPEDRELULAYERPARAMS +DESCRIPTOR.message_types_by_name['ArgSortLayerParams'] = _ARGSORTLAYERPARAMS +DESCRIPTOR.message_types_by_name['SliceBySizeLayerParams'] = _SLICEBYSIZELAYERPARAMS DESCRIPTOR.message_types_by_name['NeuralNetworkClassifier'] = _NEURALNETWORKCLASSIFIER +DESCRIPTOR.message_types_by_name['OneHotLayerParams'] = _ONEHOTLAYERPARAMS +DESCRIPTOR.message_types_by_name['CumSumLayerParams'] = _CUMSUMLAYERPARAMS DESCRIPTOR.message_types_by_name['NeuralNetworkRegressor'] = _NEURALNETWORKREGRESSOR DESCRIPTOR.message_types_by_name['NetworkUpdateParameters'] = _NETWORKUPDATEPARAMETERS DESCRIPTOR.message_types_by_name['LossLayer'] = _LOSSLAYER @@ -10683,6 +11534,13 @@ )) _sym_db.RegisterMessage(ConvolutionLayerParams) +Convolution3DLayerParams = _reflection.GeneratedProtocolMessageType('Convolution3DLayerParams', (_message.Message,), dict( + DESCRIPTOR = _CONVOLUTION3DLAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.Convolution3DLayerParams) + )) +_sym_db.RegisterMessage(Convolution3DLayerParams) + InnerProductLayerParams = _reflection.GeneratedProtocolMessageType('InnerProductLayerParams', (_message.Message,), dict( DESCRIPTOR = _INNERPRODUCTLAYERPARAMS, __module__ = 'NeuralNetwork_pb2' @@ -10726,6 +11584,20 @@ _sym_db.RegisterMessage(PoolingLayerParams) _sym_db.RegisterMessage(PoolingLayerParams.ValidCompletePadding) +Pooling3DLayerParams = _reflection.GeneratedProtocolMessageType('Pooling3DLayerParams', (_message.Message,), dict( + DESCRIPTOR = _POOLING3DLAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.Pooling3DLayerParams) + )) +_sym_db.RegisterMessage(Pooling3DLayerParams) + +GlobalPooling3DLayerParams = _reflection.GeneratedProtocolMessageType('GlobalPooling3DLayerParams', (_message.Message,), dict( + DESCRIPTOR = _GLOBALPOOLING3DLAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.GlobalPooling3DLayerParams) + )) +_sym_db.RegisterMessage(GlobalPooling3DLayerParams) 
+ PaddingLayerParams = _reflection.GeneratedProtocolMessageType('PaddingLayerParams', (_message.Message,), dict( PaddingConstant = _reflection.GeneratedProtocolMessageType('PaddingConstant', (_message.Message,), dict( @@ -11676,6 +12548,27 @@ )) _sym_db.RegisterMessage(NonMaximumSuppressionLayerParams) +ClampedReLULayerParams = _reflection.GeneratedProtocolMessageType('ClampedReLULayerParams', (_message.Message,), dict( + DESCRIPTOR = _CLAMPEDRELULAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.ClampedReLULayerParams) + )) +_sym_db.RegisterMessage(ClampedReLULayerParams) + +ArgSortLayerParams = _reflection.GeneratedProtocolMessageType('ArgSortLayerParams', (_message.Message,), dict( + DESCRIPTOR = _ARGSORTLAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.ArgSortLayerParams) + )) +_sym_db.RegisterMessage(ArgSortLayerParams) + +SliceBySizeLayerParams = _reflection.GeneratedProtocolMessageType('SliceBySizeLayerParams', (_message.Message,), dict( + DESCRIPTOR = _SLICEBYSIZELAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.SliceBySizeLayerParams) + )) +_sym_db.RegisterMessage(SliceBySizeLayerParams) + NeuralNetworkClassifier = _reflection.GeneratedProtocolMessageType('NeuralNetworkClassifier', (_message.Message,), dict( DESCRIPTOR = _NEURALNETWORKCLASSIFIER, __module__ = 'NeuralNetwork_pb2' @@ -11683,6 +12576,20 @@ )) _sym_db.RegisterMessage(NeuralNetworkClassifier) +OneHotLayerParams = _reflection.GeneratedProtocolMessageType('OneHotLayerParams', (_message.Message,), dict( + DESCRIPTOR = _ONEHOTLAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.OneHotLayerParams) + )) +_sym_db.RegisterMessage(OneHotLayerParams) + +CumSumLayerParams = _reflection.GeneratedProtocolMessageType('CumSumLayerParams', (_message.Message,), dict( + DESCRIPTOR = _CUMSUMLAYERPARAMS, + __module__ = 'NeuralNetwork_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.CumSumLayerParams) + )) +_sym_db.RegisterMessage(CumSumLayerParams) + NeuralNetworkRegressor = _reflection.GeneratedProtocolMessageType('NeuralNetworkRegressor', (_message.Message,), dict( DESCRIPTOR = _NEURALNETWORKREGRESSOR, __module__ = 'NeuralNetwork_pb2' diff --git a/coremltools/proto/VisionFeaturePrint_pb2.py b/coremltools/proto/VisionFeaturePrint_pb2.py index 1dc81283c..ad8724cf2 100644 --- a/coremltools/proto/VisionFeaturePrint_pb2.py +++ b/coremltools/proto/VisionFeaturePrint_pb2.py @@ -19,7 +19,7 @@ name='VisionFeaturePrint.proto', package='CoreML.Specification.CoreMLModels', syntax='proto3', - serialized_pb=_b('\n\x18VisionFeaturePrint.proto\x12!CoreML.Specification.CoreMLModels\"\xa1\x02\n\x12VisionFeaturePrint\x12L\n\x05scene\x18\x14 \x01(\x0b\x32;.CoreML.Specification.CoreMLModels.VisionFeaturePrint.SceneH\x00\x1a\xa2\x01\n\x05Scene\x12Y\n\x07version\x18\x01 \x01(\x0e\x32H.CoreML.Specification.CoreMLModels.VisionFeaturePrint.Scene.SceneVersion\">\n\x0cSceneVersion\x12\x19\n\x15SCENE_VERSION_INVALID\x10\x00\x12\x13\n\x0fSCENE_VERSION_1\x10\x01\x42\x18\n\x16VisionFeaturePrintTypeB\x02H\x03\x62\x06proto3') + serialized_pb=_b('\n\x18VisionFeaturePrint.proto\x12!CoreML.Specification.CoreMLModels\"\xac\x04\n\x12VisionFeaturePrint\x12L\n\x05scene\x18\x14 \x01(\x0b\x32;.CoreML.Specification.CoreMLModels.VisionFeaturePrint.SceneH\x00\x12N\n\x06object\x18\x15 
\x01(\x0b\x32<.CoreML.Specification.CoreMLModels.VisionFeaturePrint.ObjectH\x00\x1a\xa2\x01\n\x05Scene\x12Y\n\x07version\x18\x01 \x01(\x0e\x32H.CoreML.Specification.CoreMLModels.VisionFeaturePrint.Scene.SceneVersion\">\n\x0cSceneVersion\x12\x19\n\x15SCENE_VERSION_INVALID\x10\x00\x12\x13\n\x0fSCENE_VERSION_1\x10\x01\x1a\xb8\x01\n\x06Object\x12[\n\x07version\x18\x01 \x01(\x0e\x32J.CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion\x12\x0e\n\x06output\x18\x64 \x03(\t\"A\n\rObjectVersion\x12\x1a\n\x16OBJECT_VERSION_INVALID\x10\x00\x12\x14\n\x10OBJECT_VERSION_1\x10\x01\x42\x18\n\x16VisionFeaturePrintTypeB\x02H\x03\x62\x06proto3') ) @@ -41,11 +41,33 @@ ], containing_type=None, options=None, - serialized_start=265, - serialized_end=327, + serialized_start=345, + serialized_end=407, ) _sym_db.RegisterEnumDescriptor(_VISIONFEATUREPRINT_SCENE_SCENEVERSION) +_VISIONFEATUREPRINT_OBJECT_OBJECTVERSION = _descriptor.EnumDescriptor( + name='ObjectVersion', + full_name='CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='OBJECT_VERSION_INVALID', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='OBJECT_VERSION_1', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=529, + serialized_end=594, +) +_sym_db.RegisterEnumDescriptor(_VISIONFEATUREPRINT_OBJECT_OBJECTVERSION) + _VISIONFEATUREPRINT_SCENE = _descriptor.Descriptor( name='Scene', @@ -74,8 +96,46 @@ extension_ranges=[], oneofs=[ ], - serialized_start=165, - serialized_end=327, + serialized_start=245, + serialized_end=407, +) + +_VISIONFEATUREPRINT_OBJECT = _descriptor.Descriptor( + name='Object', + full_name='CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='version', full_name='CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.version', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='output', full_name='CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output', index=1, + number=100, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _VISIONFEATUREPRINT_OBJECT_OBJECTVERSION, + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=410, + serialized_end=594, ) _VISIONFEATUREPRINT = _descriptor.Descriptor( @@ -92,10 +152,17 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='object', full_name='CoreML.Specification.CoreMLModels.VisionFeaturePrint.object', index=1, + number=21, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], - nested_types=[_VISIONFEATUREPRINT_SCENE, ], + nested_types=[_VISIONFEATUREPRINT_SCENE, _VISIONFEATUREPRINT_OBJECT, ], 
enum_types=[ ], options=None, @@ -108,16 +175,23 @@ index=0, containing_type=None, fields=[]), ], serialized_start=64, - serialized_end=353, + serialized_end=620, ) _VISIONFEATUREPRINT_SCENE.fields_by_name['version'].enum_type = _VISIONFEATUREPRINT_SCENE_SCENEVERSION _VISIONFEATUREPRINT_SCENE.containing_type = _VISIONFEATUREPRINT _VISIONFEATUREPRINT_SCENE_SCENEVERSION.containing_type = _VISIONFEATUREPRINT_SCENE +_VISIONFEATUREPRINT_OBJECT.fields_by_name['version'].enum_type = _VISIONFEATUREPRINT_OBJECT_OBJECTVERSION +_VISIONFEATUREPRINT_OBJECT.containing_type = _VISIONFEATUREPRINT +_VISIONFEATUREPRINT_OBJECT_OBJECTVERSION.containing_type = _VISIONFEATUREPRINT_OBJECT _VISIONFEATUREPRINT.fields_by_name['scene'].message_type = _VISIONFEATUREPRINT_SCENE +_VISIONFEATUREPRINT.fields_by_name['object'].message_type = _VISIONFEATUREPRINT_OBJECT _VISIONFEATUREPRINT.oneofs_by_name['VisionFeaturePrintType'].fields.append( _VISIONFEATUREPRINT.fields_by_name['scene']) _VISIONFEATUREPRINT.fields_by_name['scene'].containing_oneof = _VISIONFEATUREPRINT.oneofs_by_name['VisionFeaturePrintType'] +_VISIONFEATUREPRINT.oneofs_by_name['VisionFeaturePrintType'].fields.append( + _VISIONFEATUREPRINT.fields_by_name['object']) +_VISIONFEATUREPRINT.fields_by_name['object'].containing_oneof = _VISIONFEATUREPRINT.oneofs_by_name['VisionFeaturePrintType'] DESCRIPTOR.message_types_by_name['VisionFeaturePrint'] = _VISIONFEATUREPRINT _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -129,12 +203,20 @@ # @@protoc_insertion_point(class_scope:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Scene) )) , + + Object = _reflection.GeneratedProtocolMessageType('Object', (_message.Message,), dict( + DESCRIPTOR = _VISIONFEATUREPRINT_OBJECT, + __module__ = 'VisionFeaturePrint_pb2' + # @@protoc_insertion_point(class_scope:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + )) + , DESCRIPTOR = _VISIONFEATUREPRINT, __module__ = 'VisionFeaturePrint_pb2' # @@protoc_insertion_point(class_scope:CoreML.Specification.CoreMLModels.VisionFeaturePrint) )) _sym_db.RegisterMessage(VisionFeaturePrint) _sym_db.RegisterMessage(VisionFeaturePrint.Scene) +_sym_db.RegisterMessage(VisionFeaturePrint.Object) DESCRIPTOR.has_options = True diff --git a/coremltools/test/api/__init__.py b/coremltools/test/api/__init__.py new file mode 100644 index 000000000..1665bc379 --- /dev/null +++ b/coremltools/test/api/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2017 - 2020, Apple Inc. All rights reserved. 
+#
+# Use of this source code is governed by a BSD-3-clause license that can be
+# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
diff --git a/coremltools/test/api/test_api_examples.py b/coremltools/test/api/test_api_examples.py
new file mode 100644
index 000000000..b7dabddcc
--- /dev/null
+++ b/coremltools/test/api/test_api_examples.py
@@ -0,0 +1,397 @@
+from os import getcwd, chdir
+from shutil import rmtree
+from os.path import exists
+from tempfile import mkdtemp
+import pytest
+import numpy as np
+import coremltools as ct
+import os
+
+from coremltools._deps import (
+    _HAS_TF_1,
+    _HAS_TF_2,
+    _HAS_TORCH,
+    MSG_TF1_NOT_FOUND,
+    MSG_TF2_NOT_FOUND,
+    MSG_TORCH_NOT_FOUND,
+)
+
+
+###############################################################################
+# Note: all of these tests double as examples (e.g. in readme.md) for
+# reference. Whenever one of the following tests fails, we should update the
+# API documentation. Each test case is expected to be runnable and
+# self-contained, and is synced to the documentation pages as an API example.
+###############################################################################
+
+
+@pytest.mark.skipif(not _HAS_TF_1, reason=MSG_TF1_NOT_FOUND)
+class TestTensorFlow1ConverterExamples:
+    @staticmethod
+    def test_convert_from_frozen_graph(tmpdir):
+        import tensorflow as tf
+
+        with tf.Graph().as_default() as graph:
+            x = tf.placeholder(tf.float32, shape=(1, 2, 3), name="input")
+            y = tf.nn.relu(x, name="output")
+
+        mlmodel = ct.convert(graph)
+
+        test_input = np.random.rand(1, 2, 3) - 0.5
+        with tf.compat.v1.Session(graph=graph) as sess:
+            expected_val = sess.run(y, feed_dict={x: test_input})
+        results = mlmodel.predict({"input": test_input})
+        np.testing.assert_allclose(results["output"], expected_val)
+
+    @staticmethod
+    def test_convert_from_frozen_graph_file(tmpdir):
+        # create the model to convert
+        import tensorflow as tf
+
+        # Write a toy frozen graph.
+        # Note that we usually need to run freeze_graph() on a tf.Graph();
+        # we skip that here because this toy model contains no variables.
+        with tf.Graph().as_default() as graph:
+            x = tf.placeholder(tf.float32, shape=(1, 2, 3), name="input")
+            y = tf.nn.relu(x, name="output")
+
+        save_path = str(tmpdir)
+        tf.io.write_graph(graph, save_path, "frozen_graph.pb", as_text=False)
+
+        # Create a test sample.
+        # Subtract 0.5 to get some negative values.
+        test_input = np.random.rand(1, 2, 3) - 0.5
+        with tf.compat.v1.Session(graph=graph) as sess:
+            expected_val = sess.run(y, feed_dict={x: test_input})
+
+        # The input `.pb` file is in the frozen graph format, usually
+        # generated by TensorFlow's utility function `freeze_graph()`.
+        pb_path = os.path.join(save_path, "frozen_graph.pb")
+
+        # There are 3 ways to specify inputs:
+        # (1) Fully specify the inputs.
+        mlmodel = ct.convert(
+            pb_path,
+            # We specify inputs with a name matching the placeholder name.
+            inputs=[ct.TensorType(name="input", shape=(1, 2, 3))],
+            outputs=["output"],
+        )
+
+        # (2) Specify the input TensorType without a name (when there's only
+        # one input).
+        mlmodel = ct.convert(
+            pb_path,
+            # The TensorType name is optional when there's only one input.
+            inputs=[ct.TensorType(shape=(1, 2, 3))],
+            outputs=["output"],
+        )
+
+        # (3) Don't specify inputs at all. `inputs` is optional for TF. When
+        # inputs is not specified, convert() infers them from the Placeholder
+        # nodes.
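+        # (An aside, not exercised by this test: a sketch assuming the
+        # flexible-shape API (ct.RangeDim). A dimension can also be declared
+        # as a range, e.g.
+        #     inputs=[ct.TensorType(shape=(1, ct.RangeDim(1, 10), 3))]
+        # which lets the converted model accept any middle dimension from
+        # 1 to 10.)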
+ mlmodel = ct.convert(pb_path, outputs=["output"]) + + results = mlmodel.predict({"input": test_input}) + np.testing.assert_allclose(results["output"], expected_val) + mlmodel_path = os.path.join(save_path, "model.mlmodel") + # Save the converted model + mlmodel.save(mlmodel_path) + + results = mlmodel.predict({"input": test_input}) + np.testing.assert_allclose(results["output"], expected_val) + + @staticmethod + def test_convert_from_saved_model_dir(tmpdir): + # Sample input + test_input = np.random.rand(1, 3, 5) - 0.5 + + # create the model to convert + import tensorflow as tf + + with tf.compat.v1.Session() as sess: + x = tf.placeholder(shape=(1, 3, 5), dtype=tf.float32) + y = tf.nn.relu(x) + + expected_val = sess.run(y, feed_dict={x: test_input}) + + # Save model as SavedModel + inputs = {"x": x} + outputs = {"y": y} + save_path = str(tmpdir) + tf.compat.v1.saved_model.simple_save(sess, save_path, inputs, outputs) + + # SavedModel directory generated by TensorFlow 1.x + # when converting from SavedModel dir, inputs / outputs are optional + mlmodel = ct.convert(save_path) + + # Need input output names to call mlmodel + # x.name == 'Placeholder:0'. Strip out ':0' + input_name = x.name.split(":")[0] + results = mlmodel.predict({input_name: test_input}) + # y.name == 'Relu:0'. output_name == 'Relu' + output_name = y.name.split(":")[0] + np.testing.assert_allclose(results[output_name], expected_val) + + +@pytest.mark.skipif(not _HAS_TF_2, reason=MSG_TF2_NOT_FOUND) +class TestTensorFlow2ConverterExamples: + def setup_class(self): + self._cwd = getcwd() + self._temp_dir = mkdtemp() + # step into temp directory as working directory + # to make the user-facing examples cleaner + chdir(self._temp_dir) + + # create toy models for conversion examples + import tensorflow as tf + + # write a toy tf.keras HDF5 model + tf_keras_model = tf.keras.Sequential( + [ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dense(128, activation=tf.nn.relu), + tf.keras.layers.Dense(10, activation=tf.nn.softmax), + ] + ) + tf_keras_model.save("./tf_keras_model.h5") + + # write a toy SavedModel directory + tf_keras_model.save("./saved_model", save_format="tf") + + def teardown_class(self): + chdir(self._cwd) + if exists(self._temp_dir): + rmtree(self._temp_dir) + + @staticmethod + def test_convert_tf_keras_h5_file(tmpdir): + import tensorflow as tf + + x = tf.keras.Input(shape=(32,), name="input") + y = tf.keras.layers.Dense(16, activation="softmax")(x) + keras_model = tf.keras.Model(x, y) + save_dir = str(tmpdir) + h5_path = os.path.join(save_dir, "tf_keras_model.h5") + keras_model.save(h5_path) + + mlmodel = ct.convert(h5_path) + + test_input = np.random.rand(2, 32) + expected_val = keras_model(test_input) + results = mlmodel.predict({"input": test_input}) + np.testing.assert_allclose(results["Identity"], expected_val, rtol=1e-4) + + @staticmethod + def test_convert_tf_keras_model(): + import tensorflow as tf + + x = tf.keras.Input(shape=(32,), name="input") + y = tf.keras.layers.Dense(16, activation="softmax")(x) + keras_model = tf.keras.Model(x, y) + + mlmodel = ct.convert(keras_model) + + test_input = np.random.rand(2, 32) + expected_val = keras_model(test_input) + results = mlmodel.predict({"input": test_input}) + np.testing.assert_allclose(results["Identity"], expected_val, rtol=1e-4) + + @staticmethod + def test_convert_tf_keras_applications_model(): + import tensorflow as tf + + tf_keras_model = tf.keras.applications.MobileNet( + weights="imagenet", input_shape=(224, 224, 3) + ) + + # inputs 
 / outputs are optional; we can get them from the tf.keras model.
+        # This can be extremely helpful when we want to extract sub-graphs.
+        input_name = tf_keras_model.inputs[0].name.split(":")[0]
+        # Note that `convert()` requires tf.Graph's outputs instead of
+        # tf.keras.Model's outputs; to access them, we can do the following:
+        output_name = tf_keras_model.outputs[0].name.split(":")[0]
+        tf_graph_output_name = output_name.split("/")[-1]
+
+        mlmodel = ct.convert(
+            tf_keras_model,
+            inputs=[ct.TensorType(name=input_name, shape=(1, 224, 224, 3))],
+            outputs=[tf_graph_output_name],
+        )
+        mlmodel.save("./mobilenet.mlmodel")
+
+    @staticmethod
+    def test_convert_from_saved_model_dir():
+        # SavedModel directory generated by TensorFlow 2.x
+        mlmodel = ct.convert("./saved_model")
+        mlmodel.save("./model.mlmodel")
+
+
+@pytest.mark.skipif(not _HAS_TORCH, reason=MSG_TORCH_NOT_FOUND)
+class TestPyTorchConverterExamples:
+    @staticmethod
+    def test_convert_torch_vision_mobilenet_v2(tmpdir):
+        import torch
+        import torchvision
+
+        """
+        In this example, we'll instantiate a PyTorch classification model and
+        convert it to Core ML.
+        """
+
+        """
+        Here we instantiate our model. In a real use case this would be your
+        trained model.
+        """
+        model = torchvision.models.mobilenet_v2()
+
+        """
+        The next thing we need to do is generate TorchScript for the model.
+        The easiest way to do this is by tracing it.
+        """
+
+        """
+        It's important that a model be in evaluation mode (not training mode)
+        when it's traced. This makes sure things like dropout are disabled.
+        """
+        model.eval()
+
+        """
+        Tracing takes an example input and traces its flow through the model.
+        Here we are creating an example image input.
+
+        The rank and shape of the tensor will depend on your model's use case.
+        If your model expects a fixed-size input, use that size here. If it
+        can accept a variety of input sizes, it's generally best to keep the
+        example input small to shorten how long it takes to run a forward
+        pass of your model. In all cases, the rank of the tensor must be
+        fixed.
+        """
+        example_input = torch.rand(1, 3, 256, 256)
+
+        """
+        Now we actually trace the model. This will produce the TorchScript
+        that the Core ML converter needs.
+        """
+        traced_model = torch.jit.trace(model, example_input)
+
+        """
+        Now, with a TorchScript representation of the model, we can call the
+        Core ML converter. The converter also needs a description of the
+        input to the model, where we can give it a convenient name.
+        """
+        mlmodel = ct.convert(
+            traced_model,
+            inputs=[ct.TensorType(name="input", shape=example_input.shape)],
+        )
+
+        """
+        Now, with the conversion complete, we can save the MLModel and run
+        inference.
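+
+        As a sanity check, it's also useful to compare the Core ML prediction
+        against the traced PyTorch model's own output on the same input; that
+        is what the assertion below does.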
+ """ + save_path = os.path.join(str(tmpdir), "mobilenet_v2.mlmodel") + mlmodel.save(save_path) + results = mlmodel.predict({"input": example_input.numpy()}) + expected = model(example_input) + np.testing.assert_allclose( + results["1651"], expected.detach().numpy(), rtol=1e-2 + ) + + @staticmethod + def test_int64_inputs(): + import torch + + num_tokens = 3 + embedding_size = 5 + + class TestModule(torch.nn.Module): + def __init__(self): + super(TestModule, self).__init__() + self.embedding = torch.nn.Embedding(num_tokens, embedding_size) + + def forward(self, x): + return self.embedding(x) + + model = TestModule() + model.eval() + + example_input = torch.randint(high=num_tokens, size=(2,), dtype=torch.int64) + traced_model = torch.jit.trace(model, example_input) + mlmodel = ct.convert( + traced_model, + inputs=[ + ct.TensorType( + name="input", + shape=example_input.shape, + dtype=example_input.numpy().dtype, + ) + ], + ) + + result = mlmodel.predict( + {"input": example_input.detach().numpy().astype(np.float32)} + ) + + # Verify outputs + expected = model(example_input) + np.testing.assert_allclose(result["5"], expected.detach().numpy()) + + # Duplicated inputs are invalid + with pytest.raises(ValueError, match=r"Duplicated inputs"): + mlmodel = ct.convert( + traced_model, + inputs=[ + ct.TensorType( + name="input", + shape=example_input.shape, + dtype=example_input.numpy().dtype, + ), + ct.TensorType( + name="input", + shape=example_input.shape, + dtype=example_input.numpy().dtype, + ), + ], + ) + + # Outputs must not be specified for PyTorch + with pytest.raises(ValueError, match=r"outputs must not be specified"): + mlmodel = ct.convert( + traced_model, + inputs=[ + ct.TensorType( + name="input", + shape=example_input.shape, + dtype=example_input.numpy().dtype, + ), + ], + outputs=["output"], + ) + + +class TestMILExamples: + @staticmethod + def test_tutorial(): + from coremltools.converters.mil import Builder as mb + + @mb.program( + input_specs=[mb.TensorSpec(shape=(1, 100, 100, 3)),] + ) + def prog(x): + x = mb.relu(x=x, name="relu") + x = mb.transpose(x=x, perm=[0, 3, 1, 2], name="transpose") + x = mb.reduce_mean(x=x, axes=[2, 3], keep_dims=False, name="reduce") + x = mb.log(x=x, name="log") + y = mb.add(x=1, y=2) + return x + + print("prog:\n", prog) + + # Convert and verify + from coremltools.converters.mil.converter import _convert + from coremltools import models + + proto = _convert(prog, convert_from="mil") + + model = models.MLModel(proto) + prediction = model.predict( + {"x": np.random.rand(1, 100, 100, 3).astype(np.float32),} + ) + assert len(prediction) == 1 diff --git a/coremltools/test/api/test_api_visibilities.py b/coremltools/test/api/test_api_visibilities.py new file mode 100644 index 000000000..66d953c58 --- /dev/null +++ b/coremltools/test/api/test_api_visibilities.py @@ -0,0 +1,213 @@ +import coremltools as ct + + +def _get_visible_items(d): + return [x for x in dir(d) if not x.startswith("_")] + + +def _check_visible_modules(actual, expected): + assert set(actual) == set(expected), "API mis-matched. Expected %s. 
Got %s" % ( + expected, + actual, + ) + + +class TestApiVisibilities: + """Test public coremltools API visibilities.""" + + def test_top_level(self): + expected = [ + "ClassifierConfig", + "EnumeratedShapes", + "ImageType", + "RangeDim", + "SPECIFICATION_VERSION", + "Shape", + "TensorType", + "convert", + "converters", + "models", + "proto", + "target", + "utils", + "version", + ] + _check_visible_modules(_get_visible_items(ct), expected) + + def test_utils(self): + expected = [ + "convert_double_to_float_multiarray_type", + "convert_neural_network_spec_weights_to_fp16", + "convert_neural_network_weights_to_fp16", + "evaluate_classifier", + "evaluate_classifier_with_probabilities", + "evaluate_regressor", + "evaluate_transformer", + "load_spec", + "rename_feature", + "save_spec", + ] + _check_visible_modules(_get_visible_items(ct.utils), expected) + + def test_models(self): + expected = [ + "MLModel", + "datatypes", + "model", + "neural_network", + "pipeline", + "tree_ensemble", + "utils", + ] + _check_visible_modules(_get_visible_items(ct.models), expected) + + def test_models_mlmodel(self): + expected = [ + "author", + "get_spec", + "input_description", + "license", + "output_description", + "predict", + "save", + "short_description", + "user_defined_metadata", + "version", + ] + _check_visible_modules(_get_visible_items(ct.models.MLModel), expected) + + def test_models_neural_network(self): + expected = [ + "AdamParams", + "NeuralNetworkBuilder", + "SgdParams", + "builder", + "datatypes", + "flexible_shape_utils", + "optimization_utils", + "printer", + "quantization_utils", + "set_training_features", + "set_transform_interface_params", + "spec_inspection_utils", + "update_optimizer_utils", + "utils", + ] + _check_visible_modules(_get_visible_items(ct.models.neural_network), expected) + + def test_models_neural_network_utils(self): + expected = ["NeuralNetworkBuilder", "make_image_input", "make_nn_classifier"] + _check_visible_modules( + _get_visible_items(ct.models.neural_network.utils), expected + ) + + def test_models_tree_ensemble(self): + expected = [ + "TreeEnsembleBase", + "TreeEnsembleClassifier", + "TreeEnsembleRegressor", + "set_classifier_interface_params", + "set_regressor_interface_params", + ] + _check_visible_modules(_get_visible_items(ct.models.tree_ensemble), expected) + + def test_models_pipeline(self): + expected = [ + "Pipeline", + "PipelineClassifier", + "PipelineRegressor", + "set_classifier_interface_params", + "set_regressor_interface_params", + "set_training_features", + "set_transform_interface_params", + ] + _check_visible_modules(_get_visible_items(ct.models.pipeline), expected) + + def test_converters(self): + expected = [ + "ClassifierConfig", + "EnumeratedShapes", + "ImageType", + "RangeDim", + "Shape", + "TensorType", + "caffe", + "convert", + "keras", + "libsvm", + "mil", + "onnx", + "sklearn", + "xgboost", + ] + _check_visible_modules(_get_visible_items(ct.converters), expected) + + def test_converters_caffe(self): + _check_visible_modules(_get_visible_items(ct.converters.caffe), ["convert"]) + + def test_converters_keras(self): + _check_visible_modules(_get_visible_items(ct.converters.keras), ["convert"]) + + def test_converters_libsvm(self): + _check_visible_modules(_get_visible_items(ct.converters.libsvm), ["convert"]) + + def test_converters_onnx(self): + _check_visible_modules(_get_visible_items(ct.converters.onnx), ["convert"]) + + def test_converters_sklearn(self): + _check_visible_modules(_get_visible_items(ct.converters.sklearn), ["convert"]) + 
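+    # Like the other third-party converter front ends checked above, xgboost
+    # is expected to expose exactly one public symbol: `convert`.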
+ def test_converters_xgboost(self): + _check_visible_modules(_get_visible_items(ct.converters.xgboost), ["convert"]) + + def test_converters_mil(self): + pass # TODO: [Create API visibility tests for MIL](rdar://64413959) + + def test_models_neural_network_quantization_utils(self): + expected = [ + "AdvancedQuantizedLayerSelector", + "MatrixMultiplyLayerSelector", + "ModelMetrics", + "NoiseMetrics", + "OutputMetric", + "QuantizedLayerSelector", + "TopKMetrics", + "activate_int8_int8_matrix_multiplications", + "compare_models", + "quantize_weights", + ] + _check_visible_modules( + _get_visible_items(ct.models.neural_network.quantization_utils), expected + ) + + def test_models_neural_network_flexible_shape_utils(self): + expected = [ + "NeuralNetworkImageSize", + "NeuralNetworkImageSizeRange", + "NeuralNetworkMultiArrayShape", + "NeuralNetworkMultiArrayShapeRange", + "Shape", + "ShapeRange", + "Size", + "add_enumerated_image_sizes", + "add_enumerated_multiarray_shapes", + "add_multiarray_ndshape_enumeration", + "set_multiarray_ndshape_range", + "update_image_size_range", + "update_multiarray_shape_range", + ] + _check_visible_modules( + _get_visible_items(ct.models.neural_network.flexible_shape_utils), expected + ) + + def test_models_neural_network_update_optimizer_utils(self): + expected = ["AdamParams", "Batch", "RangeParam", "SgdParams"] + _check_visible_modules( + _get_visible_items(ct.models.neural_network.update_optimizer_utils), + expected, + ) + + def test_models_neural_network_optimization_utils(self): + _check_visible_modules( + _get_visible_items(ct.models.neural_network.optimization_utils), [], + ) diff --git a/coremltools/converters/nnssa/__init__.py b/coremltools/test/neural_network/__init__.py similarity index 100% rename from coremltools/converters/nnssa/__init__.py rename to coremltools/test/neural_network/__init__.py diff --git a/coremltools/test/neural_network/test_caffe2_numeric.py b/coremltools/test/neural_network/test_caffe2_numeric.py deleted file mode 100644 index 1bcabe1cd..000000000 --- a/coremltools/test/neural_network/test_caffe2_numeric.py +++ /dev/null @@ -1,191 +0,0 @@ -from __future__ import division -from __future__ import print_function - -import copy -import itertools -import unittest - -import numpy as np - -import coremltools.models.datatypes as datatypes -from coremltools._deps import HAS_CAFFE2 -from coremltools.models import MLModel -from coremltools.models import neural_network as neural_network -from coremltools.models.utils import macos_version, is_macos - -if HAS_CAFFE2: - from caffe2.python import workspace, model_helper - -np.random.seed(10) -np.set_printoptions(precision=3, suppress=True) - - -class CorrectnessTest(unittest.TestCase): - def _compare_shapes(self, ref_preds, coreml_preds): - if np.squeeze(ref_preds).shape != np.squeeze(coreml_preds).shape: - return False - else: - return True - - def _compare_predictions(self, ref_preds, coreml_preds, snr_thresh=35): - ref_preds = ref_preds.flatten() - coreml_preds = coreml_preds.flatten() - noise = coreml_preds - ref_preds - noise_var = np.sum(noise ** 2) / len(noise) + 1e-7 - signal_energy = np.sum(ref_preds ** 2) / len(ref_preds) - max_signal_energy = np.amax(ref_preds ** 2) - SNR = 10 * np.log10(signal_energy / noise_var) - if SNR < snr_thresh: - return False - else: - return True - - def _test_model(self, input_dict, ref_output_dict, coreml_model): - if is_macos() and macos_version() >= (10, 13): - coreml_out_dict = coreml_model.predict(input_dict, useCPUOnly=True) - for out_ in 
list(ref_output_dict.keys()): - ref_out = ref_output_dict[out_] - coreml_out = coreml_out_dict[out_] - if self._compare_shapes(ref_out, coreml_out): - return True, self._compare_predictions(ref_out, coreml_out) - else: - return False, False - return True, True - - -@unittest.skipIf(not HAS_CAFFE2, 'Missing Caffe2. Skipping tests.') -class StressTest(CorrectnessTest): - - def test_roi_align(self): - - def get_coreml_model_roi_align(params): - eval = True - mlmodel = None - batch, ch, n_roi = params["b_c_n"] - H = params["H"] - W = params["W"] - s_ratio = params["sampling_ratio"] - try: - input_features = [('data', datatypes.Array(ch,H,W))] - if batch == 1: - input_features.append(('roi', datatypes.Array(4, 1, 1))) - else: - input_features.append(('roi', datatypes.Array(5, 1, 1))) - output_features = [('output', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - - builder.add_crop_resize('resize', ['data', 'roi'], 'output_crop_resize', - target_height=params["Hnew"]*s_ratio, target_width=params["Wnew"]*s_ratio, - mode='ROI_ALIGN_MODE', - normalized_roi=False, - box_indices_mode='CORNERS_WIDTH_FIRST', - spatial_scale=params["spatial_scale"]) - builder.add_pooling('pool', height=s_ratio, width=s_ratio, - stride_height=s_ratio, stride_width=s_ratio, - layer_type='AVERAGE', - padding_type='VALID', - input_name='output_crop_resize', output_name='output') - mlmodel = MLModel(builder.spec) - except RuntimeError as e: - print(e) - eval = False - - return mlmodel, eval - - def get_caffe2_predictions_roi_align(X, roi, params): - workspace.ResetWorkspace() - workspace.FeedBlob("data", X.astype(np.float32)) - workspace.FeedBlob("roi_in", roi.astype(np.float32)) - m = model_helper.ModelHelper(name="my net") - roi_align = m.net.RoIAlign(["data", "roi_in"], "out", - spatial_scale=params["spatial_scale"], - pooled_h=params["Hnew"], - pooled_w=params["Wnew"], - sampling_ratio=params["sampling_ratio"]) - workspace.RunNetOnce(m.param_init_net) - workspace.CreateNet(m.net) - workspace.RunNetOnce(m.net) - out = workspace.FetchBlob("out") - return out - - - ''' - Define Params - ''' - params_dict = dict(H = [1,4,10,100], #[1,4,10,100] - W = [1,4,10,100], #[1,4,10,100] - Hnew = [1,2,6,7], #[1,2,3,6,7] - Wnew = [1,2,6,7], #[1,2,3,6,7] - b_c_n=[(1, 1, 1), (1, 2, 3), (3, 2, 1), (3, 4, 3)], # [(1,1,1),(1,2,3),(3,2,1),(3,4,3)] - sampling_ratio = [1,2,3], #[1,2,3] - spatial_scale = [1.0,0.5], #[1.0, 0.5] - ) - params = [x for x in list(itertools.product(*params_dict.values()))] - valid_params = [dict(zip(params_dict.keys(), x)) for x in params] - print("Total params to be tested: {}".format(len(valid_params))) - ''' - Test - ''' - failed_tests_compile = [] - failed_tests_shape = [] - failed_tests_numerical = [] - for i in range(len(valid_params)): - params = valid_params[i] - # print("=========: ", params) - # if i % 100 == 0: - # print("======================= Testing {}/{}".format(str(i), str(len(valid_params)))) - batch, ch, n_roi = params["b_c_n"] - X = np.round(255 * np.random.rand(batch, ch, params["H"], params["W"])) - H = params["H"] - W = params["W"] - spatial_scale = params["spatial_scale"] - - if batch == 1: - roi = np.zeros((n_roi, 4), dtype=np.float32) - else: - roi = np.zeros((n_roi, 5), dtype=np.float32) - roi[:, 0] = np.random.randint(low=0, high=batch, size=(1, n_roi)) - - for ii in range(n_roi): - r = np.random.rand(4) - - w_start = r[0] * (W - 1) - h_start = r[1] * (H - 1) - w_end = r[2] * (W - 1 - w_start) + w_start - h_end = r[3] * (H - 1 - h_start) + h_start - 
- if batch == 1: - roi[ii, :] = [w_start, h_start, w_end, h_end] - roi[ii, :] /= spatial_scale - roi[ii, :] = np.round(10 * roi[ii, :]) / 10 - assert roi[ii, 0] <= roi[ii, 2] - assert roi[ii, 1] <= roi[ii, 3] - else: - roi[ii, 1:] = [w_start, h_start, w_end, h_end] - roi[ii, 1:] /= spatial_scale - roi[ii, 1:] = np.round(10 * roi[ii, 1:]) / 10 - assert roi[ii, 1] <= roi[ii, 3] - assert roi[ii, 2] <= roi[ii, 4] - - caffe2_preds = get_caffe2_predictions_roi_align(copy.deepcopy(X), copy.deepcopy(roi), params) - coreml_model, eval = get_coreml_model_roi_align(params) - if eval is False: - failed_tests_compile.append(params) - else: - input_dict = {'data': np.expand_dims(X, axis=0)} - input_dict['roi'] = np.reshape(roi, (n_roi,1,-1,1,1)) - ref_output_dict = {'output': np.expand_dims(caffe2_preds, axis=0)} - shape_match, numerical_match = self._test_model(input_dict, ref_output_dict, coreml_model) - if not shape_match: - failed_tests_shape.append(params) - if not numerical_match: - # print(params) - # print(np.squeeze(input_dict['roi'])) - # print(np.squeeze(input_dict['data'])) - # import sys - # sys.exit(1) - failed_tests_numerical.append(params) - - self.assertEqual(failed_tests_compile, []) - self.assertEqual(failed_tests_shape, []) - self.assertEqual(failed_tests_numerical, []) \ No newline at end of file diff --git a/coremltools/test/neural_network/test_caffe_stress_tests.py b/coremltools/test/neural_network/test_caffe_stress_tests.py deleted file mode 100644 index 7d02bf600..000000000 --- a/coremltools/test/neural_network/test_caffe_stress_tests.py +++ /dev/null @@ -1,407 +0,0 @@ -import json -import os -import shutil -import subprocess -import tarfile -import unittest -from subprocess import Popen - -import numpy as np - -from coremltools.converters import caffe as caffe_converter -from coremltools.models.utils import macos_version, is_macos - -nets_path = os.getenv('CAFFE_MODELS_PATH', '') -nets_path = nets_path + '/' if nets_path else '' -import coremltools -import pytest - - -def extract_tarfile(input_filename, dest_dir): - with tarfile.open(input_filename, "r:gz") as tar: - tar.extractall(dest_dir) - - -def traverse_caffe_nets(layer_type): - for root, dirs, files in os.walk( - '{}nets/{}/caffemodels/'.format(nets_path, layer_type)): - return files - - -def traverse_data_files(layer_type): - for root, dirs, files in os.walk( - '{}nets/{}/data/'.format(nets_path, layer_type)): - return set(files) - -"""def convert(model, image_input_names=[], is_bgr=False, - red_bias=0.0, blue_bias=0.0, green_bias=0.0, gray_bias=0.0, - image_scale=1.0, class_labels=None, predicted_feature_name=None): - """ -def conversion_to_mlmodel(net_name, proto_name, layer_type, input_layer): - filename= '{}nets/{}/mlkitmodels/{}.mlmodel'.format( - nets_path, layer_type, net_name) - caffe_model_path = '{}nets/{}/caffemodels/{}.caffemodel'.format( - nets_path, layer_type, net_name), - proto_path = '{}nets/{}/prototxt/{}.prototxt'.format( - nets_path, layer_type, proto_name) - model_path = caffe_model_path[0] - if isinstance(input_layer, str): - input_layer = [input_layer] - try: - model = caffe_converter.convert( - (model_path, proto_path), - ) - model.save(filename) - except RuntimeError as e: - print(e) - return False - return True - - -def load_mlmodel(net_name, layer_type): - load_args = [' /usr/local/bin/coremltest', - 'load', - '-modelPath', - '{}nets/{}/mlkitmodels/{}.mlmodel'.format( - nets_path, layer_type, net_name), - ] - print('Loading {}'.format(net_name)) - process = Popen((" ").join(load_args), - 
stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True) - stdout, err = process.communicate() - - if process.returncode == 0: - return True - - print(" The error is {}".format(err.decode())) - return False - - -def evaluation_data(net_name, layer_type, data_files): - if 'input' in net_name: - for file in data_files: - if 'output' in file: - output_data_file = file - elif 'input' in file: - input_data_file = file - else: - return False - - with open('{}nets/{}/data/{}'.format( - nets_path, layer_type, input_data_file) - ) as data_file: - input_net_data = json.load(data_file) - with open('{}nets/{}/data/{}'.format( - nets_path, layer_type, output_data_file) - ) as data_file: - output_net_data = json.load(data_file) - if isinstance(input_net_data, list): - input_data = [] - for data in input_net_data: - input_data.append(np.array(data["input_data"])) - else: - input_data = np.array(input_net_data["input_data"], dtype = 'f') - if isinstance(output_net_data, list): - output_data = [] - for data in output_net_data: - output_data.append(np.array(data["output_data"])) - else: - output_data = np.array(output_net_data["output_data"]) - - return input_data, output_data - - -@unittest.skipIf(not nets_path, 'Caffe nets path environment variable not ' - 'found. Skipping all caffe nose tests') -class CaffeLayers(unittest.TestCase): - """ - Unit test case for caffe layers - """ - @classmethod - def setUpClass(self): - """ - Set up the unit test by loading common utilities. - """ - - def run_case(self, layer_type, input_layer, output_layer, delta=1e-2): - self.maxDiff = None - extract_tarfile('{}nets/{}.gz'.format(nets_path, layer_type), - '{}nets/'.format(nets_path)) - nets = traverse_caffe_nets(layer_type) - data_files = traverse_data_files(layer_type) - failed_tests_load = [] - failed_tests_conversion = [] - failed_tests_evaluation = [] - counter = 0 - for net_name_proto in nets: - - counter += 1 - net_data_files = [] - proto_name = \ - net_name_proto.split("_")[0] + \ - "_" + \ - net_name_proto.split("_")[1] - for file in data_files: - if proto_name + '_' in file: - net_data_files.append(file) - net_name = net_name_proto.split(".")[0] - conversion_result = conversion_to_mlmodel( - net_name, - proto_name, - layer_type, - input_layer - ) - if is_macos() and macos_version() >= (10, 13): - if conversion_result is False: - failed_tests_conversion.append(net_name) - continue - load_result = load_mlmodel(net_name, layer_type) - if load_result is False: - failed_tests_load.append(net_name) - if 'input' in net_name: - evaluation_result, failed_tests_evaluation = \ - self.evaluate_model( - net_name, - layer_type, - input_layer, - output_layer, - net_data_files, - failed_tests_evaluation, - counter, - delta) - with open('./failed_tests_{}.json'.format(layer_type), mode='w') \ - as file: - json.dump({'conversion': failed_tests_conversion, - 'load': failed_tests_load, - 'evaluation': failed_tests_evaluation}, - file) - - self.assertEqual(failed_tests_conversion, - []) - self.assertEqual(failed_tests_load, - []) - self.assertEqual(failed_tests_evaluation, - []) - shutil.rmtree('{}nets/{}'.format(nets_path, layer_type)) - - def evaluate_model(self, - net_name, - layer_type, - input_layer, - output_layer, - data_files, - failed_tests, - counter, - delta=1e-2): - input_data, output_data = evaluation_data( - net_name, - layer_type, - data_files) - model_path = '{}nets/{}/mlkitmodels/{}.mlmodel'.format( - nets_path, - layer_type, - net_name - ) - coremlmodel = 
coremltools.models.MLModel(model_path) - mlmodel_input = {} - if isinstance(input_layer, list): - i = 0 - for layer in input_layer: - if len(input_data[i].shape) == 4: - if input_data[i].shape[0] > 1: - mlmodel_input[layer] = np.expand_dims(input_data[i], axis=0) - else: - mlmodel_input[layer] = input_data[i][0,:,:,:] - else: - mlmodel_input[layer] = input_data[i] - i += 1 - else: - if len(input_data.shape) == 4: - if input_data.shape[0] > 1: - if str(coremlmodel.output_description).split('(')[1][:-1] == 'LayerEmbed': - mlmodel_input[input_layer] = np.expand_dims(input_data, axis=1) - else: - mlmodel_input[input_layer] = np.expand_dims(input_data, axis=0) - else : - mlmodel_input[input_layer] = input_data[0,:,:,:] - else: - mlmodel_input[input_layer] = input_data[0] - if isinstance(output_layer, list): - output_preds = coremlmodel.predict(mlmodel_input) - coreml_preds = [] - caffe_preds = [] - i = 0 - for key in sorted(output_preds): - coreml_preds.extend(output_preds[key].flatten().tolist()) - caffe_preds.extend(output_data[i].flatten().tolist()) - i += 1 - else: - output_layer_name = str(coremlmodel.output_description).split('(')[1].split(')')[0] - coreml_preds = coremlmodel.predict(mlmodel_input)[output_layer_name].flatten() - caffe_preds = output_data.flatten() - - if len(coreml_preds) != len(caffe_preds): - failed_tests.append(net_name) - return relative_error, failed_tests - - for i in range(len(caffe_preds)): - max_den = max(1.0, np.abs(caffe_preds[i]), np.abs(coreml_preds[i])) - relative_error = np.abs(caffe_preds[i]/max_den - coreml_preds[i]/max_den) - if relative_error > delta and np.abs(caffe_preds[i]) < 1e10: - failed_tests.append(net_name) - break - - return relative_error, failed_tests - - @pytest.mark.slow - def test_caffe_inner_product_layer(self): - self.run_case( - layer_type='inner_product', - input_layer='data', - output_layer='LayerInnerProduct' - ) - - @pytest.mark.slow - def test_caffe_inner_product_activation_layer(self): - self.run_case( - layer_type='inner_product_activation', - input_layer='data', - output_layer='LayerActivation' - ) - - @pytest.mark.slow - def test_convolutional_layer(self): - self.run_case( - layer_type='convolutional', - input_layer='data', - output_layer='LayerConvolution' - ) - - @pytest.mark.slow - def test_deconvolution_layer(self): - self.run_case( - layer_type='deconvolution', - input_layer='data', - output_layer='LayerDeconvolution' - ) - - def test_reduction_layer(self): - self.run_case( - layer_type='reduction', - input_layer='data', - output_layer='LayerReduction' - ) - - def test_scale_layer(self): - self.run_case( - layer_type='scale', - input_layer=['LayerInput1', 'LayerInput2'], - output_layer='LayerScale' - ) - - def test_slice_layer(self): - self.run_case( - layer_type='slice', - input_layer='data', - output_layer=['LayerSlice', 'LayerSlice1', 'LayerSlice2'] - ) - - def test_bias_layer(self): - self.run_case( - layer_type='bias', - input_layer=['LayerInput1', 'LayerInput2'], - output_layer='LayerBias' - ) - - @pytest.mark.slow - def test_crop_layer(self): - self.run_case( - layer_type='crop', - input_layer=['LayerInput1', 'LayerInput2'], - output_layer='LayerCrop' - ) - - @unittest.skip("32877551") - def test_concat_layer(self): - self.run_case( - layer_type='concat', - input_layer=['LayerInput1', 'LayerInput2'], - output_layer='LayerConcat' - ) - - @pytest.mark.slow - def test_pooling_layer(self): - self.run_case( - layer_type='pooling', - input_layer='data', - output_layer='LayerPooling' - ) - - @pytest.mark.slow - def 
test_lrn(self): - self.run_case( - layer_type='lrn', - input_layer='data', - output_layer='LayerLRN', - ) - - @unittest.skip("33056676") - def test_mvn(self): - self.run_case( - layer_type='mvn', - input_layer='data', - output_layer='LayerMVN', - ) - - def test_reshape(self): - self.run_case( - layer_type='reshape', - input_layer='data', - output_layer='LayerReshape', - ) - - def test_embed(self): - self.run_case( - layer_type='embed', - input_layer='data', - output_layer='LayerEmbed', - ) - - def test_batchnorm(self): - self.run_case( - layer_type='batchnorm', - input_layer='data', - output_layer='LayerBatchNorm', - ) - - def test_flatten(self): - self.run_case( - layer_type='flatten', - input_layer='data', - output_layer='LayerFlatten', - ) - - def test_eltwise(self): - self.run_case( - layer_type='eltwise', - input_layer=['LayerInput1', 'LayerInput2'], - output_layer='LayerEltwise', - ) - - @unittest.skip("32739970") - def test_parameter(self): - self.run_case( - layer_type='parameter', - input_layer='data', - output_layer='LayerParameter', - ) - - def test_split(self): - self.run_case( - layer_type='split', - input_layer='data', - output_layer=['LayerOutput_1', 'LayerOutput_2'], - ) diff --git a/coremltools/test/neural_network/test_custom_neural_nets.py b/coremltools/test/neural_network/test_custom_neural_nets.py index 7338eb7a5..16e9764c9 100644 --- a/coremltools/test/neural_network/test_custom_neural_nets.py +++ b/coremltools/test/neural_network/test_custom_neural_nets.py @@ -8,55 +8,80 @@ import coremltools import coremltools.models.datatypes as datatypes from coremltools.models import neural_network as neural_network -from coremltools.models.utils import macos_version, is_macos +from coremltools.models.utils import _macos_version, _is_macos class SimpleTest(unittest.TestCase): - def test_fixed_seq_len(self): - ''' - Input has a fixed sequence length. + """ + Input has a fixed sequence length. 
(this happens when model is trained using padded sequences, inspiration: https://forums.developer.apple.com/thread/80407) - + (Seq,Batch,C,H,W) embedding: input shape (15,1,1,1,1) --> output shape (15,1,32,1,1) permute : input shape (15,1,32,1,1) --> output shape (1,1,32,1,15) flatten : input shape (1,1,32,1,15) --> output shape (1,1,32 * 15,1,1) dense : input shape (1,1,480,1,1) --> output shape (1,1,2,1,1) - ''' - + """ + coreml_preds = [] - input_dim = (1,1,1) - output_dim = (1, 1, 1) #some random dimensions here: we are going to remove this information later - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*output_dim))] + input_dim = (1, 1, 1) + output_dim = ( + 1, + 1, + 1, + ) # some random dimensions here: we are going to remove this information later + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*output_dim))] builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - - #ADD Layers - builder.add_embedding('embed', W = np.random.rand(10, 32), b = None , input_dim = 10, output_channels = 32, has_bias = 0, - input_name = 'data', output_name = 'embed') - builder.add_permute('permute', dim = [3,1,2,0], input_name = 'embed', output_name = 'permute') - builder.add_flatten('flatten', mode = 0, input_name = 'permute', output_name = 'flatten') - builder.add_inner_product('dense', W = np.random.rand(480,2), b = None, input_channels = 480, output_channels = 2, has_bias = 0, - input_name = 'flatten', output_name = 'output') - - #Remove output shape by deleting and adding an output - del builder.spec.description.output[-1] + + # ADD Layers + builder.add_embedding( + "embed", + W=np.random.rand(10, 32), + b=None, + input_dim=10, + output_channels=32, + has_bias=0, + input_name="data", + output_name="embed", + ) + builder.add_permute( + "permute", dim=[3, 1, 2, 0], input_name="embed", output_name="permute" + ) + builder.add_flatten( + "flatten", mode=0, input_name="permute", output_name="flatten" + ) + builder.add_inner_product( + "dense", + W=np.random.rand(480, 2), + b=None, + input_channels=480, + output_channels=2, + has_bias=0, + input_name="flatten", + output_name="output", + ) + + # Remove output shape by deleting and adding an output + del builder.spec.description.output[-1] output = builder.spec.description.output.add() - output.name = 'output' - output.type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE') - - #save the model + output.name = "output" + output.type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( + "DOUBLE" + ) + + # save the model model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'test_layer.mlmodel') + model_path = os.path.join(model_dir, "test_layer.mlmodel") coremltools.utils.save_spec(builder.spec, model_path) - #preprare input and get predictions + # preprare input and get predictions coreml_model = coremltools.models.MLModel(model_path) - X = np.random.randint(low=0,high=10,size=15) - X = np.reshape(X, (15,1,1,1,1)).astype(np.float32) - coreml_input = {'data': X} - if is_macos() and macos_version() >= (10, 13): - coreml_preds = coreml_model.predict(coreml_input)['output'] + X = np.random.randint(low=0, high=10, size=15) + X = np.reshape(X, (15, 1, 1, 1, 1)).astype(np.float32) + coreml_input = {"data": X} + if _is_macos() and _macos_version() >= (10, 13): + coreml_preds = 
coreml_model.predict(coreml_input)["output"] self.assertEquals(len(coreml_preds.flatten()), 2) if os.path.exists(model_dir): diff --git a/coremltools/test/neural_network/test_graph_passes.py b/coremltools/test/neural_network/test_graph_passes.py deleted file mode 100644 index e193fd4a3..000000000 --- a/coremltools/test/neural_network/test_graph_passes.py +++ /dev/null @@ -1,908 +0,0 @@ -import numpy as np -import unittest -import coremltools.models.datatypes as datatypes -from coremltools.models import neural_network as neural_network -from coremltools.models import MLModel -from coremltools.models.neural_network.printer import print_network_spec -from coremltools.converters.nnssa.coreml.graph_pass.mlmodel_passes import \ - remove_disconnected_layers, transform_conv_crop, remove_redundant_transposes -import copy -import pytest -from sys import platform -from coremltools.models.utils import macos_version - -DEBUG = False -np.random.seed(100) - -class MLModelPassesTest(unittest.TestCase): - - def test_load_constant_remove(self): - input_features = [('data', datatypes.Array(*(3, 4)))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_activation('relu1', 'RELU', 'data', 'relu1') - builder.add_load_constant_nd('const1', 'c1', constant_value=np.ones((5,)), shape=(5,)) - builder.add_activation('relu2', 'RELU', 'relu1', 'out') - builder.add_load_constant_nd('const2', 'c2', constant_value=np.ones((5,)), shape=(5,)) - builder.add_load_constant_nd('const3', 'c3', constant_value=np.ones((5,)), shape=(5,)) - spec = builder.spec - np.testing.assert_equal(5, len(spec.neuralNetwork.layers)) - remove_disconnected_layers(spec) - np.testing.assert_equal(2, len(spec.neuralNetwork.layers)) - - def test_dead_layer_remove(self): - input_features = [('data', datatypes.Array(*(3, 4)))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_activation('relu1', 'RELU', 'data', 'relu1') - builder.add_load_constant_nd('const1', 'c1', constant_value=np.ones((5,)), shape=(5,)) - builder.add_load_constant_nd('const2', 'c2', constant_value=np.ones((5,)), shape=(5,)) - builder.add_split_nd('splitnd1', 'const2', ['s1', 's2', 's3'], axis=0, num_splits=3) - builder.add_squeeze('squeeze', 's1', 'squeeze_out') - builder.add_activation('relu4', 'RELU', 's2', 'relu4') - builder.add_activation('relu5', 'RELU', 'relu4', 'relu5') - builder.add_load_constant_nd('const3', 'c3', constant_value=np.ones((5,)), shape=(5,)) - builder.add_activation('relu2', 'RELU', 'relu1', 'out') - spec = builder.spec - np.testing.assert_equal(9, len(spec.neuralNetwork.layers)) - remove_disconnected_layers(spec) - np.testing.assert_equal(2, len(spec.neuralNetwork.layers)) - - @pytest.mark.xfail - def test_dead_layer_remove_branch(self): - convergence_tolerance = 1e-8 - - input_features = [('input', datatypes.Array(*(2,)))] - output_features = [('out', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - # add condition to break from the loop, if convergence criterion is met - builder.add_less_than('cond', ['input'], 'cond', alpha=convergence_tolerance) - branch_layer = builder.add_branch('branch_layer', 'cond') - builder_ifbranch = neural_network.NeuralNetworkBuilder(nn_spec=branch_layer.branch.ifBranch) - builder_ifbranch.add_activation('relu1', 'RELU', 'input', 
'relu1_out') - builder_ifbranch.add_activation('relu2_out', 'RELU', 'relu1_out', 'relu2_out') - builder_elsebranch = neural_network.NeuralNetworkBuilder(nn_spec=branch_layer.branch.elseBranch) - builder_elsebranch.add_activation('linear1', 'LINEAR', 'input', 'linear1_out') - builder_elsebranch.add_activation('linear2', 'LINEAR', 'linear1_out', 'relu2_out') - builder.add_squeeze('out', 'input', 'out', squeeze_all=True) - - mlmodel = MLModel(builder.spec) - data = np.random.rand(2,) - data_dict = {'input': data} - before_pass_out = mlmodel.predict(data_dict)['out'] - if DEBUG: - print('\n mlmodel description before remove disconnected layers pass: \n') - print_network_spec(builder.spec, style='coding') - remove_disconnected_layers(builder.spec) - if DEBUG: - print('\n mlmodel description after remove disconnected layers pass: \n') - print_network_spec(builder.spec, style='coding') - mlmodel = MLModel(builder.spec) - after_pass_out = mlmodel.predict(data_dict)['out'] - - np.testing.assert_almost_equal(before_pass_out, after_pass_out, decimal=2) - np.testing.assert_equal(len(builder.spec.neuralNetwork.layers), 1) - - @pytest.mark.xfail - def test_dead_layer_partial_branch(self): - convergence_tolerance = 1e-8 - - input_features = [('input', datatypes.Array(*(2,)))] - output_features = [('out', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - # add condition to break from the loop, if convergence criterion is met - builder.add_less_than('cond', ['input'], 'cond', alpha=convergence_tolerance) - branch_layer = builder.add_branch('branch_layer', 'cond') - builder_ifbranch = neural_network.NeuralNetworkBuilder(nn_spec=branch_layer.branch.ifBranch) - builder_ifbranch.add_activation('relu1', 'RELU', 'input', 'relu1_out') - builder_ifbranch.add_activation('relu2_out', 'RELU', 'relu1_out', 'relu2_out') - builder_elsebranch = neural_network.NeuralNetworkBuilder(nn_spec=branch_layer.branch.elseBranch) - builder_elsebranch.add_activation('linear1', 'LINEAR', 'input', 'linear1_out') - builder_elsebranch.add_activation('linear_red_1', 'LINEAR', 'input', 'linear_red1_out') - builder_elsebranch.add_activation('linear_red_2', 'LINEAR', 'linear_red1_out', 'linear_red2_out') - builder_elsebranch.add_activation('linear2', 'LINEAR', 'linear1_out', 'relu2_out') - builder.add_squeeze('out', 'relu2_out', 'out', squeeze_all=True) - - mlmodel = MLModel(builder.spec) - data = np.random.rand(2,) - data_dict = {'input': data} - before_pass_out = mlmodel.predict(data_dict)['out'] - if DEBUG: - print('\n mlmodel description before remove disconnected layers pass: \n') - print_network_spec(builder.spec, style='coding') - old_spec = copy.copy(builder.spec) - remove_disconnected_layers(builder.spec) - if DEBUG: - print('\n mlmodel description after remove disconnected layers pass: \n') - print_network_spec(builder.spec, style='coding') - mlmodel = MLModel(builder.spec) - after_pass_out = mlmodel.predict(data_dict)['out'] - - np.testing.assert_almost_equal(before_pass_out, after_pass_out, decimal=2) - np.testing.assert_equal(len(old_spec.neuralNetwork.layers[1].branch.ifBranch.layers), - len(builder.spec.neuralNetwork.layers[1].branch.ifBranch.layers)) - np.testing.assert_equal(len(builder.spec.neuralNetwork.layers[1].branch.elseBranch.layers), 2) - - def test_conv_crop_bn_to_conv_bn_crop(self): - input_features = [('data', datatypes.Array(1, 10, 10))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, 
output_features) - W = np.ones((2,10,1,10), dtype=np.float32) - builder.add_convolution(name='conv', - kernel_channels=1, - output_channels=2, - height=2, width=2, - stride_height=1, stride_width=1, - border_mode='valid', groups=1, - W=W, - b=None, has_bias=False, - input_name='data', output_name='conv_out') - builder.add_crop(name='crop', - left=1, right=1, top=1, bottom=1, offset=0, - input_names=['conv_out'], - output_name='crop_out') - builder.add_batchnorm(name='bn', - channels=2, - gamma=np.ones(2,).astype(np.float32), - beta=np.ones(2,).astype(np.float32), - mean=np.ones(2,).astype(np.float32), - variance=np.ones(2,).astype(np.float32), - input_name='crop_out', - output_name='out') - # Conv -> Crop -> BN - spec = builder.spec.neuralNetwork - np.testing.assert_equal('crop', spec.layers[1].WhichOneof('layer')) - np.testing.assert_equal('batchnorm', spec.layers[2].WhichOneof('layer')) - - # transform the pattern - transform_conv_crop(builder.spec) - # Conv -> BN -> Crop - np.testing.assert_equal('batchnorm', spec.layers[1].WhichOneof('layer')) - np.testing.assert_equal('crop', spec.layers[2].WhichOneof('layer')) - - def test_conv_crop_bn_relu_to_conv_bn_relu_crop(self): - input_features = [('data', datatypes.Array(1, 10, 10))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - W = np.ones((2,10,1,10), dtype=np.float32) - builder.add_convolution(name='conv', - kernel_channels=1, - output_channels=2, - height=2, width=2, - stride_height=1, stride_width=1, - border_mode='valid', groups=1, - W=W, - b=None, has_bias=False, - input_name='data', output_name='conv_out') - builder.add_crop(name='crop', - left=1, right=1, top=1, bottom=1, offset=0, - input_names=['conv_out'], - output_name='crop_out') - builder.add_batchnorm(name='bn', - channels=2, - gamma=np.ones(2,).astype(np.float32), - beta=np.ones(2,).astype(np.float32), - mean=np.ones(2,).astype(np.float32), - variance=np.ones(2,).astype(np.float32), - input_name='crop_out', - output_name='bn_out') - builder.add_activation(name='relu', - non_linearity='RELU', - input_name='bn_out', - output_name='out') - # Conv -> Crop -> BN -> ReLU - spec = builder.spec.neuralNetwork - np.testing.assert_equal('crop', spec.layers[1].WhichOneof('layer')) - np.testing.assert_equal('batchnorm', spec.layers[2].WhichOneof('layer')) - np.testing.assert_equal('activation', spec.layers[3].WhichOneof('layer')) - - # transform the pattern - transform_conv_crop(builder.spec) - # Conv -> BN -> ReLU -> Crop - np.testing.assert_equal('batchnorm', spec.layers[1].WhichOneof('layer')) - np.testing.assert_equal('activation', spec.layers[2].WhichOneof('layer')) - np.testing.assert_equal('crop', spec.layers[3].WhichOneof('layer')) - - -@unittest.skipIf(platform != 'darwin' or macos_version() < (10, 15), "Requires MacOS 10.15 or later") -class Redundant_Transposees_Test(unittest.TestCase): - - def _test_builder(self, builder, input_shape, expected_layer_num=None): - - data = np.random.rand(*input_shape) - - # Mlmodel before - mlmodel = MLModel(builder.spec) - output_before = mlmodel.predict({'data':data})['out'] - num_layers_before = len(builder.spec.neuralNetwork.layers) - - remove_redundant_transposes(builder.spec) - - layers = builder.spec.neuralNetwork.layers - if expected_layer_num == None: - self.assertTrue(len(layers) < num_layers_before) - else: - self.assertEqual(len(layers), expected_layer_num) - - # Mlmodel after - mlmodel = MLModel(builder.spec) - output_after = 
mlmodel.predict({'data':data})['out'] - - np.testing.assert_almost_equal(output_before, output_after, decimal=3) - - def test_output_edge_case(self): - - # For now for safety purpose, the node which are output should't be merged - input_shape = (1,10,5) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_transpose(name='first_transpose', - axes=[2,0,1], - input_name='data', - output_name='first_transpose_out') - builder.add_transpose(name='second_transpose', - axes=[1,2,0], - input_name='first_transpose_out', - output_name='out') - - self._test_builder(builder, input_shape, 2) - - def test_output_edge_case_2(self): - - # For now for safety purpose, the node which are output should't be merged - input_shape = (1,10,5) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_transpose(name='ranspose', - axes=[1,2,0], - input_name='data', - output_name='out') - - self._test_builder(builder, input_shape, 1) - - def test_remove_single_identity_transpose(self): - - # A single identity transpose (like 0,1,2) should also be removed - input_shape = (1,10,5) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_transpose(name='uselss_transpose', - axes=[0,1,2], - input_name='data', - output_name='useless_transpose_out') - builder.add_activation(name='relu', - non_linearity='RELU', - input_name='useless_transpose_out', - output_name='out') - - self._test_builder(builder, input_shape, 1) - - def test_remove_three_transpose(self): - - # Three transpose layer which can be removed - input_shape = (1,10,5) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('out', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - transpose = [[2,1,0],[1,0,2],[2,0,1]] - input_name = 'data' - for i, axes in enumerate(transpose): - name = 'transpose_'+str(i) - output_name = name + '_out' - builder.add_transpose(name=name, - axes=axes, - input_name=input_name, - output_name=output_name) - input_name = output_name - - builder.add_activation(name='relu', - non_linearity='RELU', - input_name=input_name, - output_name='out') - - self._test_builder(builder, input_shape, 1) - - def test_remove_thousands_identity_transpose(self): - - ''' - INPUT - | - v - [t1] - | - v - [t2] - | - v - . - . - . 
-          |
-          v
-        [t1000]
-          |
-          v
-        RELU
-        All t_k are identity transposes.
-        Remove a sequence of 1000 identity transposes.
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-
-        num_layers = 1000
-        input_name = 'data'
-        for i in range(num_layers):
-            output_name = 'layer_'+str(i)+'_output'
-            name = 'layer_'+str(i)
-            builder.add_transpose(name=name,
-                                  axes=[0,1,2],
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-
-        self._test_builder(builder, input_shape, 1)
-
-    def test_remove_thousands_identity_transpose_with_activation_between(self):
-        '''
-        INPUT
-          |
-          v
-        [t1]
-          |
-          v
-          .
-          .
-          .
-        [t500]
-          |
-          v
-        RELU_1
-          |
-          v
-          .
-          .
-          .
-          |
-          v
-        [t1000]
-          |
-          v
-        RELU_2
-        All t_k are identity transposes.
-        Remove a sequence of 1000 identity transposes with a RELU in the
-        middle; the final output should be
-        INPUT
-          |
-          v
-        RELU_1
-          |
-          v
-        RELU_2
-
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-
-        num_layers = 1000
-        input_name = 'data'
-        for i in range(num_layers):
-            output_name = 'layer_'+str(i)+'_output'
-            name = 'layer_'+str(i)
-            builder.add_transpose(name=name,
-                                  axes=[0,1,2],
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-            if i == num_layers // 2:
-                builder.add_activation(name='relu_inter',
-                                       non_linearity='RELU',
-                                       input_name=input_name,
-                                       output_name='relu_out')
-                input_name = 'relu_out'
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-        self._test_builder(builder, input_shape, 2)
-
-    def test_remove_thousands_random_transpose_layers(self):
-        '''
-        INPUT
-          |
-          v
-        [t_0]
-          |
-          v
-        [t_1]
-          |
-          v
-          .
-          .
-          .
-          |
-          v
-        [t_999]
-          |
-          v
-        RELU
-        The t_k are randomly generated; with this particular seed, the
-        result should be
-        INPUT
-          |
-          v
-        [t_0]
-          |
-          v
-        [t_1]
-          |
-          v
-        RELU
-        '''
-
-        from itertools import permutations
-        import random
-        random.seed(1000)
-        input_shape = (3,10,5)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-
-        num_layers = 1000
-        dim = 3
-        input_name = 'data'
-        debug = []
-        for i in range(num_layers):
-            axes = list(permutations(range(dim)))
-            random.shuffle(axes)
-            output_name = 'layer_'+str(i)+'_output'
-            name = 'layer_'+str(i)
-            debug.append(axes[0])
-            builder.add_transpose(name=name,
-                                  axes=axes[0],
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-        self._test_builder(builder, input_shape, None)
-
-    def test_remove_thousands_random_transpose_layers_case_2(self):
-        '''
-        Same test as the previous one, but with more layers and dimensions.
-        '''
-        from itertools import permutations
-        import random
-        random.seed(0)
-        input_shape = (3,10,5,2,4)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-
-        num_layers = 5000
-        dim = 5
-        input_name = 'data'
-        for i in range(num_layers):
-            axes = list(permutations(range(dim)))
-            random.shuffle(axes)
-            output_name = 'layer_'+str(i)+'_output'
-            name = 'layer_'+str(i)
-            builder.add_transpose(name=name,
-                                  axes=axes[0],
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-        self._test_builder(builder, input_shape, None)
-
-    def test_branch_structure(self):
-        '''
-        INPUT
-          |
-          v
-        [t_0]
-          |
-          v
-        [t_1]
-          |
-          v
-        [t_3] --.
-          |     |
-          v     v
-        [t_4]  RELU_1
-          |
-          v
-        [t_5]
-          |
-          v
-        RELU_2
-        t_0, t_1, t_3 can be merged.
-        t_4, t_5 can be merged.
-        The output should be
-        INPUT
-          |
-          .------.
-          |      |
-          v      v
-        RELU_2  RELU_1
-
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(1,10,5))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-        transpose = [[2,1,0],[2,1,0],[0,1,2],[2,0,1],[1,2,0]]
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-        builder.add_activation(name='dummy',
-                               non_linearity='RELU',
-                               input_name='transpose_2_out',
-                               output_name='dummy')
-        self._test_builder(builder, input_shape, 2)
-
-    def test_branch_case_2(self):
-        '''
-        INPUT
-          |
-          v
-        [t_0] --.
-          |     |
-          v     v
-        [t_1]  RELU_1
-          |
-          v
-        RELU_2
-        Even though t_0 and t_1 could be merged, there is a branch from t_0,
-        so we shouldn't remove anything here.
-
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-        transpose = [[2,1,0],[2,1,0]]
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-        builder.add_activation(name='dummy',
-                               non_linearity='RELU',
-                               input_name='transpose_0_out',
-                               output_name='dummy')
-        self._test_builder(builder, input_shape, 4)
-
-    def test_fork_structure_case_3(self):
-        '''
-        INPUT
-          |
-          v
-        [t_0]
-          |
-          v
-        [t_1]--.
-          |    |
-          |    v
-          |   RELU_1
-          |
-          v
-        [t_2]--.
-          |    |
-          |    v
-          |   RELU_2
-        [t_3]
-          |
-          v
-        [t_4]--.
-          |    |
-          |    v
-          |   RELU_3
-          v
-        RELU_4
-
-        t_0 and t_1 can be merged, t_2 is an identity, and t_3 and t_4 can
-        be merged, so the final output should be
-        INPUT
-          |
-          .------------.----------.
-          |      |      |      |
-          v      v      v      v
-        RELU_1 RELU_2 RELU_3 RELU_4
-
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(1,10,5))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-        transpose = [[2,1,0],[2,1,0],[0,1,2],[2,1,0],[2,1,0]]
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-        builder.add_activation(name='dummy_1',
-                               non_linearity='RELU',
-                               input_name='transpose_1_out',
-                               output_name='dummy_1')
-        builder.add_activation(name='dummy_2',
-                               non_linearity='RELU',
-                               input_name='transpose_2_out',
-                               output_name='dummy_2')
-        builder.add_activation(name='dummy_4',
-                               non_linearity='RELU',
-                               input_name='transpose_4_out',
-                               output_name='dummy_4')
-
-        self._test_builder(builder, input_shape, 4)
-
-    def test_fork(self):
-        '''
-        INPUT
-          |
-          .------.------.
-          |             |
-          v             v
-        [t_1]         [t_3]
-          |             |
-          v             v
-        [t_2]         [t_4]
-          |             |
-          v             v
-        RELU_1        RELU_2
-
-        t_1 and t_2 can be merged, and t_3 and t_4 can be merged.
-        The resulting output would be
-
-        INPUT
-          |
-          .------.------.
-          |             |
-          v             v
-        RELU_1        RELU_2
-
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-        transpose = [[2,1,0],[2,1,0]]
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        builder.add_activation(name='relu',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out')
-
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_branch_2_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        builder.add_activation(name='relu_branch_2',
-                               non_linearity='RELU',
-                               input_name=input_name,
-                               output_name='out_branch_2')
-        self._test_builder(builder, input_shape, 2)
-
-    def test_fork_and_add(self):
-        '''
-        INPUT
-          |
-          .------.------.
-          |             |
-          v             v
-        [t_1]         [t_3]
-          |             |
-          v             v
-        [t_2]         [t_4]
-          |             |
-          .-----.  .-----.
-                |  |
-                v  v
-                Add
-
-        t_1 and t_2 can be merged, and t_3 and t_4 can be merged.
-        The resulting output would be
-
-        INPUT
-          |
-          .------.------.
-          |             |
-          .-----.  .-----.
-                |  |
-                v  v
-                Add
-
-        '''
-        input_shape = (1,10,5)
-        input_features = [('data', datatypes.Array(*input_shape))]
-        output_features = [('out', None)]
-        builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)
-        transpose = [[2,1,0],[2,1,0]]
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        input_1 = input_name
-
-        input_name = 'data'
-        for i, axes in enumerate(transpose):
-            name = 'transpose_branch_2_'+str(i)
-            output_name = name + '_out'
-            builder.add_transpose(name=name,
-                                  axes=axes,
-                                  input_name=input_name,
-                                  output_name=output_name)
-            input_name = output_name
-
-        input_2 = input_name
-
-        builder.add_add_broadcastable(name='add',
-                                      input_names=[input_1,input_2],
-                                      output_name='out')
-        self._test_builder(builder, input_shape, 1)
-
-    def test_transpose(self):
-
-        def _build_and_test_network(input_size, transpose_layers, expected_layers):
-            """
-            Helper function for testing transpose removal.
-
-            Args:
-                input_size: Size of the input network tensor.
-                transpose_layers: Array of transpose axes definitions.
-                expected_layers: Array of indices into transpose_layers
-                    indicating which of the transpose layers should be
-                    present after the graph pass.
-            """
-            input_features = [('data', datatypes.Array(*input_size))]
-            output_features = [('out', None)]
-            builder = neural_network.NeuralNetworkBuilder(input_features, output_features)
-
-            last_layer = 'data'
-            for idx, axes in enumerate(transpose_layers):
-                name = 't{}'.format(idx)
-                if idx == len(transpose_layers) - 1:
-                    output_name = 'out'
-                else:
-                    output_name = name + '_out'
-                builder.add_transpose(name=name,
-                                      axes=axes,
-                                      input_name=last_layer,
-                                      output_name=output_name)
-                last_layer = output_name
-
-            spec = builder.spec.neuralNetwork
-            # Check the network before the graph pass.
-            for idx in range(len(transpose_layers)):
-                np.testing.assert_equal('transpose', spec.layers[idx].WhichOneof('layer'))
-            # Run the removal pass.
-            remove_redundant_transposes(builder.spec)
-            # Verify only the expected layers remain.
-            np.testing.assert_equal(len(spec.layers), len(expected_layers))
-            for output_layer_idx, input_layer_idx in enumerate(expected_layers):
-                np.testing.assert_equal(
-                    'transpose',
-                    spec.layers[output_layer_idx].WhichOneof('layer')
-                )
-                np.testing.assert_array_equal(
-                    transpose_layers[input_layer_idx],
-                    spec.layers[output_layer_idx].transpose.axes
-                )
-
-        _build_and_test_network(
-            input_size=[1, 10, 10],
-            # These transposes are not inverses.
-            transpose_layers=[[2, 0, 1], [2, 0, 1]],
-            expected_layers=[0, 1],
-        )
-
-        _build_and_test_network(
-            input_size=[1, 1, 10, 10, 3],
-            # First two are the identity, then an extra.
-            transpose_layers=[[2, 4, 1, 0, 3], [3, 2, 0, 4, 1], [1, 0, 2, 3, 4]],
-            expected_layers=[2],
-        )
-
-        # A slightly more complicated test case where two transposes are
-        # adjacent in topological order but actually sit on parallel branches
-        # of the graph (see the composition sketch below for the related
-        # axis algebra).
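# NOTE (illustrative aside, not part of the patched test file): the removal
# pass reasons about transpose chains by composing their axes. A minimal
# sanity check of that algebra, assuming numpy's semantics (np.transpose by
# p followed by np.transpose by q equals one transpose by r, r[i] = p[q[i]]);
# all names below are made up for the sketch.
import numpy as np

def compose_axes(p, q):
    # Axes of the single transpose equivalent to applying p first, then q.
    return [p[i] for i in q]

chain = [[2, 1, 0], [1, 0, 2], [2, 0, 1]]  # as in test_remove_three_transpose
result = [0, 1, 2]
for axes in chain:
    result = compose_axes(result, axes)
assert result == [0, 1, 2]  # composes to the identity, so the chain is removable

x = np.random.rand(2, 3, 4)
y = x
for axes in chain:
    y = np.transpose(y, axes)
np.testing.assert_array_equal(x, y)
# End of aside.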
-        builder = neural_network.NeuralNetworkBuilder(
-            [('data', datatypes.Array(2, 4, 8))],
-            [('out', None)]
-        )
-        last_layer = 'data'
-        builder.add_transpose(name='t1',
-                              axes=[0, 2, 1],
-                              input_name='data',
-                              output_name='t1')
-        builder.add_transpose(name='t2',
-                              axes=[0, 2, 1],
-                              input_name='data',
-                              output_name='t2')
-        builder.add_stack(name='stack',
-                          input_names=['t1', 't2'],
-                          output_name='out')
-        spec = builder.spec.neuralNetwork
-        # Run the removal pass.
-        remove_redundant_transposes(builder.spec)
-        # Verify nothing was removed.
-        np.testing.assert_equal(len(spec.layers), 3)
-
-
-if __name__ == '__main__':
-    RUN_ALL_TESTS = True
-    if RUN_ALL_TESTS:
-        unittest.main()
-    else:
-        suite = unittest.TestSuite()
-        suite.addTest(MLModelPassesTest('test_load_constant_remove'))
-        unittest.TextTestRunner().run(suite)
diff --git a/coremltools/test/neural_network/test_keras.py b/coremltools/test/neural_network/test_keras.py
index 2cc4a18e4..b53971e14 100644
--- a/coremltools/test/neural_network/test_keras.py
+++ b/coremltools/test/neural_network/test_keras.py
@@ -5,22 +5,24 @@
 import unittest
 
-from coremltools._deps import HAS_KERAS_TF
+from coremltools._deps import _HAS_KERAS_TF
 from coremltools.proto import FeatureTypes_pb2
 import pytest
 import six
 
-if HAS_KERAS_TF:
+if _HAS_KERAS_TF:
     import tensorflow as tf
     from keras.models import Sequential, Model
     from coremltools.converters import keras
 
-@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.')
+
+@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. Skipping tests.")
@pytest.mark.keras1
 class KerasSingleLayerTest(unittest.TestCase):
     """
     Unit test class for testing the Keras converter.
     """
+
     @classmethod
     def setUpClass(self):
         """
@@ -37,22 +39,24 @@ def test_dense(self):
         model = Sequential()
         model.add(Dense(32, input_dim=16))
 
-        input_names = ['input']
-        output_names = ['output']
+        input_names = ["input"]
+        output_names = ["output"]
         spec = keras.convert(model, input_names, output_names).get_spec()
         self.assertIsNotNone(spec)
 
         # Test the model class
         self.assertIsNotNone(spec.description)
-        self.assertTrue(spec.HasField('neuralNetwork'))
+        self.assertTrue(spec.HasField("neuralNetwork"))
 
         # Test the inputs and outputs
         self.assertEquals(len(spec.description.input), len(input_names))
-        six.assertCountEqual(self, input_names,
-                             [x.name for x in spec.description.input])
+        six.assertCountEqual(
+            self, input_names, [x.name for x in spec.description.input]
+        )
         self.assertEquals(len(spec.description.output), len(output_names))
-        six.assertCountEqual(self, output_names,
-                             [x.name for x in spec.description.output])
+        six.assertCountEqual(
+            self, output_names, [x.name for x in spec.description.output]
+        )
 
         # Test the layer parameters.
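# NOTE (illustrative aside, not part of the diff): every test in this file
# follows the same convert-then-inspect pattern. A minimal sketch of it,
# assuming Keras 1.x with a TensorFlow backend is installed; the model and
# feature names are made up for the sketch.
from keras.models import Sequential
from keras.layers import Dense
from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(Dense(32, input_dim=16))
spec = keras_converter.convert(model, ["input"], ["output"]).get_spec()
assert spec.HasField("neuralNetwork")
assert [x.name for x in spec.description.input] == ["input"]
# End of aside.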
layers = spec.neuralNetwork.layers @@ -66,8 +70,24 @@ def test_activations(self): from keras.layers import Dense, Activation # Create a simple Keras model - keras_activation_options = ['tanh', 'softplus', 'softsign', 'relu', 'sigmoid', 'hard_sigmoid', 'linear'] - coreml_activation_options = ['tanh', 'softplus', 'softsign', 'ReLU', 'sigmoid', 'sigmoidHard', 'linear'] + keras_activation_options = [ + "tanh", + "softplus", + "softsign", + "relu", + "sigmoid", + "hard_sigmoid", + "linear", + ] + coreml_activation_options = [ + "tanh", + "softplus", + "softsign", + "ReLU", + "sigmoid", + "sigmoidHard", + "linear", + ] for i, k_act in enumerate(keras_activation_options): c_act = coreml_activation_options[i] @@ -75,22 +95,24 @@ def test_activations(self): model.add(Dense(32, input_dim=16)) model.add(Activation(k_act)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -107,24 +129,26 @@ def test_activation_softmax(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) + model.add(Activation("softmax")) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. 
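# NOTE (illustrative aside, not part of the diff): the two parallel lists in
# test_activations above encode the Keras-to-Core ML activation-name mapping;
# the same pairs written as a dict, purely for readability.
KERAS_TO_COREML_ACTIVATION = {
    "tanh": "tanh",
    "softplus": "softplus",
    "softsign": "softsign",
    "relu": "ReLU",
    "sigmoid": "sigmoid",
    "hard_sigmoid": "sigmoidHard",
    "linear": "linear",
}
# End of aside.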
layers = spec.neuralNetwork.layers @@ -145,22 +169,24 @@ def test_dropout(self): model.add(Dropout(0.5)) model.add(Dense(32, input_dim=16)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -176,26 +202,38 @@ def test_convolution(self): # Create a simple Keras model model = Sequential() - model.add(Convolution2D(input_shape=(64, 64, 3), - nb_filter=32, nb_row=5, nb_col=5, - init='glorot_uniform', activation=None, weights=None, - border_mode='valid', subsample=(1, 1), bias=True)) - input_names = ['input'] - output_names = ['output'] + model.add( + Convolution2D( + input_shape=(64, 64, 3), + nb_filter=32, + nb_row=5, + nb_col=5, + init="glorot_uniform", + activation=None, + weights=None, + border_mode="valid", + subsample=(1, 1), + bias=True, + ) + ) + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. 
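# NOTE (illustrative aside, not part of the diff): "Test the layer parameters"
# in these tests means reading the generated protobuf back. A self-contained
# sketch of that pattern using the public NeuralNetworkBuilder instead of a
# Keras model; the layer names and shapes here are made up for the sketch.
import numpy as np
from coremltools.models import datatypes, neural_network

builder = neural_network.NeuralNetworkBuilder(
    [("data", datatypes.Array(3, 8, 8))], [("out", None)]
)
builder.add_convolution(name="conv", kernel_channels=3, output_channels=2,
                        height=2, width=2, stride_height=1, stride_width=1,
                        border_mode="valid", groups=1,
                        W=np.ones((2, 2, 3, 2), dtype=np.float32),
                        b=None, has_bias=False,
                        input_name="data", output_name="out")
layer = builder.spec.neuralNetwork.layers[0]
assert layer.WhichOneof("layer") == "convolution"
assert layer.convolution.outputChannels == 2
# End of aside.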
         layers = spec.neuralNetwork.layers
@@ -210,25 +248,28 @@ def test_upsample(self):
         # Create a simple Keras model
         model = Sequential()
-        model.add(Convolution2D(input_shape=(64, 64, 3), nb_filter=32,
-                                nb_row=5, nb_col=5))
-        model.add(UpSampling2D(size = (2, 2)))
-        input_names = ['input']
-        output_names = ['output']
+        model.add(
+            Convolution2D(input_shape=(64, 64, 3), nb_filter=32, nb_row=5, nb_col=5)
+        )
+        model.add(UpSampling2D(size=(2, 2)))
+        input_names = ["input"]
+        output_names = ["output"]
         spec = keras.convert(model, input_names, output_names).get_spec()
         self.assertIsNotNone(spec)
 
         # Test the model class
         self.assertIsNotNone(spec.description)
-        self.assertTrue(spec.HasField('neuralNetwork'))
+        self.assertTrue(spec.HasField("neuralNetwork"))
 
         # Test the inputs and outputs
         self.assertEquals(len(spec.description.input), len(input_names))
-        six.assertCountEqual(self, input_names,
-                             [x.name for x in spec.description.input])
+        six.assertCountEqual(
+            self, input_names, [x.name for x in spec.description.input]
+        )
         self.assertEquals(len(spec.description.output), len(output_names))
-        six.assertCountEqual(self, output_names,
-                             [x.name for x in spec.description.output])
+        six.assertCountEqual(
+            self, output_names, [x.name for x in spec.description.output]
+        )
 
         # Test the layer parameters.
         layers = spec.neuralNetwork.layers
@@ -245,28 +286,40 @@ def test_pooling(self):
         # Create a simple Keras model
         model = Sequential()
-        model.add(Convolution2D(input_shape=(64, 64, 3),
-                                nb_filter=32, nb_row=5, nb_col=5,
-                                init='glorot_uniform', activation=None, weights=None,
-                                border_mode='valid', subsample=(1, 1), bias=True))
-        model.add(MaxPooling2D(pool_size=(2,2)))
-
-        input_names = ['input']
-        output_names = ['output']
+        model.add(
+            Convolution2D(
+                input_shape=(64, 64, 3),
+                nb_filter=32,
+                nb_row=5,
+                nb_col=5,
+                init="glorot_uniform",
+                activation=None,
+                weights=None,
+                border_mode="valid",
+                subsample=(1, 1),
+                bias=True,
+            )
+        )
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        input_names = ["input"]
+        output_names = ["output"]
         spec = keras.convert(model, input_names, output_names).get_spec()
         self.assertIsNotNone(spec)
 
         # Test the model class
         self.assertIsNotNone(spec.description)
-        self.assertTrue(spec.HasField('neuralNetwork'))
+        self.assertTrue(spec.HasField("neuralNetwork"))
 
         # Test the inputs and outputs
         self.assertEquals(len(spec.description.input), len(input_names))
-        six.assertCountEqual(self, input_names,
-                             [x.name for x in spec.description.input])
+        six.assertCountEqual(
+            self, input_names, [x.name for x in spec.description.input]
+        )
         self.assertEquals(len(spec.description.output), len(output_names))
-        six.assertCountEqual(self, output_names,
-                             [x.name for x in spec.description.output])
+        six.assertCountEqual(
+            self, output_names, [x.name for x in spec.description.output]
+        )
 
         # Test the layer parameters.
         layers = spec.neuralNetwork.layers
@@ -278,26 +331,29 @@ def test_permute(self):
         Test the conversion of a permute layer.
""" from keras.layers.core import Permute + # Create a simple Keras model model = Sequential() - model.add(Permute((3, 2, 1), input_shape=(10, 64,3))) + model.add(Permute((3, 2, 1), input_shape=(10, 64, 3))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -314,8 +370,8 @@ def test_lstm(self): model = Sequential() model.add(LSTM(32, input_dim=24, input_length=10)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() print(spec) @@ -324,10 +380,10 @@ def test_lstm(self): # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs - self.assertEquals(len(spec.description.input), len(input_names)+2) + self.assertEquals(len(spec.description.input), len(input_names) + 2) self.assertEquals(32, spec.description.input[1].type.multiArrayType.shape[0]) self.assertEquals(32, spec.description.input[2].type.multiArrayType.shape[0]) @@ -346,7 +402,6 @@ def test_lstm(self): self.assertEquals(len(layer_0.input), 3) self.assertEquals(len(layer_0.output), 3) - def test_simple_rnn(self): """ Test the conversion of a simple RNN layer. @@ -357,14 +412,14 @@ def test_simple_rnn(self): model = Sequential() model.add(SimpleRNN(32, input_dim=32, input_length=10)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 1) @@ -384,7 +439,6 @@ def test_simple_rnn(self): self.assertEquals(len(layer_0.input), 2) self.assertEquals(len(layer_0.output), 2) - def test_gru(self): """ Test the conversion of a GRU layer. 
@@ -395,14 +449,14 @@ def test_gru(self): model = Sequential() model.add(GRU(32, input_dim=32, input_length=10)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 1) @@ -431,17 +485,18 @@ def test_bidir(self): # Create a simple Keras model model = Sequential() - model.add(Bidirectional(LSTM(32, input_dim=32, input_length=10), - input_shape=(10, 32))) + model.add( + Bidirectional(LSTM(32, input_dim=32, input_length=10), input_shape=(10, 32)) + ) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 4) @@ -468,7 +523,6 @@ def test_bidir(self): self.assertEquals(len(layer_0.input), 5) self.assertEquals(len(layer_0.output), 5) - def test_embedding(self): from keras.layers import Embedding @@ -477,8 +531,8 @@ def test_embedding(self): num_outputs = 3 model.add(Embedding(num_inputs, num_outputs, input_length=5)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() @@ -486,7 +540,7 @@ def test_embedding(self): # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) @@ -499,7 +553,9 @@ def test_embedding(self): self.assertEquals(layer_0.embedding.inputDim, num_inputs) self.assertEquals(layer_0.embedding.outputChannels, num_outputs) - self.assertEquals(len(layer_0.embedding.weights.floatValue), num_inputs*num_outputs) + self.assertEquals( + len(layer_0.embedding.weights.floatValue), num_inputs * num_outputs + ) @unittest.skip def test_sentiment_analysis(self): @@ -521,25 +577,27 @@ def test_sentiment_analysis(self): model.add(Embedding(max_features, embedded_dim, input_length=sequence_length)) # output_dim = 32 model.add(LSTM(32)) - model.add(Dense(1, activation='sigmoid')) + model.add(Dense(1, activation="sigmoid")) # Input/output - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in 
spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -550,28 +608,31 @@ def test_sentiment_analysis(self): @unittest.skip def test_conv1d_lstm(self): from keras.layers import Convolution1D, LSTM, Dense + model = Sequential() # input_shape = (time_step, dimensions) - model.add(Convolution1D(32,3,border_mode='same',input_shape=(10,8))) + model.add(Convolution1D(32, 3, border_mode="same", input_shape=(10, 8))) # conv1d output shape = (None, 10, 32) model.add(LSTM(24)) - model.add(Dense(1, activation='sigmoid')) - print('model.layers[1].output_shape=', model.layers[1].output_shape) + model.add(Dense(1, activation="sigmoid")) + print("model.layers[1].output_shape=", model.layers[1].output_shape) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -588,28 +649,40 @@ def test_batchnorm(self): # Create a simple Keras model model = Sequential() - model.add(Convolution2D(input_shape=(64, 64, 3), - nb_filter=32, nb_row=5, nb_col=5, - init='glorot_uniform', activation=None, weights=None, - border_mode='valid', subsample=(1, 1), bias=True)) + model.add( + Convolution2D( + input_shape=(64, 64, 3), + nb_filter=32, + nb_row=5, + nb_col=5, + init="glorot_uniform", + activation=None, + weights=None, + border_mode="valid", + subsample=(1, 1), + bias=True, + ) + ) # epsilon in CoreML is currently fixed at 1e-5 model.add(BatchNormalization(epsilon=1e-5)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) # Test the layer parameters. 
layers = spec.neuralNetwork.layers @@ -622,38 +695,43 @@ def test_repeat_vector(self): model = Sequential() model.add(RepeatVector(3, input_shape=(5,))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(output_names)) - six.assertCountEqual(self, output_names, - [x.name for x in spec.description.output]) + six.assertCountEqual( + self, output_names, [x.name for x in spec.description.output] + ) layers = spec.neuralNetwork.layers self.assertIsNotNone(layers[0].sequenceRepeat) - @pytest.mark.xfail(raises = ValueError) + @pytest.mark.xfail(raises=ValueError) def test_unsupported_variational_deconv(self): from keras.layers import Input, Lambda, Convolution2D, Flatten, Dense - x = Input(shape=(8,8,3)) - conv_1 = Convolution2D(4, 2, 2, border_mode='same', activation='relu')(x) + + x = Input(shape=(8, 8, 3)) + conv_1 = Convolution2D(4, 2, 2, border_mode="same", activation="relu")(x) flat = Flatten()(conv_1) - hidden = Dense(10, activation='relu')(flat) + hidden = Dense(10, activation="relu")(flat) z_mean = Dense(10)(hidden) z_log_var = Dense(10)(hidden) + def sampling(args): z_mean, z_log_var = args return z_mean + z_log_var + z = Lambda(sampling, output_shape=(10,))([z_mean, z_log_var]) model = Model([x], [z]) - spec = keras.convert(model, ['input'], ['output']).get_spec() + spec = keras.convert(model, ["input"], ["output"]).get_spec() def test_image_processing(self): """ @@ -663,33 +741,54 @@ def test_image_processing(self): # Create a simple Keras model model = Sequential() - model.add(Convolution2D(input_shape=(64, 64, 3), - nb_filter=32, nb_row=5, nb_col=5, - init='glorot_uniform', activation=None, weights=None, - border_mode='valid', subsample=(1, 1), bias=True)) - input_names = ['input'] - output_names = ['output'] - spec = keras.convert(model, input_names, output_names, image_input_names = - ['input'], red_bias = 110.0, blue_bias = 117.0, green_bias = 120.0, - is_bgr = True, image_scale = 1.0).get_spec() + model.add( + Convolution2D( + input_shape=(64, 64, 3), + nb_filter=32, + nb_row=5, + nb_col=5, + init="glorot_uniform", + activation=None, + weights=None, + border_mode="valid", + subsample=(1, 1), + bias=True, + ) + ) + input_names = ["input"] + output_names = ["output"] + spec = keras.convert( + model, + input_names, + output_names, + image_input_names=["input"], + red_bias=110.0, + blue_bias=117.0, + green_bias=120.0, + is_bgr=True, + image_scale=1.0, + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) - self.assertEquals(spec.description.input[0].type.WhichOneof('Type'), - 'imageType') - self.assertEquals(spec.description.input[0].type.imageType.colorSpace, - FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('BGR')) + self.assertTrue(spec.HasField("neuralNetwork")) + self.assertEquals( + spec.description.input[0].type.WhichOneof("Type"), "imageType" + ) + 
self.assertEquals( + spec.description.input[0].type.imageType.colorSpace, + FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("BGR"), + ) # Test the layer parameters. preprocessing = spec.neuralNetwork.preprocessing[0] - self.assertTrue(preprocessing.HasField('scaler')) + self.assertTrue(preprocessing.HasField("scaler")) pr_0 = preprocessing.scaler - print('pr_0.channelScale = ', pr_0.channelScale) - print('pr_0.redBias = ', pr_0.redBias) - print('pr_0.blueBias = ', pr_0.blueBias) - print('pr_0.greenBias = ', pr_0.greenBias) + print("pr_0.channelScale = ", pr_0.channelScale) + print("pr_0.redBias = ", pr_0.redBias) + print("pr_0.blueBias = ", pr_0.blueBias) + print("pr_0.greenBias = ", pr_0.greenBias) self.assertIsNotNone(pr_0.redBias) self.assertIsNotNone(pr_0.greenBias) self.assertIsNotNone(pr_0.blueBias) @@ -700,22 +799,33 @@ def test_image_processing(self): self.assertEqual(pr_0.greenBias, 120.0) # Configuration 2: isbgr = False - spec = keras.convert(model, input_names, output_names, image_input_names = - ['input'], red_bias = 110.0, blue_bias = 117.0, green_bias = 120.0, - is_bgr = False, image_scale = 1.0).get_spec() + spec = keras.convert( + model, + input_names, + output_names, + image_input_names=["input"], + red_bias=110.0, + blue_bias=117.0, + green_bias=120.0, + is_bgr=False, + image_scale=1.0, + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) - self.assertEquals(spec.description.input[0].type.WhichOneof('Type'), - 'imageType') - self.assertEquals(spec.description.input[0].type.imageType.colorSpace, - FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('RGB')) + self.assertTrue(spec.HasField("neuralNetwork")) + self.assertEquals( + spec.description.input[0].type.WhichOneof("Type"), "imageType" + ) + self.assertEquals( + spec.description.input[0].type.imageType.colorSpace, + FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("RGB"), + ) # Test the layer parameters. preprocessing = spec.neuralNetwork.preprocessing[0] - self.assertTrue(preprocessing.HasField('scaler')) + self.assertTrue(preprocessing.HasField("scaler")) pr_0 = preprocessing.scaler self.assertIsNotNone(pr_0.redBias) self.assertIsNotNone(pr_0.greenBias) @@ -727,21 +837,30 @@ def test_image_processing(self): self.assertEqual(pr_0.greenBias, 120.0) # Configuration 3: Defaults - spec = keras.convert(model, input_names, output_names, image_input_names = - ['input'], is_bgr = False, image_scale = 1.0).get_spec() + spec = keras.convert( + model, + input_names, + output_names, + image_input_names=["input"], + is_bgr=False, + image_scale=1.0, + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) - self.assertEquals(spec.description.input[0].type.WhichOneof('Type'), - 'imageType') - self.assertEquals(spec.description.input[0].type.imageType.colorSpace, - FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('RGB')) + self.assertTrue(spec.HasField("neuralNetwork")) + self.assertEquals( + spec.description.input[0].type.WhichOneof("Type"), "imageType" + ) + self.assertEquals( + spec.description.input[0].type.imageType.colorSpace, + FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("RGB"), + ) # Test the layer parameters. 
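# NOTE (illustrative aside, not part of the diff): the configurations in
# test_image_processing exercise how keras.convert's image arguments land in
# the spec, roughly:
#   is_bgr=True  -> input imageType.colorSpace == BGR (False -> RGB)
#   red_bias / green_bias / blue_bias and image_scale
#                -> neuralNetwork.preprocessing[0].scaler
# A hedged read-back sketch, assuming `spec` came from such a convert call:
scaler = spec.neuralNetwork.preprocessing[0].scaler
print(scaler.channelScale, scaler.redBias, scaler.greenBias, scaler.blueBias)
# End of aside.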
preprocessing = spec.neuralNetwork.preprocessing[0] - self.assertTrue(preprocessing.HasField('scaler')) + self.assertTrue(preprocessing.HasField("scaler")) pr_0 = preprocessing.scaler self.assertIsNotNone(pr_0.redBias) self.assertIsNotNone(pr_0.greenBias) @@ -759,38 +878,55 @@ def test_classifier_string_classes(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) - classes = ['c%s' % i for i in range(32)] - - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'classLabel'] - spec = keras.convert(model, input_names, output_names, class_labels = classes).get_spec() + model.add(Activation("softmax")) + classes = ["c%s" % i for i in range(32)] + + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "classLabel"] + spec = keras.convert( + model, input_names, output_names, class_labels=classes + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - [x.name for x in spec.description.output]) + self.assertEquals( + expected_output_names, [x.name for x in spec.description.output] + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'stringKeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'stringType') - self.assertTrue(spec.description.predictedFeatureName, 'classLabel') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "stringKeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "stringType" + ) + self.assertTrue(spec.description.predictedFeatureName, "classLabel") + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # Test the class parameters - self.assertEqual(spec.WhichOneof('Type'), 'neuralNetworkClassifier', "Expected a NN classifier model") - self.assertEqual(spec.neuralNetworkClassifier.WhichOneof('ClassLabels'), 'stringClassLabels') + self.assertEqual( + spec.WhichOneof("Type"), + "neuralNetworkClassifier", + "Expected a NN classifier model", + ) + self.assertEqual( + spec.neuralNetworkClassifier.WhichOneof("ClassLabels"), "stringClassLabels" + ) class_from_proto = list(spec.neuralNetworkClassifier.stringClassLabels.vector) six.assertCountEqual(self, classes, class_from_proto) @@ -803,37 +939,48 @@ def test_classifier_file(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) - classes = ['c%s' % i for i in range(32)] + 
model.add(Activation("softmax")) + classes = ["c%s" % i for i in range(32)] classes_file = tempfile.mktemp() - with open(classes_file, 'w') as f: - f.write('\n'.join(classes)) - - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'classLabel'] - spec = keras.convert(model, input_names, output_names, class_labels = classes).get_spec() + with open(classes_file, "w") as f: + f.write("\n".join(classes)) + + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "classLabel"] + spec = keras.convert( + model, input_names, output_names, class_labels=classes + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - [x.name for x in spec.description.output]) + self.assertEquals( + expected_output_names, [x.name for x in spec.description.output] + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'stringKeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'stringType') - self.assertTrue(spec.description.predictedFeatureName, 'classLabel') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "stringKeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "stringType" + ) + self.assertTrue(spec.description.predictedFeatureName, "classLabel") + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # cleanup os.remove(classes_file) @@ -845,38 +992,55 @@ def test_classifier_integer_classes(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) + model.add(Activation("softmax")) classes = list(range(32)) - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'classLabel'] - spec = keras.convert(model, input_names, output_names, class_labels = classes).get_spec() + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "classLabel"] + spec = keras.convert( + model, input_names, output_names, class_labels=classes + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - 
[x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - [x.name for x in spec.description.output]) + self.assertEquals( + expected_output_names, [x.name for x in spec.description.output] + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'int64KeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'int64Type') - self.assertTrue(spec.description.predictedFeatureName, 'classLabel') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "int64KeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "int64Type" + ) + self.assertTrue(spec.description.predictedFeatureName, "classLabel") + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # Test the class parameters - self.assertEqual(spec.WhichOneof('Type'), 'neuralNetworkClassifier', "Expected a NN classifier model") - self.assertEqual(spec.neuralNetworkClassifier.WhichOneof('ClassLabels'), 'int64ClassLabels') + self.assertEqual( + spec.WhichOneof("Type"), + "neuralNetworkClassifier", + "Expected a NN classifier model", + ) + self.assertEqual( + spec.neuralNetworkClassifier.WhichOneof("ClassLabels"), "int64ClassLabels" + ) class_from_proto = list(spec.neuralNetworkClassifier.int64ClassLabels.vector) six.assertCountEqual(self, classes, class_from_proto) @@ -887,39 +1051,61 @@ def test_classifier_custom_class_name(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) - classes = ['c%s' % i for i in range(32)] - - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'my_foo_bar_class_output'] - spec = keras.convert(model, input_names, output_names, class_labels = classes, - predicted_feature_name = 'my_foo_bar_class_output').get_spec() + model.add(Activation("softmax")) + classes = ["c%s" % i for i in range(32)] + + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "my_foo_bar_class_output"] + spec = keras.convert( + model, + input_names, + output_names, + class_labels=classes, + predicted_feature_name="my_foo_bar_class_output", + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - six.assertCountEqual(self, input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - [x.name for x in spec.description.output]) + self.assertEquals( + expected_output_names, [x.name for x in 
spec.description.output] + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'stringKeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'stringType') - self.assertTrue(spec.description.predictedFeatureName, 'my_foo_bar_class_output') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "stringKeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "stringType" + ) + self.assertTrue( + spec.description.predictedFeatureName, "my_foo_bar_class_output" + ) + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # Test the class parameters - self.assertEqual(spec.WhichOneof('Type'), 'neuralNetworkClassifier', "Expected a NN classifier model") - self.assertEqual(spec.neuralNetworkClassifier.WhichOneof('ClassLabels'), 'stringClassLabels') + self.assertEqual( + spec.WhichOneof("Type"), + "neuralNetworkClassifier", + "Expected a NN classifier model", + ) + self.assertEqual( + spec.neuralNetworkClassifier.WhichOneof("ClassLabels"), "stringClassLabels" + ) class_from_proto = list(spec.neuralNetworkClassifier.stringClassLabels.vector) six.assertCountEqual(self, classes, class_from_proto) @@ -930,23 +1116,23 @@ def test_default_interface_names(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) + model.add(Activation("softmax")) - expected_input_names = ['input1'] - expected_output_names = ['output1'] + expected_input_names = ["input1"] + expected_output_names = ["output1"] spec = keras.convert(model).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(expected_input_names)) - six.assertCountEqual(self, expected_input_names, - [x.name for x in spec.description.input]) + six.assertCountEqual( + self, expected_input_names, [x.name for x in spec.description.input] + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - [x.name for x in spec.description.output]) - - + self.assertEquals( + expected_output_names, [x.name for x in spec.description.output] + ) diff --git a/coremltools/test/neural_network/test_keras2.py b/coremltools/test/neural_network/test_keras2.py index 903152307..d7bfbd72c 100644 --- a/coremltools/test/neural_network/test_keras2.py +++ b/coremltools/test/neural_network/test_keras2.py @@ -1,22 +1,24 @@ import unittest -from coremltools._deps import HAS_KERAS2_TF +from coremltools._deps import _HAS_KERAS2_TF from coremltools.proto import Model_pb2 from coremltools.proto import FeatureTypes_pb2 from coremltools.proto import NeuralNetwork_pb2 import pytest -if HAS_KERAS2_TF: +if _HAS_KERAS2_TF: import tensorflow as tf from keras.models import Sequential, Model from coremltools.converters import keras -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') + +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 class KerasSingleLayerTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. """ + @classmethod def setUpClass(self): """ @@ -33,22 +35,24 @@ def test_dense(self): model = Sequential() model.add(Dense(32, input_dim=16)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -62,8 +66,26 @@ def test_activations(self): from keras.layers import Dense, Activation # Create a simple Keras model - keras_activation_options = ['elu', 'tanh', 'softplus', 'softsign', 'relu', 'sigmoid', 'hard_sigmoid', 'linear'] - coreml_activation_options = ['ELU', 'tanh', 'softplus', 'softsign', 'ReLU', 'sigmoid', 'sigmoidHard', 'linear'] + keras_activation_options = [ + "elu", + "tanh", + "softplus", + "softsign", + "relu", + "sigmoid", + "hard_sigmoid", + "linear", + ] + coreml_activation_options = [ + "ELU", + "tanh", + "softplus", + "softsign", + "ReLU", + "sigmoid", + "sigmoidHard", + "linear", + ] for i, k_act in enumerate(keras_activation_options): c_act = coreml_activation_options[i] @@ -71,22 +93,26 @@ def test_activations(self): model.add(Dense(32, input_dim=16)) model.add(Activation(k_act)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), + sorted(map(lambda x: x.name, spec.description.input)), + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), + sorted(map(lambda x: x.name, spec.description.output)), + ) # Test the layer parameters. 
layers = spec.neuralNetwork.layers @@ -103,24 +129,26 @@ def test_activation_softmax(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_dim=16)) - model.add(Activation('softmax')) + model.add(Activation("softmax")) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -141,22 +169,24 @@ def test_dropout(self): model.add(Dropout(0.5)) model.add(Dense(32, input_shape=(16,))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. 
layers = spec.neuralNetwork.layers @@ -176,27 +206,37 @@ def test_convolution(self, with_dilations=False): # Create a simple Keras model model = Sequential() - model.add(Conv2D(input_shape=(64, 64, 3), - filters=32, kernel_size=(5,5), activation=None, - padding='valid', strides=(1, 1), use_bias=True, - dilation_rate=dilation_rate)) + model.add( + Conv2D( + input_shape=(64, 64, 3), + filters=32, + kernel_size=(5, 5), + activation=None, + padding="valid", + strides=(1, 1), + use_bias=True, + dilation_rate=dilation_rate, + ) + ) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -222,28 +262,37 @@ def test_separable_convolution(self, with_dilations=False, activation=None): # Create a simple Keras model model = Sequential() - model.add(SeparableConv2D(input_shape=(64, 64, 3), - filters=32, kernel_size=(5,5), - activation=activation, - padding='valid', strides=(1, 1), use_bias=True, - dilation_rate=dilation_rate)) - - input_names = ['input'] - output_names = ['output'] + model.add( + SeparableConv2D( + input_shape=(64, 64, 3), + filters=32, + kernel_size=(5, 5), + activation=activation, + padding="valid", + strides=(1, 1), + use_bias=True, + dilation_rate=dilation_rate, + ) + ) + + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. 
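(Editorial aside: the convolution assertions below read layer parameters back out of the protobuf. A sketch under the same assumptions as above; the `ConvolutionLayerParams` field names are taken from the assertions in this hunk and should be treated as illustrative.)

```python
# Reading convolution parameters back out of a converted spec.
from keras.models import Sequential
from keras.layers import Conv2D

from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(
    Conv2D(
        input_shape=(64, 64, 3),
        filters=32,
        kernel_size=(5, 5),
        padding="valid",
        dilation_rate=(2, 2),
    )
)

spec = keras_converter.convert(model, ["input"], ["output"]).get_spec()
conv = spec.neuralNetwork.layers[0].convolution
print(conv.outputChannels)        # 32, from filters=
print(list(conv.kernelSize))      # [5, 5]
print(list(conv.dilationFactor))  # [2, 2], what the dilated tests assert on
```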
layers = spec.neuralNetwork.layers @@ -253,8 +302,8 @@ def test_separable_convolution(self, with_dilations=False, activation=None): self.assertIsNotNone(layer_pointwise.convolution) self.assertEqual(layer_depthwise.convolution.dilationFactor, dilation_rate) if activation is not None: - self.assertIsNotNone(layers[2].activation) - self.assertTrue(layers[2].activation.HasField('ELU')) + self.assertIsNotNone(layers[2].activation) + self.assertTrue(layers[2].activation.HasField("ELU")) def test_separable_convolution_dilated(self): """ @@ -266,7 +315,7 @@ def test_separable_convolution_with_nonlinearity(self): """ Test the conversion of 2D depthwise separable convolutional layer with nonlinearity. """ - self.test_separable_convolution(activation='elu') + self.test_separable_convolution(activation="elu") def test_upsample(self): """ @@ -276,25 +325,26 @@ def test_upsample(self): # Create a simple Keras model model = Sequential() - model.add(Conv2D(input_shape=(64, 64, 3), filters=32, - kernel_size=(5,5))) - model.add(UpSampling2D(size = (2, 2))) - input_names = ['input'] - output_names = ['output'] + model.add(Conv2D(input_shape=(64, 64, 3), filters=32, kernel_size=(5, 5))) + model.add(UpSampling2D(size=(2, 2))) + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. 
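(Editorial aside: the assertions below rely on `SeparableConv2D` converting to a depthwise convolution followed by a pointwise convolution, with any fused activation appended as a further layer. A hedged sketch; the two-layer count is inferred from this test's indexing, not from converter documentation.)

```python
# SeparableConv2D is expected to expand into depthwise + pointwise layers.
from keras.models import Sequential
from keras.layers import SeparableConv2D

from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(SeparableConv2D(input_shape=(64, 64, 3), filters=32, kernel_size=(5, 5)))

spec = keras_converter.convert(model, ["input"], ["output"]).get_spec()
layers = spec.neuralNetwork.layers
print(len(layers))  # 2 with no activation (inferred from the test's indexing)
print(layers[0].HasField("convolution"), layers[1].HasField("convolution"))
```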
layers = spec.neuralNetwork.layers @@ -302,15 +352,17 @@ def test_upsample(self): self.assertIsNotNone(layer_0.convolution) layer_1 = layers[1] self.assertIsNotNone(layer_1.upsample) - self.assertEquals(layer_1.upsample.mode, NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value('NN')) + self.assertEquals( + layer_1.upsample.mode, + NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value("NN"), + ) # Test if BILINEAR mode works as well model = Sequential() - model.add(Conv2D(input_shape=(64, 64, 3), filters=32, - kernel_size=(5,5))) + model.add(Conv2D(input_shape=(64, 64, 3), filters=32, kernel_size=(5, 5))) try: - model.add(UpSampling2D(size = (2, 2), interpolation = 'bilinear')) - except TypeError: # Early version of Keras, no support for 'interpolation' + model.add(UpSampling2D(size=(2, 2), interpolation="bilinear")) + except TypeError: # Early version of Keras, no support for 'interpolation' return spec = keras.convert(model, input_names, output_names).get_spec() @@ -318,7 +370,10 @@ def test_upsample(self): layers = spec.neuralNetwork.layers layer_1 = layers[1] self.assertIsNotNone(layer_1.upsample) - self.assertEquals(layer_1.upsample.mode, NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value('BILINEAR')) + self.assertEquals( + layer_1.upsample.mode, + NeuralNetwork_pb2.UpsampleLayerParams.InterpolationMode.Value("BILINEAR"), + ) def test_pooling(self): """ @@ -328,27 +383,37 @@ def test_pooling(self): # Create a simple Keras model model = Sequential() - model.add(Conv2D(input_shape=(64, 64, 3), - filters=32, kernel_size=(5,5), strides=(1,1), activation=None, - padding='valid', use_bias=True)) - model.add(MaxPooling2D(pool_size=(2,2))) + model.add( + Conv2D( + input_shape=(64, 64, 3), + filters=32, + kernel_size=(5, 5), + strides=(1, 1), + activation=None, + padding="valid", + use_bias=True, + ) + ) + model.add(MaxPooling2D(pool_size=(2, 2))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -359,26 +424,29 @@ def test_permute(self): Test the conversion of pooling layer. 
""" from keras.layers.core import Permute + # Create a simple Keras model model = Sequential() - model.add(Permute((3, 2, 1), input_shape=(10, 64,3))) + model.add(Permute((3, 2, 1), input_shape=(10, 64, 3))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. layers = spec.neuralNetwork.layers @@ -393,10 +461,10 @@ def test_lstm(self): # Create a simple Keras model model = Sequential() - model.add(LSTM(32, input_shape=(10,24))) + model.add(LSTM(32, input_shape=(10, 24))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() print(spec) @@ -405,10 +473,10 @@ def test_lstm(self): # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs - self.assertEquals(len(spec.description.input), len(input_names)+2) + self.assertEquals(len(spec.description.input), len(input_names) + 2) self.assertEquals(32, spec.description.input[1].type.multiArrayType.shape[0]) self.assertEquals(32, spec.description.input[2].type.multiArrayType.shape[0]) @@ -435,16 +503,16 @@ def test_simple_rnn(self): # Create a simple Keras model model = Sequential() - model.add(SimpleRNN(32, input_shape=(10,32))) + model.add(SimpleRNN(32, input_shape=(10, 32))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 1) @@ -472,16 +540,16 @@ def test_gru(self): # Create a simple Keras model model = Sequential() - model.add(GRU(32, input_shape=(32,10))) + model.add(GRU(32, input_shape=(32, 10))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 1) @@ -510,17 +578,16 @@ def test_bidir(self): # Create a simple Keras model model = Sequential() - model.add(Bidirectional(LSTM(32, input_shape=(10, 32)), - 
input_shape=(10, 32))) + model.add(Bidirectional(LSTM(32, input_shape=(10, 32)), input_shape=(10, 32))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 4) @@ -555,8 +622,8 @@ def test_embedding(self): num_outputs = 3 model.add(Embedding(num_inputs, num_outputs, input_length=5)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() @@ -564,7 +631,7 @@ def test_embedding(self): # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) @@ -577,7 +644,9 @@ def test_embedding(self): self.assertEquals(layer_0.embedding.inputDim, num_inputs) self.assertEquals(layer_0.embedding.outputChannels, num_outputs) - self.assertEquals(len(layer_0.embedding.weights.floatValue), num_inputs*num_outputs) + self.assertEquals( + len(layer_0.embedding.weights.floatValue), num_inputs * num_outputs + ) def test_sentiment_analysis(self): """ @@ -598,17 +667,17 @@ def test_sentiment_analysis(self): model.add(Embedding(max_features, embedded_dim, input_length=sequence_length)) # output_dim = 32 model.add(LSTM(32)) - model.add(Dense(1, activation='sigmoid')) + model.add(Dense(1, activation="sigmoid")) # Input/output - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs # We're giving state input and output so expect description to differ. 
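(Editorial aside: the `+ 2`, `+ 1`, and `+ 4` input counts in the recurrent tests come from hidden state being exposed as extra model inputs and outputs, which is also what the comment above alludes to. A sketch under the same assumptions as before.)

```python
# Recurrent layers surface their state as additional inputs/outputs.
from keras.models import Sequential
from keras.layers import LSTM

from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(LSTM(32, input_shape=(10, 24)))

spec = keras_converter.convert(model, ["input"], ["output"]).get_spec()
# One data input plus two state inputs (hidden state h and cell state c);
# state outputs are exposed the same way.
print(len(spec.description.input))  # 3
print(spec.description.input[1].type.multiArrayType.shape[0])  # 32
```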
@@ -623,19 +692,20 @@ def test_sentiment_analysis(self): def test_conv1d_lstm(self): from keras.layers import Conv1D, LSTM, Dense + model = Sequential() # input_shape = (time_step, dimensions) - model.add(Conv1D(32,3,padding='same',input_shape=(10,8))) + model.add(Conv1D(32, 3, padding="same", input_shape=(10, 8))) # conv1d output shape = (None, 10, 32) model.add(LSTM(24)) - model.add(Dense(1, activation='sigmoid')) + model.add(Dense(1, activation="sigmoid")) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names) + 2) @@ -656,27 +726,37 @@ def test_batchnorm(self): # Create a simple Keras model model = Sequential() - model.add(Conv2D(input_shape=(64, 64, 3), - filters=32, kernel_size=(5,5), strides=(1,1), activation=None, - padding='valid', use_bias=True)) + model.add( + Conv2D( + input_shape=(64, 64, 3), + filters=32, + kernel_size=(5, 5), + strides=(1, 1), + activation=None, + padding="valid", + use_bias=True, + ) + ) # epsilon in CoreML is currently fixed at 1e-5 model.add(BatchNormalization(epsilon=1e-5)) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) # Test the layer parameters. 
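(Editorial aside on the batch-norm hunk: Core ML's epsilon is noted in the test as fixed at 1e-5, so the Keras layer is built with the matching value. A sketch of what conversion is expected to produce; `HasField("batchnorm")` is my assumption about the layer oneof name, based on the proto naming used elsewhere in these tests.)

```python
# BatchNormalization should appear as a dedicated layer after the conv.
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization

from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(Conv2D(input_shape=(64, 64, 3), filters=32, kernel_size=(5, 5)))
model.add(BatchNormalization(epsilon=1e-5))  # epsilon fixed at 1e-5 in Core ML

spec = keras_converter.convert(model, ["input"], ["output"]).get_spec()
print(spec.neuralNetwork.layers[1].HasField("batchnorm"))  # True (assumption)
```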
layers = spec.neuralNetwork.layers @@ -689,38 +769,43 @@ def test_repeat_vector(self): model = Sequential() model.add(RepeatVector(3, input_shape=(5,))) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) layers = spec.neuralNetwork.layers self.assertIsNotNone(layers[0].sequenceRepeat) - @pytest.mark.xfail(raises = ValueError) + @pytest.mark.xfail(raises=ValueError) def test_unsupported_variational_deconv(self): from keras.layers import Input, Lambda, Conv2D, Flatten, Dense - x = Input(shape=(8,8,3)) - conv_1 = Conv2D(4, (2, 2), padding='same', activation='relu')(x) + + x = Input(shape=(8, 8, 3)) + conv_1 = Conv2D(4, (2, 2), padding="same", activation="relu")(x) flat = Flatten()(conv_1) - hidden = Dense(10, activation='relu')(flat) + hidden = Dense(10, activation="relu")(flat) z_mean = Dense(10)(hidden) z_log_var = Dense(10)(hidden) + def sampling(args): z_mean, z_log_var = args return z_mean + z_log_var + z = Lambda(sampling, output_shape=(10,))([z_mean, z_log_var]) model = Model([x], [z]) - spec = keras.convert(model, ['input'], ['output']).get_spec() + spec = keras.convert(model, ["input"], ["output"]).get_spec() def test_image_processing(self): """ @@ -730,33 +815,51 @@ def test_image_processing(self): # Create a simple Keras model model = Sequential() - model.add(Conv2D(input_shape=(64, 64, 3), - filters=32, kernel_size=(5,5), - activation=None, padding='valid', - strides=(1, 1), use_bias=True)) - input_names = ['input'] - output_names = ['output'] - spec = keras.convert(model, input_names, output_names, image_input_names = - ['input'], red_bias = 110.0, blue_bias = 117.0, green_bias = 120.0, - is_bgr = True, image_scale = 1.0).get_spec() + model.add( + Conv2D( + input_shape=(64, 64, 3), + filters=32, + kernel_size=(5, 5), + activation=None, + padding="valid", + strides=(1, 1), + use_bias=True, + ) + ) + input_names = ["input"] + output_names = ["output"] + spec = keras.convert( + model, + input_names, + output_names, + image_input_names=["input"], + red_bias=110.0, + blue_bias=117.0, + green_bias=120.0, + is_bgr=True, + image_scale=1.0, + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) - self.assertEquals(spec.description.input[0].type.WhichOneof('Type'), - 'imageType') - self.assertEquals(spec.description.input[0].type.imageType.colorSpace, - FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('BGR')) + self.assertTrue(spec.HasField("neuralNetwork")) + self.assertEquals( + spec.description.input[0].type.WhichOneof("Type"), "imageType" + ) + self.assertEquals( + spec.description.input[0].type.imageType.colorSpace, + 
FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("BGR"), + ) # Test the layer parameters. preprocessing = spec.neuralNetwork.preprocessing[0] - self.assertTrue(preprocessing.HasField('scaler')) + self.assertTrue(preprocessing.HasField("scaler")) pr_0 = preprocessing.scaler - print('pr_0.channelScale = ', pr_0.channelScale) - print('pr_0.redBias = ', pr_0.redBias) - print('pr_0.blueBias = ', pr_0.blueBias) - print('pr_0.greenBias = ', pr_0.greenBias) + print("pr_0.channelScale = ", pr_0.channelScale) + print("pr_0.redBias = ", pr_0.redBias) + print("pr_0.blueBias = ", pr_0.blueBias) + print("pr_0.greenBias = ", pr_0.greenBias) self.assertIsNotNone(pr_0.redBias) self.assertIsNotNone(pr_0.greenBias) self.assertIsNotNone(pr_0.blueBias) @@ -767,22 +870,33 @@ def test_image_processing(self): self.assertEqual(pr_0.greenBias, 120.0) # Configuration 2: isbgr = False - spec = keras.convert(model, input_names, output_names, image_input_names = - ['input'], red_bias = 110.0, blue_bias = 117.0, green_bias = 120.0, - is_bgr = False, image_scale = 1.0).get_spec() + spec = keras.convert( + model, + input_names, + output_names, + image_input_names=["input"], + red_bias=110.0, + blue_bias=117.0, + green_bias=120.0, + is_bgr=False, + image_scale=1.0, + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) - self.assertEquals(spec.description.input[0].type.WhichOneof('Type'), - 'imageType') - self.assertEquals(spec.description.input[0].type.imageType.colorSpace, - FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('RGB')) + self.assertTrue(spec.HasField("neuralNetwork")) + self.assertEquals( + spec.description.input[0].type.WhichOneof("Type"), "imageType" + ) + self.assertEquals( + spec.description.input[0].type.imageType.colorSpace, + FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("RGB"), + ) # Test the layer parameters. preprocessing = spec.neuralNetwork.preprocessing[0] - self.assertTrue(preprocessing.HasField('scaler')) + self.assertTrue(preprocessing.HasField("scaler")) pr_0 = preprocessing.scaler self.assertIsNotNone(pr_0.redBias) self.assertIsNotNone(pr_0.greenBias) @@ -794,21 +908,30 @@ def test_image_processing(self): self.assertEqual(pr_0.greenBias, 120.0) # Configuration 3: Defaults - spec = keras.convert(model, input_names, output_names, image_input_names = - ['input'], is_bgr = False, image_scale = 1.0).get_spec() + spec = keras.convert( + model, + input_names, + output_names, + image_input_names=["input"], + is_bgr=False, + image_scale=1.0, + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) - self.assertEquals(spec.description.input[0].type.WhichOneof('Type'), - 'imageType') - self.assertEquals(spec.description.input[0].type.imageType.colorSpace, - FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('RGB')) + self.assertTrue(spec.HasField("neuralNetwork")) + self.assertEquals( + spec.description.input[0].type.WhichOneof("Type"), "imageType" + ) + self.assertEquals( + spec.description.input[0].type.imageType.colorSpace, + FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value("RGB"), + ) # Test the layer parameters. 
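(Editorial aside: the three image-processing configurations in this test differ only in their bias/BGR arguments. A condensed sketch of how those keyword arguments surface in the spec, using the same values as configuration 2.)

```python
# Image inputs: bias/scale arguments land in the preprocessing scaler.
from keras.models import Sequential
from keras.layers import Conv2D

from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(Conv2D(input_shape=(64, 64, 3), filters=32, kernel_size=(5, 5)))

spec = keras_converter.convert(
    model,
    ["input"],
    ["output"],
    image_input_names=["input"],
    red_bias=110.0,
    blue_bias=117.0,
    green_bias=120.0,
    is_bgr=False,
    image_scale=1.0,
).get_spec()

print(spec.description.input[0].type.WhichOneof("Type"))  # "imageType"
scaler = spec.neuralNetwork.preprocessing[0].scaler
print(scaler.redBias, scaler.greenBias, scaler.blueBias)  # 110.0 120.0 117.0
```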
preprocessing = spec.neuralNetwork.preprocessing[0] - self.assertTrue(preprocessing.HasField('scaler')) + self.assertTrue(preprocessing.HasField("scaler")) pr_0 = preprocessing.scaler self.assertIsNotNone(pr_0.redBias) self.assertIsNotNone(pr_0.greenBias) @@ -826,38 +949,55 @@ def test_classifier_string_classes(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_shape=(16,))) - model.add(Activation('softmax')) - classes = ['c%s' % i for i in range(32)] - - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'classLabel'] - spec = keras.convert(model, input_names, output_names, class_labels = classes).get_spec() + model.add(Activation("softmax")) + classes = ["c%s" % i for i in range(32)] + + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "classLabel"] + spec = keras.convert( + model, input_names, output_names, class_labels=classes + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - list(map(lambda x: x.name, spec.description.output))) + self.assertEquals( + expected_output_names, list(map(lambda x: x.name, spec.description.output)) + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'stringKeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'stringType') - self.assertTrue(spec.description.predictedFeatureName, 'classLabel') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "stringKeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "stringType" + ) + self.assertTrue(spec.description.predictedFeatureName, "classLabel") + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # Test the class parameters - self.assertEqual(spec.WhichOneof('Type'), 'neuralNetworkClassifier', "Expected a NN classifier model") - self.assertEqual(spec.neuralNetworkClassifier.WhichOneof('ClassLabels'), 'stringClassLabels') + self.assertEqual( + spec.WhichOneof("Type"), + "neuralNetworkClassifier", + "Expected a NN classifier model", + ) + self.assertEqual( + spec.neuralNetworkClassifier.WhichOneof("ClassLabels"), "stringClassLabels" + ) class_from_proto = list(spec.neuralNetworkClassifier.stringClassLabels.vector) self.assertEqual(sorted(classes), sorted(class_from_proto)) @@ -870,37 +1010,48 @@ def test_classifier_file(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_shape=(16,))) - model.add(Activation('softmax')) - classes = 
['c%s' % i for i in range(32)] + model.add(Activation("softmax")) + classes = ["c%s" % i for i in range(32)] classes_file = tempfile.mktemp() - with open(classes_file, 'w') as f: - f.write('\n'.join(classes)) - - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'classLabel'] - spec = keras.convert(model, input_names, output_names, class_labels = classes).get_spec() + with open(classes_file, "w") as f: + f.write("\n".join(classes)) + + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "classLabel"] + spec = keras.convert( + model, input_names, output_names, class_labels=classes + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - list(map(lambda x: x.name, spec.description.output))) + self.assertEquals( + expected_output_names, list(map(lambda x: x.name, spec.description.output)) + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'stringKeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'stringType') - self.assertTrue(spec.description.predictedFeatureName, 'classLabel') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "stringKeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "stringType" + ) + self.assertTrue(spec.description.predictedFeatureName, "classLabel") + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # cleanup os.remove(classes_file) @@ -912,38 +1063,55 @@ def test_classifier_integer_classes(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_shape=(16,))) - model.add(Activation('softmax')) + model.add(Activation("softmax")) classes = list(range(32)) - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'classLabel'] - spec = keras.convert(model, input_names, output_names, class_labels = classes).get_spec() + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "classLabel"] + spec = keras.convert( + model, input_names, output_names, class_labels=classes + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs 
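(Editorial aside: passing `class_labels` flips the model type from `neuralNetwork` to `neuralNetworkClassifier` and appends a label output, with the dictionary key type following the label type, string versus integer. A sketch using integer labels, matching this hunk.)

```python
# class_labels turns the converted model into a classifier.
from keras.models import Sequential
from keras.layers import Dense, Activation

from coremltools.converters import keras as keras_converter

model = Sequential()
model.add(Dense(32, input_shape=(16,)))
model.add(Activation("softmax"))

spec = keras_converter.convert(
    model, ["input"], ["prob_output"], class_labels=list(range(32))
).get_spec()

print(spec.WhichOneof("Type"))  # "neuralNetworkClassifier"
print([o.name for o in spec.description.output])  # ["prob_output", "classLabel"]
print(spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"))
# "int64KeyType" for integer labels, "stringKeyType" for strings
```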
self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(expected_output_names, - list(map(lambda x: x.name, spec.description.output))) + self.assertEquals( + expected_output_names, list(map(lambda x: x.name, spec.description.output)) + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'int64KeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'int64Type') - self.assertTrue(spec.description.predictedFeatureName, 'classLabel') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "int64KeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "int64Type" + ) + self.assertTrue(spec.description.predictedFeatureName, "classLabel") + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # Test the class parameters - self.assertEqual(spec.WhichOneof('Type'), 'neuralNetworkClassifier', "Expected a NN classifier model") - self.assertEqual(spec.neuralNetworkClassifier.WhichOneof('ClassLabels'), 'int64ClassLabels') + self.assertEqual( + spec.WhichOneof("Type"), + "neuralNetworkClassifier", + "Expected a NN classifier model", + ) + self.assertEqual( + spec.neuralNetworkClassifier.WhichOneof("ClassLabels"), "int64ClassLabels" + ) class_from_proto = list(spec.neuralNetworkClassifier.int64ClassLabels.vector) self.assertEqual(sorted(classes), sorted(class_from_proto)) @@ -954,39 +1122,61 @@ def test_classifier_custom_class_name(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_shape=(16,))) - model.add(Activation('softmax')) - classes = ['c%s' % i for i in range(32)] - - input_names = ['input'] - output_names = ['prob_output'] - expected_output_names = ['prob_output', 'my_foo_bar_class_output'] - spec = keras.convert(model, input_names, output_names, class_labels = classes, - predicted_feature_name = 'my_foo_bar_class_output').get_spec() + model.add(Activation("softmax")) + classes = ["c%s" % i for i in range(32)] + + input_names = ["input"] + output_names = ["prob_output"] + expected_output_names = ["prob_output", "my_foo_bar_class_output"] + spec = keras.convert( + model, + input_names, + output_names, + class_labels=classes, + predicted_feature_name="my_foo_bar_class_output", + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetworkClassifier')) - self.assertFalse(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetworkClassifier")) + self.assertFalse(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEquals(len(spec.description.output), 
len(expected_output_names)) - self.assertEquals(expected_output_names, - list(map(lambda x: x.name, spec.description.output))) + self.assertEquals( + expected_output_names, list(map(lambda x: x.name, spec.description.output)) + ) # Check the types - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'dictionaryType') - self.assertEquals(spec.description.output[0].type.dictionaryType.WhichOneof('KeyType'), 'stringKeyType') - self.assertEquals(spec.description.output[1].type.WhichOneof('Type'), 'stringType') - self.assertTrue(spec.description.predictedFeatureName, 'my_foo_bar_class_output') - self.assertTrue(spec.description.predictedProbabilitiesName, 'prob_output') + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "dictionaryType" + ) + self.assertEquals( + spec.description.output[0].type.dictionaryType.WhichOneof("KeyType"), + "stringKeyType", + ) + self.assertEquals( + spec.description.output[1].type.WhichOneof("Type"), "stringType" + ) + self.assertTrue( + spec.description.predictedFeatureName, "my_foo_bar_class_output" + ) + self.assertTrue(spec.description.predictedProbabilitiesName, "prob_output") # Test the class parameters - self.assertEqual(spec.WhichOneof('Type'), 'neuralNetworkClassifier', "Expected a NN classifier model") - self.assertEqual(spec.neuralNetworkClassifier.WhichOneof('ClassLabels'), 'stringClassLabels') + self.assertEqual( + spec.WhichOneof("Type"), + "neuralNetworkClassifier", + "Expected a NN classifier model", + ) + self.assertEqual( + spec.neuralNetworkClassifier.WhichOneof("ClassLabels"), "stringClassLabels" + ) class_from_proto = list(spec.neuralNetworkClassifier.stringClassLabels.vector) self.assertEqual(sorted(classes), sorted(class_from_proto)) @@ -997,24 +1187,28 @@ def test_default_interface_names(self): # Create a simple Keras model model = Sequential() model.add(Dense(32, input_shape=(16,))) - model.add(Activation('softmax')) + model.add(Activation("softmax")) - expected_input_names = ['input1'] - expected_output_names = ['output1'] + expected_input_names = ["input1"] + expected_output_names = ["output1"] spec = keras.convert(model).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEquals(len(spec.description.input), len(expected_input_names)) - self.assertEqual(sorted(expected_input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(expected_input_names), + sorted(map(lambda x: x.name, spec.description.input)), + ) self.assertEquals(len(spec.description.output), len(expected_output_names)) - self.assertEquals(sorted(expected_output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEquals( + sorted(expected_output_names), + sorted(map(lambda x: x.name, spec.description.output)), + ) def test_updatable_model_flag_off(self): """ @@ -1025,18 +1219,19 @@ def test_updatable_model_flag_off(self): from keras.layers import Dense from keras.losses import categorical_crossentropy from keras.optimizers import SGD - input = ['data'] - output = ['output'] + + input = ["data"] + output = ["output"] # First, set respect_trainable to False and then check to make sure the # converted model is NOT updatable. 
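(Editorial aside: the updatable-model tests all hinge on `respect_trainable`. When it is False, trainable Keras layers convert as ordinary frozen layers; when it is True and the model is compiled with a supported loss and optimizer, the converter marks the trainable layers updatable. A sketch of the positive case, asserting the per-layer flag these tests check.)

```python
# respect_trainable=True + compiled loss/optimizer => updatable layers.
import coremltools
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import categorical_crossentropy
from keras.optimizers import SGD

model = Sequential()
model.add(Dense(128, input_shape=(16,)))
model.add(Dense(10, name="foo", activation="softmax", trainable=True))
model.compile(
    loss=categorical_crossentropy, optimizer=SGD(lr=1.0), metrics=["accuracy"]
)

cml = coremltools.converters.keras.convert(
    model, ["data"], ["output"], respect_trainable=True
)
spec = cml.get_spec()
print(spec.neuralNetwork.layers[1].isUpdatable)  # True for the trainable layer
```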
not_updatable = Sequential() not_updatable.add(Dense(128, input_shape=(16,))) # layer is updatable, but the flag during convert is false, so that bit # must get dropped on the floor. - not_updatable.add(Dense(10, name="foo", activation='softmax', - trainable=True)) - not_updatable.compile(loss=categorical_crossentropy, - optimizer=SGD(lr=0.01), metrics=['accuracy']) + not_updatable.add(Dense(10, name="foo", activation="softmax", trainable=True)) + not_updatable.compile( + loss=categorical_crossentropy, optimizer=SGD(lr=0.01), metrics=["accuracy"] + ) cml = coremltools.converters.keras.convert( not_updatable, input, output, respect_trainable=False ) @@ -1056,16 +1251,17 @@ def test_updatable_model_flag_cce_sgd(self): from keras.layers import Dense from keras.losses import categorical_crossentropy from keras.optimizers import SGD - input = ['data'] - output = ['output'] + + input = ["data"] + output = ["output"] # This should result in an updatable model. updatable = Sequential() updatable.add(Dense(128, input_shape=(16,))) - updatable.add(Dense(10, name="foo", activation='softmax', - trainable=True)) - updatable.compile(loss=categorical_crossentropy, - optimizer=SGD(lr=1.0), metrics=['accuracy']) + updatable.add(Dense(10, name="foo", activation="softmax", trainable=True)) + updatable.compile( + loss=categorical_crossentropy, optimizer=SGD(lr=1.0), metrics=["accuracy"] + ) cml = coremltools.converters.keras.convert( updatable, input, output, respect_trainable=True ) @@ -1090,16 +1286,18 @@ def test_updatable_model_flag_functional(self): from keras.layers import Dense, Input from keras.losses import categorical_crossentropy from keras.optimizers import SGD - input = ['data'] - output = ['output'] + + input = ["data"] + output = ["output"] # This should result in an updatable model. inputs = Input(shape=(16,)) d1 = Dense(128)(inputs) - d2 = Dense(10, name="foo", activation='softmax', trainable=True)(d1) + d2 = Dense(10, name="foo", activation="softmax", trainable=True)(d1) kmodel = Model(inputs=inputs, outputs=d2) - kmodel.compile(loss=categorical_crossentropy, - optimizer=SGD(lr=1.0), metrics=['accuracy']) + kmodel.compile( + loss=categorical_crossentropy, optimizer=SGD(lr=1.0), metrics=["accuracy"] + ) cml = coremltools.converters.keras.convert( kmodel, input, output, respect_trainable=True ) @@ -1124,18 +1322,19 @@ def test_updatable_model_flag_mse_adam(self): from keras.layers import Dense from keras.losses import mean_squared_error from keras.optimizers import Adam - input = ['data'] - output = ['output'] + + input = ["data"] + output = ["output"] # Again, this should give an updatable model. 
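(Editorial aside: the Adam assertions below check that the hyperparameters passed to the Keras optimizer become the default values in the spec's update parameters. A sketch using the same values as this test.)

```python
# Compiled optimizer hyperparameters become updateParams defaults.
import coremltools
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import mean_squared_error
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(128, input_shape=(16,)))
model.add(Dense(10, name="foo", activation="softmax", trainable=True))
model.compile(
    loss=mean_squared_error,
    optimizer=Adam(lr=1.0, beta_1=0.5, beta_2=0.75, epsilon=0.25),
    metrics=["accuracy"],
)

spec = coremltools.converters.keras.convert(
    model, ["data"], ["output"], respect_trainable=True
).get_spec()

adopt = spec.neuralNetwork.updateParams.optimizer.adamOptimizer
print(adopt.beta2.defaultValue)  # 0.75, mirroring beta_2
print(adopt.eps.defaultValue)    # 0.25, mirroring epsilon
```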
updatable = Sequential() updatable.add(Dense(128, input_shape=(16,))) - updatable.add(Dense(10, name="foo", activation='softmax', - trainable=True)) - updatable.compile(loss=mean_squared_error, - optimizer=Adam(lr=1.0, beta_1=0.5, beta_2=0.75, - epsilon=0.25), - metrics=['accuracy']) + updatable.add(Dense(10, name="foo", activation="softmax", trainable=True)) + updatable.compile( + loss=mean_squared_error, + optimizer=Adam(lr=1.0, beta_1=0.5, beta_2=0.75, epsilon=0.25), + metrics=["accuracy"], + ) cml = coremltools.converters.keras.convert( updatable, input, output, respect_trainable=True ) @@ -1159,12 +1358,12 @@ def test_updatable_model_flag_no_loss_optimizer(self): """ import coremltools from keras.layers import Dense + updatable = Sequential() updatable.add(Dense(128, input_shape=(16,))) - updatable.add(Dense(10, name="foo", activation='softmax', - trainable=True)) - input = ['data'] - output = ['output'] + updatable.add(Dense(10, name="foo", activation="softmax", trainable=True)) + input = ["data"] + output = ["output"] cml = coremltools.converters.keras.convert( updatable, input, output, respect_trainable=True ) @@ -1175,6 +1374,7 @@ def test_updatable_model_flag_no_loss_optimizer(self): self.assertTrue(layers[1].innerProduct) self.assertTrue(layers[1].isUpdatable) + # # when loss was specified as a string the converter had failed to work. def test_updatable_model_flag_mse_string_adam(self): """ @@ -1187,14 +1387,14 @@ def test_updatable_model_flag_mse_string_adam(self): updatable = Sequential() updatable.add(Dense(128, input_shape=(16,))) - updatable.add(Dense(10, name="foo", activation='relu', - trainable=True)) - updatable.compile(loss='mean_squared_error', - optimizer=Adam(lr=1.0, beta_1=0.5, beta_2=0.75, - epsilon=0.25), - metrics=['accuracy']) - input = ['data'] - output = ['output'] + updatable.add(Dense(10, name="foo", activation="relu", trainable=True)) + updatable.compile( + loss="mean_squared_error", + optimizer=Adam(lr=1.0, beta_1=0.5, beta_2=0.75, epsilon=0.25), + metrics=["accuracy"], + ) + input = ["data"] + output = ["output"] cml = coremltools.converters.keras.convert( updatable, input, output, respect_trainable=True ) @@ -1208,15 +1408,35 @@ def test_updatable_model_flag_mse_string_adam(self): self.assertEqual(len(spec.neuralNetwork.updateParams.lossLayers), 1) # check that mean squared error input name and output name is set # check length is non-zero for mse - self.assertTrue(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].meanSquaredErrorLossLayer.input)) - self.assertTrue(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].meanSquaredErrorLossLayer.target)) + self.assertTrue( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].meanSquaredErrorLossLayer.input + ) + ) + self.assertTrue( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].meanSquaredErrorLossLayer.target + ) + ) # check length is 0 for cce - self.assertFalse(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].categoricalCrossEntropyLossLayer.input)) - self.assertFalse(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].categoricalCrossEntropyLossLayer.target)) + self.assertFalse( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer.input + ) + ) + self.assertFalse( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer.target + ) + ) adopt = spec.neuralNetwork.updateParams.optimizer.adamOptimizer # verify default values @@ -1225,6 +1445,7 @@ def test_updatable_model_flag_mse_string_adam(self): 
self.assertEqual(adopt.beta2.defaultValue, 0.75) self.assertEqual(adopt.eps.defaultValue, 0.25) + # def test_updatable_model_flag_cce_string_sgd(self): """ Tests the 'respect_trainable' flag when used along with string @@ -1236,12 +1457,12 @@ def test_updatable_model_flag_cce_string_sgd(self): updatable = Sequential() updatable.add(Dense(128, input_shape=(16,))) - updatable.add(Dense(10, name="foo", activation='softmax', - trainable=True)) - updatable.compile(loss='categorical_crossentropy', - optimizer=SGD(lr=1.0), metrics=['accuracy']) - input = ['data'] - output = ['output'] + updatable.add(Dense(10, name="foo", activation="softmax", trainable=True)) + updatable.compile( + loss="categorical_crossentropy", optimizer=SGD(lr=1.0), metrics=["accuracy"] + ) + input = ["data"] + output = ["output"] cml = coremltools.converters.keras.convert( updatable, input, output, respect_trainable=True ) @@ -1255,22 +1476,41 @@ def test_updatable_model_flag_cce_string_sgd(self): # check that cce input name and output name is set # check length is non-zero for cce - self.assertTrue(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].categoricalCrossEntropyLossLayer.input)) - self.assertTrue(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].categoricalCrossEntropyLossLayer.target)) + self.assertTrue( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer.input + ) + ) + self.assertTrue( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer.target + ) + ) # check length is 0 for mse - self.assertFalse(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].meanSquaredErrorLossLayer.input)) - self.assertFalse(len(spec.neuralNetwork.updateParams.lossLayers[ - 0].meanSquaredErrorLossLayer.target)) + self.assertFalse( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].meanSquaredErrorLossLayer.input + ) + ) + self.assertFalse( + len( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].meanSquaredErrorLossLayer.target + ) + ) sgdopt = spec.neuralNetwork.updateParams.optimizer.sgdOptimizer self.assertEqual(sgdopt.learningRate.defaultValue, 1.0) self.assertEqual(sgdopt.miniBatchSize.defaultValue, 16) self.assertEqual(sgdopt.momentum.defaultValue, 0.0) - def test_updatable_model_flag_cce_sgd_string(self): """ Tests the 'respect_trainable' flag when used along with string @@ -1280,16 +1520,18 @@ def test_updatable_model_flag_cce_sgd_string(self): import coremltools from keras.layers import Dense, Input from keras.losses import categorical_crossentropy - input = ['data'] - output = ['output'] + + input = ["data"] + output = ["output"] # This should result in an updatable model. 
inputs = Input(shape=(16,)) d1 = Dense(128)(inputs) - d2 = Dense(10, name="foo", activation='softmax', trainable=True)(d1) + d2 = Dense(10, name="foo", activation="softmax", trainable=True)(d1) kmodel = Model(inputs=inputs, outputs=d2) - kmodel.compile(loss=categorical_crossentropy, - optimizer='sgd', metrics=['accuracy']) + kmodel.compile( + loss=categorical_crossentropy, optimizer="sgd", metrics=["accuracy"] + ) cml = coremltools.converters.keras.convert( kmodel, input, output, respect_trainable=True ) @@ -1303,8 +1545,7 @@ def test_updatable_model_flag_cce_sgd_string(self): sgdopt = spec.neuralNetwork.updateParams.optimizer.sgdOptimizer # use almost equal for default verification with at least 5 decimal # places of closeness - self.assertAlmostEqual(sgdopt.learningRate.defaultValue, 0.01, - places=5) + self.assertAlmostEqual(sgdopt.learningRate.defaultValue, 0.01, places=5) self.assertEqual(sgdopt.miniBatchSize.defaultValue, 16) self.assertEqual(sgdopt.momentum.defaultValue, 0.0) @@ -1317,16 +1558,18 @@ def test_updatable_model_flag_cce_adam_string(self): import coremltools from keras.layers import Dense, Input from keras.losses import categorical_crossentropy - input = ['data'] - output = ['output'] + + input = ["data"] + output = ["output"] # This should result in an updatable model. inputs = Input(shape=(16,)) d1 = Dense(128)(inputs) - d2 = Dense(10, name="foo", activation='softmax', trainable=True)(d1) + d2 = Dense(10, name="foo", activation="softmax", trainable=True)(d1) kmodel = Model(inputs=inputs, outputs=d2) - kmodel.compile(loss=categorical_crossentropy, - optimizer='adam', metrics=['accuracy']) + kmodel.compile( + loss=categorical_crossentropy, optimizer="adam", metrics=["accuracy"] + ) cml = coremltools.converters.keras.convert( kmodel, input, output, respect_trainable=True ) diff --git a/coremltools/test/neural_network/test_keras2_numeric.py b/coremltools/test/neural_network/test_keras2_numeric.py index 225f1b032..33fe58765 100644 --- a/coremltools/test/neural_network/test_keras2_numeric.py +++ b/coremltools/test/neural_network/test_keras2_numeric.py @@ -7,16 +7,35 @@ import numpy as np import pytest -from coremltools._deps import HAS_KERAS2_TF +from coremltools._deps import _HAS_KERAS2_TF from coremltools.models import _MLMODEL_FULL_PRECISION, _MLMODEL_HALF_PRECISION -from coremltools.models.utils import macos_version, is_macos +from coremltools.models.utils import _macos_version, _is_macos -if HAS_KERAS2_TF: +if _HAS_KERAS2_TF: import keras.backend from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Conv2D, Conv1D, Flatten, BatchNormalization, Conv2DTranspose, SeparableConv2D - from keras.layers import MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D - from keras.layers import MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D + from keras.layers import ( + Dense, + Activation, + Conv2D, + Conv1D, + Flatten, + BatchNormalization, + Conv2DTranspose, + SeparableConv2D, + ) + from keras.layers import ( + MaxPooling2D, + AveragePooling2D, + GlobalAveragePooling2D, + GlobalMaxPooling2D, + ) + from keras.layers import ( + MaxPooling1D, + AveragePooling1D, + GlobalAveragePooling1D, + GlobalMaxPooling1D, + ) from keras.layers import Embedding, Input, Permute, Reshape, RepeatVector, Dropout from keras.layers import Add, Concatenate from keras.layers import add, multiply, concatenate, dot, maximum, average @@ -26,9 +45,10 @@ from keras.layers.core import SpatialDropout2D from 
keras.layers.wrappers import Bidirectional, TimeDistributed from distutils.version import StrictVersion as _StrictVersion - if keras.__version__ >= _StrictVersion('2.2.1'): + + if keras.__version__ >= _StrictVersion("2.2.1"): from keras.layers import DepthwiseConv2D, ReLU - elif keras.__version__ >= _StrictVersion('2.2.0'): + elif keras.__version__ >= _StrictVersion("2.2.0"): from keras.layers import DepthwiseConv2D from keras_applications.mobilenet import relu6 else: @@ -38,21 +58,21 @@ def _keras_transpose(x, is_sequence=False): if len(x.shape) == 5: # Keras input shape = [Batch, Seq, Height, Width, Channels] - x = np.transpose(x, [1,0,4,2,3]) + x = np.transpose(x, [1, 0, 4, 2, 3]) if len(x.shape) == 4: # Keras input shape = [Batch, Height, Width, Channels] - x = np.transpose(x, [0,3,1,2]) + x = np.transpose(x, [0, 3, 1, 2]) return np.expand_dims(x, axis=0) elif len(x.shape) == 3: # Keras input shape = [Batch, (Sequence) Length, Channels] - return np.transpose(x, [1,0,2]) + return np.transpose(x, [1, 0, 2]) elif len(x.shape) == 2: if is_sequence: # (N,S) --> (S,N,1,) return x.reshape(x.shape[::-1] + (1,)) else: # (N,C) --> (N,C,1,1) - return x.reshape((1, ) + x.shape) # Dense + return x.reshape((1,) + x.shape) # Dense elif len(x.shape) == 1: - if is_sequence: # (S) --> (S,N,1,1,1) + if is_sequence: # (S) --> (S,N,1,1,1) return x.reshape((x.shape[0], 1, 1)) else: return x @@ -60,36 +80,49 @@ def _keras_transpose(x, is_sequence=False): return x -def _get_coreml_model(model, input_names=['data'], output_names=['output'], input_name_shape_dict= {}, - model_precision=_MLMODEL_FULL_PRECISION, use_float_arraytype=False): +def _get_coreml_model( + model, + input_names=["data"], + output_names=["output"], + input_name_shape_dict={}, + model_precision=_MLMODEL_FULL_PRECISION, + use_float_arraytype=False, +): """ Get the coreml model from the Keras model. """ # Convert the model from coremltools.converters import keras as keras_converter - model = keras_converter.convert(model, input_names, output_names, input_name_shape_dict = input_name_shape_dict, - model_precision=model_precision, use_float_arraytype=use_float_arraytype) + + model = keras_converter.convert( + model, + input_names, + output_names, + input_name_shape_dict=input_name_shape_dict, + model_precision=model_precision, + use_float_arraytype=use_float_arraytype, + ) return model -def _generate_data(input_shape, mode='random'): +def _generate_data(input_shape, mode="random"): """ Generate some random data according to a shape. """ - if mode == 'zeros': + if mode == "zeros": X = np.zeros(input_shape) - elif mode == 'ones': + elif mode == "ones": X = np.ones(input_shape) - elif mode == 'linear': + elif mode == "linear": X = np.array(range(np.product(input_shape))).reshape(input_shape) - elif mode == 'random': + elif mode == "random": X = np.random.rand(*input_shape) - elif mode == 'random_zero_mean': + elif mode == "random_zero_mean": X = np.random.rand(*input_shape) - 0.5 return X -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 class KerasNumericCorrectnessTest(unittest.TestCase): """ @@ -99,50 +132,67 @@ class KerasNumericCorrectnessTest(unittest.TestCase): def runTest(self): pass - def _get_coreml_model_params_and_test_input(self, model, mode, one_dim_seq_flags, input_name_shape_dict={}): + def _get_coreml_model_params_and_test_input( + self, model, mode, one_dim_seq_flags, input_name_shape_dict={} + ): # Generate data nb_inputs = len(model.inputs) if nb_inputs > 1: - input_names = []; input_data = []; coreml_input = {} + input_names = [] + input_data = [] + coreml_input = {} for i in range(nb_inputs): feature_name = "data_%s" % i input_names.append(feature_name) if feature_name in input_name_shape_dict: - input_shape = [1 if a is None else a for a in \ - input_name_shape_dict[feature_name]] + input_shape = [ + 1 if a is None else a + for a in input_name_shape_dict[feature_name] + ] else: - input_shape = [1 if a is None else a for a in \ - model.input_shape[i]] + input_shape = [1 if a is None else a for a in model.input_shape[i]] X = _generate_data(input_shape, mode) input_data.append(X) if one_dim_seq_flags is None: - coreml_input[feature_name] = _keras_transpose( - X).astype('f').copy() + coreml_input[feature_name] = _keras_transpose(X).astype("f").copy() else: - coreml_input[feature_name] = _keras_transpose( - X, one_dim_seq_flags[i]).astype('f').copy() + coreml_input[feature_name] = ( + _keras_transpose(X, one_dim_seq_flags[i]).astype("f").copy() + ) else: - input_names = ['data'] - if 'data' in input_name_shape_dict: - input_shape = [1 if a is None else a for a in input_name_shape_dict['data']] + input_names = ["data"] + if "data" in input_name_shape_dict: + input_shape = [ + 1 if a is None else a for a in input_name_shape_dict["data"] + ] else: input_shape = [1 if a is None else a for a in model.input_shape] input_data = _generate_data(input_shape, mode) if one_dim_seq_flags is None: - coreml_input = {'data': _keras_transpose(input_data).astype( - 'f').copy()} + coreml_input = {"data": _keras_transpose(input_data).astype("f").copy()} else: - coreml_input = {'data': _keras_transpose( - input_data, one_dim_seq_flags[0]).astype('f').copy()} + coreml_input = { + "data": _keras_transpose(input_data, one_dim_seq_flags[0]) + .astype("f") + .copy() + } - output_names = ['output'+str(i) for i in range(len(model.outputs))] + output_names = ["output" + str(i) for i in range(len(model.outputs))] return input_names, output_names, input_data, coreml_input - def _test_model(self, model, input_name_shape_dict={}, num_samples=1, mode='random', delta=1e-2, - model_dir=None, transpose_keras_result=True, - one_dim_seq_flags=None, - model_precision=_MLMODEL_FULL_PRECISION): + def _test_model( + self, + model, + input_name_shape_dict={}, + num_samples=1, + mode="random", + delta=1e-2, + model_dir=None, + transpose_keras_result=True, + one_dim_seq_flags=None, + model_precision=_MLMODEL_FULL_PRECISION, + ): # transpose_keras_result: if true, compare the transposed Keras result # one_dim_seq_flags: a list of same length as the number of inputs in @@ -156,14 +206,24 @@ def _test_model(self, model, input_name_shape_dict={}, num_samples=1, mode='rand use_tmp_folder = True model_dir = tempfile.mkdtemp() - input_names, output_names, input_data, coreml_input = self._get_coreml_model_params_and_test_input(model, mode, - one_dim_seq_flags, - input_name_shape_dict) + ( + input_names, + output_names, + input_data, + coreml_input, + ) = self._get_coreml_model_params_and_test_input( + model, mode, 
+        )

-        coreml_model = _get_coreml_model(model, input_names, output_names, input_name_shape_dict,
-                                         model_precision=model_precision)
+        coreml_model = _get_coreml_model(
+            model,
+            input_names,
+            output_names,
+            input_name_shape_dict,
+            model_precision=model_precision,
+        )

         try:
-            if not (is_macos() and macos_version() >= (10, 13)):
+            if not (_is_macos() and _macos_version() >= (10, 13)):
                 return

             # Assuming coreml model output names are in the same order as
@@ -187,20 +247,19 @@ def _test_model(self, model, input_name_shape_dict={}, num_samples=1, mode='rand
                 self.assertEqual(len(kp), len(cp))
                 for i in range(len(kp)):
                     max_den = max(1.0, kp[i], cp[i])
-                    self.assertAlmostEqual(kp[i]/max_den, cp[i]/max_den,
-                                           delta=delta)
+                    self.assertAlmostEqual(
+                        kp[i] / max_den, cp[i] / max_den, delta=delta
+                    )
         finally:
             # Cleanup files - models on disk no longer useful
             if use_tmp_folder and os.path.exists(model_dir):
                 shutil.rmtree(model_dir)


-@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.')
+@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.")
 @pytest.mark.keras2
 class KerasBasicNumericCorrectnessTest(KerasNumericCorrectnessTest):
-
-    def test_tiny_inner_product(self,
-                                model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_inner_product(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)

         # Define a model
@@ -209,11 +268,11 @@
         # Test all zeros
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
-        self._test_model(model, mode='zeros', model_precision=model_precision)
+        self._test_model(model, mode="zeros", model_precision=model_precision)

         # Test all ones
         model.set_weights([np.ones(w.shape) for w in model.get_weights()])
-        self._test_model(model, mode='ones', model_precision=model_precision)
+        self._test_model(model, mode="ones", model_precision=model_precision)

         # Test random
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -222,8 +281,7 @@
     def test_tiny_inner_product_half_precision(self):
         self.test_tiny_inner_product(model_precision=_MLMODEL_HALF_PRECISION)

-    def test_inner_product_random(self,
-                                  model_precision=_MLMODEL_FULL_PRECISION):
+    def test_inner_product_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)

         # Define a model
@@ -244,7 +302,7 @@ def test_dense_softmax(self):

         # Define a model
         model = Sequential()
-        model.add(Dense(32, input_shape=(32,), activation='softmax'))
+        model.add(Dense(32, input_shape=(32,), activation="softmax"))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -257,7 +315,7 @@ def test_dense_elu(self):

         # Define a model
         model = Sequential()
-        model.add(Dense(32, input_shape=(32,), activation='elu'))
+        model.add(Dense(32, input_shape=(32,), activation="elu"))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -270,7 +328,7 @@ def test_dense_selu(self):

         # Define a model
         model = Sequential()
-        model.add(Dense(32, input_shape=(32,), activation='selu'))
+        model.add(Dense(32, input_shape=(32,), activation="selu"))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -285,9 +343,9 @@ def test_housenet_random(self):

         # Define a model
         model = Sequential()
-        model.add(Dense(num_hidden, input_dim = num_features))
-        model.add(Activation('relu'))
-        model.add(Dense(1, input_dim = num_features))
+        model.add(Dense(num_hidden, input_dim=num_features))
+        model.add(Activation("relu"))
+        model.add(Dense(1, input_dim=num_features))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -303,8 +361,13 @@ def test_tiny_conv_ones(self, model_precision=_MLMODEL_FULL_PRECISION):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.ones(w.shape) for w in model.get_weights()])
@@ -315,7 +378,6 @@ def test_tiny_conv_ones(self, model_precision=_MLMODEL_FULL_PRECISION):
     def test_tiny_conv_ones_half_precision(self):
         self.test_tiny_conv_ones(model_precision=_MLMODEL_HALF_PRECISION)

-
     def test_tiny_conv_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)
         input_dim = 10
@@ -324,8 +386,13 @@ def test_tiny_conv_random(self, model_precision=_MLMODEL_FULL_PRECISION):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -333,9 +400,12 @@ def test_tiny_conv_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         # Test the keras model
         self._test_model(model, model_precision=model_precision)

-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 14),
-                         'Only supported on MacOS 10.14+')
-    def test_tiny_conv_random_input_shape_dict(self, model_precision=_MLMODEL_FULL_PRECISION):
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 14), "Only supported on MacOS 10.14+"
+    )
+    def test_tiny_conv_random_input_shape_dict(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         H, W, C = 10, 20, 5
         input_shape = (None, H, W, C)
@@ -343,14 +413,23 @@

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape=(None,None,C),
-            filters=num_kernels, kernel_size=(kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=(None, None, C),
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])

         # Test the keras model
-        self._test_model(model, input_name_shape_dict={'data':input_shape},model_precision=model_precision)
+        self._test_model(
+            model,
+            input_name_shape_dict={"data": input_shape},
+            model_precision=model_precision,
+        )

     def test_tiny_conv_random_half_precision(self):
         self.test_tiny_conv_random(model_precision=_MLMODEL_HALF_PRECISION)
@@ -363,8 +442,14 @@ def test_tiny_conv_dilated(self, model_precision=_MLMODEL_FULL_PRECISION):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape, dilation_rate=(2,2),
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                dilation_rate=(2, 2),
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -375,7 +460,9 @@ def test_tiny_conv_dilated(self, model_precision=_MLMODEL_FULL_PRECISION):
     def test_tiny_conv_dilated_half_precision(self):
         return self.test_tiny_conv_dilated(model_precision=_MLMODEL_HALF_PRECISION)

-    def test_tiny_conv_dilated_rect_random(self, model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_conv_dilated_rect_random(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_shape = (32, 20, 3)
         num_kernels = 2
@@ -384,8 +471,14 @@

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape, dilation_rate=(2,2),
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                dilation_rate=(2, 2),
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -394,21 +487,29 @@
         self._test_model(model, model_precision=model_precision)

     def test_tiny_conv_dilated_rect_random_half_precision(self):
-        return self.test_tiny_conv_dilated_rect_random(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_tiny_conv_dilated_rect_random(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_tiny_conv_pseudo_1d_x(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)
         input_dim = 2
         input_length = 5
-        filter_length = 1 # 3
+        filter_length = 1  # 3
         nb_filters = 1
         # Define a model
         model = Sequential()
-        model.add(Conv2D(nb_filters, kernel_size = (1,filter_length), input_shape=(1,input_length,input_dim), padding = 'valid'))
+        model.add(
+            Conv2D(
+                nb_filters,
+                kernel_size=(1, filter_length),
+                input_shape=(1, input_length, input_dim),
+                padding="valid",
+            )
+        )
         # Set some random weights
         model.set_weights([np.ones(w.shape) for w in model.get_weights()])
-        self._test_model(model, mode='linear',
-                         model_precision=model_precision)
+        self._test_model(model, mode="linear", model_precision=model_precision)

     def test_tiny_conv_pseudo_1d_x_half_precision(self):
         return self.test_tiny_conv_pseudo_1d_x(model_precision=_MLMODEL_HALF_PRECISION)
@@ -420,8 +521,14 @@ def test_tiny_conv1d_same_random(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding = 'same',
-            input_shape=(input_length, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -436,24 +543,40 @@ def test_tiny_conv1d_same_random_input_shape_dict(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding = 'same',
-            input_shape=(None, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(None, input_dim),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])

         # Test the keras model
-        self._test_model(model, input_name_shape_dict={'data':(None,input_length,input_dim)})
+        self._test_model(
+            model, input_name_shape_dict={"data": (None, input_length, input_dim)}
+        )

-    def test_large_input_length_conv1d_same_random(self, model_precision=_MLMODEL_FULL_PRECISION):
+    def test_large_input_length_conv1d_same_random(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_dim = 2
         input_length = 80
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding = 'same',
-            input_shape=(input_length, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -462,7 +585,9 @@
         self._test_model(model, model_precision=model_precision)

     def test_large_input_length_conv1d_same_random_half_precision(self):
-        return self.test_large_input_length_conv1d_same_random(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_large_input_length_conv1d_same_random(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_tiny_conv1d_valid_random(self):
         np.random.seed(1988)
@@ -471,8 +596,14 @@ def test_tiny_conv1d_valid_random(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding = 'valid',
-            input_shape=(input_length, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="valid",
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -488,8 +619,15 @@ def test_tiny_conv1d_dilated_random(self):

         # Define a model
         model = Sequential()
-        model.add(Conv1D(num_kernels, kernel_size = filter_length, padding = 'valid',
-            input_shape = input_shape, dilation_rate = 3))
+        model.add(
+            Conv1D(
+                num_kernels,
+                kernel_size=filter_length,
+                padding="valid",
+                input_shape=input_shape,
+                dilation_rate=3,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -507,9 +645,14 @@ def test_tiny_conv_rect_kernel_x(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width),
-            padding = 'same'))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="same",
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -527,9 +670,14 @@ def test_tiny_conv_rect_kernel_y(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width),
-            padding = 'valid'))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="valid",
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -537,8 +685,7 @@ def test_tiny_conv_rect_kernel_y(self):
         # Test the keras model
         self._test_model(model)

-    def test_tiny_conv_rect_kernel_xy(self,
-                                      model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_conv_rect_kernel_xy(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)
         input_dim = 10
         input_shape = (input_dim, input_dim, 1)
@@ -548,9 +695,14 @@

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width),
-            padding = 'valid'))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="valid",
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -563,15 +715,15 @@ def test_tiny_conv_rect_kernel_xy_half_precision(self):

     def test_flatten(self):
         model = Sequential()
-        model.add(Flatten(input_shape=(2,2,2)))
-        self._test_model(model, mode='linear')
+        model.add(Flatten(input_shape=(2, 2, 2)))
+        self._test_model(model, mode="linear")

     def test_conv_dense(self, model_precision=_MLMODEL_FULL_PRECISION):
         input_shape = (48, 48, 3)
         model = Sequential()
-        model.add(Conv2D(32, (3, 3), activation='relu', input_shape = input_shape))
+        model.add(Conv2D(32, (3, 3), activation="relu", input_shape=input_shape))
         model.add(Flatten())
-        model.add(Dense(10, activation = 'softmax'))
+        model.add(Dense(10, activation="softmax"))

         # Get the coreml model
         self._test_model(model, model_precision=model_precision)
@@ -589,8 +741,13 @@ def test_conv_batchnorm_random(self, model_precision=_MLMODEL_FULL_PRECISION):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )
         model.add(BatchNormalization(epsilon=1e-5))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -601,7 +758,9 @@
     def test_conv_batchnorm_random_half_precision(self):
         return self.test_conv_batchnorm_random(model_precision=_MLMODEL_HALF_PRECISION)

-    def test_conv_batchnorm_no_gamma_no_beta(self, model_precision=_MLMODEL_FULL_PRECISION):
+    def test_conv_batchnorm_no_gamma_no_beta(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_dim = 10
         input_shape = (input_dim, input_dim, 3)
@@ -611,8 +770,13 @@

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )
         model.add(BatchNormalization(center=False, scale=False, epsilon=1e-5))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -621,7 +785,9 @@
         self._test_model(model, model_precision=model_precision)

     def test_conv_batchnorm_no_gamma_no_beta_half_precision(self):
-        return self.test_conv_batchnorm_no_gamma_no_beta(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_conv_batchnorm_no_gamma_no_beta(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_tiny_deconv_random(self):
         # In Keras 2, deconvolution auto computes the output shape.
@@ -634,8 +800,15 @@ def test_tiny_deconv_random(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2DTranspose(filters = num_kernels, kernel_size=(kernel_height, kernel_width),
-            input_shape = input_shape, padding = 'valid', use_bias=False))
+        model.add(
+            Conv2DTranspose(
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                input_shape=input_shape,
+                padding="valid",
+                use_bias=False,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -653,8 +826,16 @@ def test_tiny_deconv_random_same_padding(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2DTranspose(filters = num_kernels, kernel_size=(kernel_height, kernel_width),
-            input_shape = input_shape, padding = 'same', strides = (2,2), use_bias=True))
+        model.add(
+            Conv2DTranspose(
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                input_shape=input_shape,
+                padding="same",
+                strides=(2, 2),
+                use_bias=True,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -672,8 +853,15 @@ def test_tiny_depthwise_conv_same_pad(self):

         # Define a model
         model = Sequential()
-        model.add(DepthwiseConv2D(depth_multiplier = depth_multiplier, kernel_size=(kernel_height, kernel_width),
-            input_shape = input_shape, padding = 'same', strides = (1,1)))
+        model.add(
+            DepthwiseConv2D(
+                depth_multiplier=depth_multiplier,
+                kernel_size=(kernel_height, kernel_width),
+                input_shape=input_shape,
+                padding="same",
+                strides=(1, 1),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -691,8 +879,15 @@ def test_tiny_depthwise_conv_valid_pad(self):

         # Define a model
         model = Sequential()
-        model.add(DepthwiseConv2D(depth_multiplier = depth_multiplier, kernel_size=(kernel_height, kernel_width),
-            input_shape = input_shape, padding = 'valid', strides = (1,1)))
+        model.add(
+            DepthwiseConv2D(
+                depth_multiplier=depth_multiplier,
+                kernel_size=(kernel_height, kernel_width),
+                input_shape=input_shape,
+                padding="valid",
+                strides=(1, 1),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -710,8 +905,15 @@ def test_tiny_depthwise_conv_same_pad_depth_multiplier(self):

         # Define a model
         model = Sequential()
-        model.add(DepthwiseConv2D(depth_multiplier = depth_multiplier, kernel_size=(kernel_height, kernel_width),
-            input_shape = input_shape, padding = 'same', strides = (1,1)))
+        model.add(
+            DepthwiseConv2D(
+                depth_multiplier=depth_multiplier,
+                kernel_size=(kernel_height, kernel_width),
+                input_shape=input_shape,
+                padding="same",
+                strides=(1, 1),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -729,8 +931,15 @@ def test_tiny_depthwise_conv_valid_pad_depth_multiplier(self):

         # Define a model
         model = Sequential()
-        model.add(DepthwiseConv2D(depth_multiplier = depth_multiplier, kernel_size=(kernel_height, kernel_width),
-            input_shape = input_shape, padding = 'valid', strides = (1,1)))
+        model.add(
+            DepthwiseConv2D(
+                depth_multiplier=depth_multiplier,
+                kernel_size=(kernel_height, kernel_width),
+                input_shape=input_shape,
+                padding="valid",
+                strides=(1, 1),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -749,9 +958,16 @@ def test_tiny_separable_conv_valid(self):

         # Define a model
         model = Sequential()
-        model.add(SeparableConv2D(filters = num_kernels, kernel_size=(kernel_height, kernel_width),
-            padding = 'valid', strides = (1,1), depth_multiplier = depth_multiplier,
-            input_shape = input_shape))
+        model.add(
+            SeparableConv2D(
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="valid",
+                strides=(1, 1),
+                depth_multiplier=depth_multiplier,
+                input_shape=input_shape,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -770,9 +986,17 @@ def test_tiny_separable_conv_same_fancy(self):

         # Define a model
         model = Sequential()
-        model.add(SeparableConv2D(filters = num_kernels, kernel_size=(kernel_height, kernel_width),
-            padding = 'same', strides = (2,2), activation='relu', depth_multiplier = depth_multiplier,
-            input_shape = input_shape))
+        model.add(
+            SeparableConv2D(
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="same",
+                strides=(2, 2),
+                activation="relu",
+                depth_multiplier=depth_multiplier,
+                input_shape=input_shape,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -791,9 +1015,16 @@ def test_tiny_separable_conv_valid_depth_multiplier(self):

         # Define a model
         model = Sequential()
-        model.add(SeparableConv2D(filters = num_kernels, kernel_size=(kernel_height, kernel_width),
-            padding = 'valid', strides = (1,1), depth_multiplier = depth_multiplier,
-            input_shape = input_shape))
+        model.add(
+            SeparableConv2D(
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="valid",
+                strides=(1, 1),
+                depth_multiplier=depth_multiplier,
+                input_shape=input_shape,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -802,8 +1033,8 @@ def test_tiny_separable_conv_valid_depth_multiplier(self):
         self._test_model(model)

     def test_tiny_separable_conv_same_fancy_depth_multiplier(
-            self,
-            model_precision=_MLMODEL_FULL_PRECISION):
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)

         input_dim = 16
@@ -815,9 +1046,17 @@ def test_tiny_separable_conv_same_fancy_depth_multiplier(

         # Define a model
         model = Sequential()
-        model.add(SeparableConv2D(filters = num_kernels, kernel_size=(kernel_height, kernel_width),
-            padding = 'same', strides = (2,2), activation='relu', depth_multiplier = depth_multiplier,
-            input_shape = input_shape))
+        model.add(
+            SeparableConv2D(
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+                padding="same",
+                strides=(2, 2),
+                activation="relu",
+                depth_multiplier=depth_multiplier,
+                input_shape=input_shape,
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -827,7 +1066,8 @@ def test_tiny_separable_conv_same_fancy_depth_multiplier(

     def test_tiny_separable_conv_same_fancy_depth_multiplier_half_precision(self):
         return self.test_tiny_separable_conv_same_fancy_depth_multiplier(
-            model_precision=_MLMODEL_HALF_PRECISION)
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_tiny_separable_conv_dilated(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)
@@ -837,8 +1077,14 @@ def test_tiny_separable_conv_dilated(self, model_precision=_MLMODEL_FULL_PRECISI

         # Define a model
         model = Sequential()
-        model.add(SeparableConv2D(input_shape = input_shape, dilation_rate=(2, 2),
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            SeparableConv2D(
+                input_shape=input_shape,
+                dilation_rate=(2, 2),
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -847,9 +1093,13 @@ def test_tiny_separable_conv_dilated(self, model_precision=_MLMODEL_FULL_PRECISI
         self._test_model(model, model_precision=model_precision)

     def test_tiny_separable_conv_dilated_half_precision(self):
-        return self.test_tiny_separable_conv_dilated(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_tiny_separable_conv_dilated(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

-    def test_tiny_separable_conv_dilated_rect_random(self, model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_separable_conv_dilated_rect_random(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_shape = (32, 20, 3)
         num_kernels = 2
@@ -858,8 +1108,14 @@

         # Define a model
         model = Sequential()
-        model.add(SeparableConv2D(input_shape = input_shape, dilation_rate=(2,2),
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
+        model.add(
+            SeparableConv2D(
+                input_shape=input_shape,
+                dilation_rate=(2, 2),
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -868,61 +1124,93 @@
         self._test_model(model, model_precision=model_precision)

     def test_tiny_separable_conv_dilated_rect_random_half_precision(self):
-        return self.test_tiny_separable_conv_dilated_rect_random(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_tiny_separable_conv_dilated_rect_random(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_max_pooling_no_overlap(self):
         # no_overlap: pool_size = strides
         model = Sequential()
-        model.add(MaxPooling2D(input_shape=(16,16,3), pool_size=(2, 2),
-            strides=None, padding='valid'))
+        model.add(
+            MaxPooling2D(
+                input_shape=(16, 16, 3), pool_size=(2, 2), strides=None, padding="valid"
+            )
+        )
         self._test_model(model)

     def test_max_pooling_overlap_multiple(self):
         # input shape is multiple of pool_size, strides != pool_size
         model = Sequential()
-        model.add(MaxPooling2D(input_shape=(18,18,3), pool_size=(3, 3),
-            strides=(2,2), padding='valid'))
+        model.add(
+            MaxPooling2D(
+                input_shape=(18, 18, 3),
+                pool_size=(3, 3),
+                strides=(2, 2),
+                padding="valid",
+            )
+        )
         self._test_model(model)

     def test_max_pooling_overlap_odd(self):
         model = Sequential()
-        model.add(MaxPooling2D(input_shape=(16,16,3), pool_size=(3, 3),
-            strides=(2,2), padding='valid'))
+        model.add(
+            MaxPooling2D(
+                input_shape=(16, 16, 3),
+                pool_size=(3, 3),
+                strides=(2, 2),
+                padding="valid",
+            )
+        )
         self._test_model(model)

     def test_max_pooling_overlap_same(self):
         model = Sequential()
-        model.add(MaxPooling2D(input_shape=(16,16,3), pool_size=(3, 3),
-            strides=(2,2), padding='same'))
+        model.add(
+            MaxPooling2D(
+                input_shape=(16, 16, 3),
+                pool_size=(3, 3),
+                strides=(2, 2),
+                padding="same",
+            )
+        )
         self._test_model(model)

     def test_global_max_pooling(self):
         model = Sequential()
-        model.add(GlobalMaxPooling2D(input_shape=(16,16,3)))
+        model.add(GlobalMaxPooling2D(input_shape=(16, 16, 3)))
         self._test_model(model)

     def test_average_pooling_no_overlap(self):
         # no_overlap: pool_size = strides
         model = Sequential()
-        model.add(AveragePooling2D(input_shape=(16,16,3), pool_size=(2, 2),
-            strides=None, padding='valid'))
+        model.add(
+            AveragePooling2D(
+                input_shape=(16, 16, 3), pool_size=(2, 2), strides=None, padding="valid"
+            )
+        )
         self._test_model(model, delta=1e-2)

     def test_average_pooling_inception_config_1(self):
         # no_overlap: pool_size = strides
         model = Sequential()
-        model.add(AveragePooling2D(input_shape=(16,16,3), pool_size=(3,3),
-            strides=(1,1), padding='same'))
+        model.add(
+            AveragePooling2D(
+                input_shape=(16, 16, 3),
+                pool_size=(3, 3),
+                strides=(1, 1),
+                padding="same",
+            )
+        )
         self._test_model(model, delta=1e-2)

     def test_global_average_pooling(self):
         model = Sequential()
-        model.add(GlobalAveragePooling2D(input_shape=(16,16,3)))
+        model.add(GlobalAveragePooling2D(input_shape=(16, 16, 3)))
         self._test_model(model)

     def test_max_pooling_1d(self):
         model = Sequential()
-        model.add(MaxPooling1D(input_shape=(16,3), pool_size=4))
+        model.add(MaxPooling1D(input_shape=(16, 3), pool_size=4))
         self._test_model(model)

     def test_global_max_pooling_1d(self):
@@ -932,8 +1220,14 @@ def test_global_max_pooling_1d(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding='same',
-            input_shape=(input_length, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )
         model.add(GlobalMaxPooling1D())
         self._test_model(model)

@@ -944,8 +1238,14 @@ def test_average_pooling_1d(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding='same',
-            input_shape=(input_length, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )
         model.add(AveragePooling1D(pool_size=2))
         self._test_model(model)

@@ -956,8 +1256,14 @@ def test_global_average_pooling_1d(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding='same',
-            input_shape=(input_length, input_dim)))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )
         model.add(GlobalAveragePooling1D())
         self._test_model(model)

@@ -971,9 +1277,14 @@ def test_tiny_conv_upsample_random(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size = (kernel_height, kernel_width)))
-        model.add(UpSampling2D(size = 2))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )
+        model.add(UpSampling2D(size=2))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -988,9 +1299,15 @@ def test_tiny_conv_upsample_1d_random(self):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding='same',
-            input_shape=(input_length, input_dim)))
-        model.add(UpSampling1D(size = 2))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )
+        model.add(UpSampling1D(size=2))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1005,9 +1322,15 @@ def test_tiny_conv_crop_1d_random(self, model_precision=_MLMODEL_FULL_PRECISION)
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding='same',
-            input_shape=(input_length, input_dim)))
-        model.add(Cropping1D(cropping = 2))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )
+        model.add(Cropping1D(cropping=2))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1016,7 +1339,9 @@ def test_tiny_conv_crop_1d_random(self, model_precision=_MLMODEL_FULL_PRECISION)
         self._test_model(model, model_precision=model_precision)

     def test_tiny_conv_crop_1d_random_half_precision(self):
-        return self.test_tiny_conv_crop_1d_random(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_tiny_conv_crop_1d_random(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_tiny_conv_pad_1d_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)
@@ -1025,9 +1350,15 @@ def test_tiny_conv_pad_1d_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         filter_length = 3
         nb_filters = 4
         model = Sequential()
-        model.add(Conv1D(nb_filters, kernel_size = filter_length, padding='same',
-            input_shape=(input_length, input_dim)))
-        model.add(ZeroPadding1D(padding = 2))
+        model.add(
+            Conv1D(
+                nb_filters,
+                kernel_size=filter_length,
+                padding="same",
+                input_shape=(input_length, input_dim),
+            )
+        )
+        model.add(ZeroPadding1D(padding=2))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1036,15 +1367,15 @@ def test_tiny_conv_pad_1d_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         self._test_model(model, model_precision=model_precision)

     def test_tiny_conv_pad_1d_random_half_precision(self):
-        return self.test_tiny_conv_pad_1d_random(model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_tiny_conv_pad_1d_random(
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_tiny_conv_causal_1d(self):
         np.random.seed(1988)
         model = Sequential()
-        model.add(Conv1D(1,3,input_shape=(10,1),use_bias=False,
-                         padding='causal'))
-        model.set_weights([np.random.rand(*w.shape) for w in \
-                           model.get_weights()])
+        model.add(Conv1D(1, 3, input_shape=(10, 1), use_bias=False, padding="causal"))
+        model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
         self._test_model(model)

     def test_embedding(self, model_precision=_MLMODEL_FULL_PRECISION):
@@ -1066,8 +1397,9 @@ def test_embedding_seq(self, model_precision=_MLMODEL_FULL_PRECISION):
         model.add(Embedding(num_inputs, num_outputs, input_length=7))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])

-        self._test_model(model, one_dim_seq_flags=[True],
-                         model_precision=model_precision)
+        self._test_model(
+            model, one_dim_seq_flags=[True], model_precision=model_precision
+        )

     def test_embedding_seq_half_precision(self):
         return self.test_embedding_seq(model_precision=_MLMODEL_HALF_PRECISION)
@@ -1099,7 +1431,9 @@ def test_tiny_sequence_simple_rnn_random(self):
         model.add(SimpleRNN(num_channels, input_shape=(input_length, input_dim)))

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2 - 0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1112,10 +1446,18 @@ def test_tiny_seq2seq_rnn_random(self):

         # Define a model
         model = Sequential()
-        model.add(SimpleRNN(num_channels, input_shape=(input_length, input_dim), return_sequences=True))
+        model.add(
+            SimpleRNN(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                return_sequences=True,
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2 - 0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1127,10 +1469,14 @@ def test_rnn_seq(self):

         # Define a model
         model = Sequential()
-        model.add(SimpleRNN(20, input_shape=(input_length, input_dim), return_sequences=False))
+        model.add(
+            SimpleRNN(20, input_shape=(input_length, input_dim), return_sequences=False)
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2 - 0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1142,10 +1488,19 @@ def test_rnn_seq_backwards(self):

         # Define a model
         model = Sequential()
-        model.add(SimpleRNN(20, input_shape=(input_length, input_dim), return_sequences=False, go_backwards=True))
+        model.add(
+            SimpleRNN(
+                20,
+                input_shape=(input_length, input_dim),
+                return_sequences=False,
+                go_backwards=True,
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2 - 0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1161,7 +1516,9 @@ def test_medium_no_sequence_simple_rnn_random(self):
         model.add(SimpleRNN(num_channels, input_shape=(input_length, input_dim)))

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2 - 0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1173,11 +1530,19 @@ def test_tiny_no_sequence_lstm_zeros(self):
         num_channels = 1

         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       implementation = 1, recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                implementation=1,
+                recurrent_activation="sigmoid",
+            )
+        )

-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
-        self._test_model(model, mode='zeros')
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )
+        self._test_model(model, mode="zeros")

     def test_tiny_no_sequence_lstm_ones(self):
         np.random.seed(1988)
@@ -1186,11 +1551,19 @@ def test_tiny_no_sequence_lstm_ones(self):
         num_channels = 1

         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       implementation = 1, recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                implementation=1,
+                recurrent_activation="sigmoid",
+            )
+        )

-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
-        self._test_model(model, mode='ones')
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )
+        self._test_model(model, mode="ones")

     def test_small_no_sequence_lstm_zeros(self):
         np.random.seed(1988)
@@ -1199,11 +1572,19 @@ def test_small_no_sequence_lstm_zeros(self):
         num_channels = 1

         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       implementation = 2, recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                implementation=2,
+                recurrent_activation="sigmoid",
+            )
+        )

-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
-        self._test_model(model, mode='zeros')
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )
+        self._test_model(model, mode="zeros")

     def test_small_no_sequence_lstm_ones(self):
         np.random.seed(1988)
@@ -1212,11 +1593,19 @@ def test_small_no_sequence_lstm_ones(self):
         num_channels = 1

         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       implementation = 2, recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                implementation=2,
+                recurrent_activation="sigmoid",
+            )
+        )

-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
-        self._test_model(model, mode='ones')
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )
+        self._test_model(model, mode="ones")

     def test_lstm_seq(self):
         np.random.seed(1988)
@@ -1224,9 +1613,13 @@ def test_lstm_seq(self):
         input_length = 5

         model = Sequential()
-        model.add(LSTM(20, input_shape = (input_length, input_dim), return_sequences=False))
+        model.add(
+            LSTM(20, input_shape=(input_length, input_dim), return_sequences=False)
+        )

-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )
         self._test_model(model)

     def test_lstm_seq_backwards(self):
@@ -1235,9 +1628,18 @@ def test_lstm_seq_backwards(self):
         input_length = 5

         model = Sequential()
-        model.add(LSTM(20, input_shape = (input_length, input_dim), return_sequences=False, go_backwards=True))
+        model.add(
+            LSTM(
+                20,
+                input_shape=(input_length, input_dim),
+                return_sequences=False,
+                go_backwards=True,
+            )
+        )

-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )
         self._test_model(model)

     def test_medium_no_sequence_lstm_random(self):
@@ -1248,11 +1650,18 @@ def test_medium_no_sequence_lstm_random(self):

         # Define a model
         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                recurrent_activation="sigmoid",
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1265,14 +1674,22 @@ def test_tiny_no_sequence_lstm_zeros_gpu(self):

         # Define a model
         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       implementation = 2, recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                implementation=2,
+                recurrent_activation="sigmoid",
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
-        self._test_model(model, mode='zeros')
+        self._test_model(model, mode="zeros")

     def test_small_no_sequence_lstm_random(self):
         np.random.seed(1988)
@@ -1282,17 +1699,24 @@ def test_small_no_sequence_lstm_random(self):

         # Define a model
         model = Sequential()
-        model.add(LSTM(num_channels, input_shape = (input_length, input_dim),
-                       implementation = 2, recurrent_activation = 'sigmoid'))
+        model.add(
+            LSTM(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                implementation=2,
+                recurrent_activation="sigmoid",
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)

-    def test_tiny_no_sequence_gru_random(self,
-                                         model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_no_sequence_gru_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)
         input_dim = 1
         input_length = 1
@@ -1301,18 +1725,26 @@ def test_tiny_no_sequence_gru_random(self,

         # Define a model
         model = Sequential()
-        model.add(GRU(num_channels, input_shape = (input_length, input_dim),
-                      recurrent_activation = 'sigmoid'))
+        model.add(
+            GRU(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                recurrent_activation="sigmoid",
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model, model_precision=model_precision)

     def test_tiny_no_sequence_gru_random_half_precision(self):
         return self.test_tiny_no_sequence_gru_random(
-            model_precision=_MLMODEL_HALF_PRECISION)
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_small_no_sequence_gru_random(self):
         np.random.seed(1988)
@@ -1322,17 +1754,25 @@ def test_small_no_sequence_gru_random(self):

         # Define a model
         model = Sequential()
-        model.add(GRU(num_channels, input_shape = (input_length, input_dim),
-                      recurrent_activation = 'sigmoid'))
+        model.add(
+            GRU(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                recurrent_activation="sigmoid",
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)

-    def test_medium_no_sequence_gru_random(self,
-                                           model_precision=_MLMODEL_FULL_PRECISION):
+    def test_medium_no_sequence_gru_random(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_dim = 10
         input_length = 1
@@ -1340,7 +1780,13 @@ def test_medium_no_sequence_gru_random(self,

         # Define a model
         model = Sequential()
-        model.add(GRU(num_channels, input_shape = (input_length, input_dim), recurrent_activation = 'sigmoid'))
+        model.add(
+            GRU(
+                num_channels,
+                input_shape=(input_length, input_dim),
+                recurrent_activation="sigmoid",
+            )
+        )

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1350,7 +1796,8 @@ def test_medium_no_sequence_gru_random(self,

     def test_medium_no_sequence_gru_random_half_precision(self):
         return self.test_medium_no_sequence_gru_random(
-            model_precision=_MLMODEL_HALF_PRECISION)
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_gru_seq(self):
         np.random.seed(1988)
@@ -1359,10 +1806,14 @@ def test_gru_seq(self):

         # Define a model
         model = Sequential()
-        model.add(GRU(20, input_shape = (input_length, input_dim), return_sequences=False))
+        model.add(
+            GRU(20, input_shape=(input_length, input_dim), return_sequences=False)
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1374,20 +1825,29 @@ def test_gru_seq_backwards(self, model_precision=_MLMODEL_FULL_PRECISION):

         # Define a model
         model = Sequential()
-        model.add(GRU(20, input_shape = (input_length, input_dim), return_sequences=False, go_backwards=True))
+        model.add(
+            GRU(
+                20,
+                input_shape=(input_length, input_dim),
+                return_sequences=False,
+                go_backwards=True,
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model, model_precision=model_precision)

     def test_gru_seq_backwards_half_precision(self):
-        return self.test_gru_seq_backwards(
-            model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_gru_seq_backwards(model_precision=_MLMODEL_HALF_PRECISION)

-    def test_tiny_no_sequence_bidir_random(self,
-                                           model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_no_sequence_bidir_random(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_dim = 1
         input_length = 1
@@ -1396,22 +1856,29 @@ def test_tiny_no_sequence_bidir_random(self,

         # Define a model
         model = Sequential()
-        model.add(Bidirectional(LSTM(num_channels,
-                                     implementation = 1, recurrent_activation = 'sigmoid'),
-                                input_shape=(input_length, input_dim)))
+        model.add(
+            Bidirectional(
+                LSTM(num_channels, implementation=1, recurrent_activation="sigmoid"),
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model, model_precision=model_precision)

     def test_tiny_no_sequence_bidir_random_half_precision(self):
         return self.test_tiny_no_sequence_bidir_random(
-            model_precision=_MLMODEL_HALF_PRECISION)
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

-    def test_tiny_no_sequence_bidir_random_gpu(self,
-                                               model_precision = _MLMODEL_FULL_PRECISION):
+    def test_tiny_no_sequence_bidir_random_gpu(
+        self, model_precision=_MLMODEL_FULL_PRECISION
+    ):
         np.random.seed(1988)
         input_dim = 1
         input_length = 1
@@ -1420,19 +1887,25 @@ def test_tiny_no_sequence_bidir_random_gpu(self,

         # Define a model
         model = Sequential()
-        model.add(Bidirectional(LSTM(num_channels,
-                                     implementation = 2, recurrent_activation = 'sigmoid'),
-                                input_shape=(input_length, input_dim)))
+        model.add(
+            Bidirectional(
+                LSTM(num_channels, implementation=2, recurrent_activation="sigmoid"),
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

-        # Test the keras model 
+        # Test the keras model
         self._test_model(model, model_precision=model_precision)

     def test_tiny_no_sequence_bidir_random_gpu_half_precision(self):
         return self.test_tiny_no_sequence_bidir_random_gpu(
-            model_precision=_MLMODEL_HALF_PRECISION)
+            model_precision=_MLMODEL_HALF_PRECISION
+        )

     def test_small_no_sequence_bidir_random(self):
         np.random.seed(1988)
@@ -1442,12 +1915,17 @@ def test_small_no_sequence_bidir_random(self):

         # Define a model
         model = Sequential()
-        model.add(Bidirectional(LSTM(num_channels,
-                                     implementation = 2, recurrent_activation = 'sigmoid'),
-                                input_shape=(input_length, input_dim)))
+        model.add(
+            Bidirectional(
+                LSTM(num_channels, implementation=2, recurrent_activation="sigmoid"),
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1460,12 +1938,17 @@ def test_medium_no_sequence_bidir_random(self):

         # Define a model
         model = Sequential()
-        model.add(Bidirectional(LSTM(num_channels,
-                                     implementation = 2, recurrent_activation = 'sigmoid'),
-                                input_shape=(input_length, input_dim)))
+        model.add(
+            Bidirectional(
+                LSTM(num_channels, implementation=2, recurrent_activation="sigmoid"),
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1478,12 +1961,22 @@ def test_medium_bidir_random_return_seq_false(self):

         # Define a model
         model = Sequential()
-        model.add(Bidirectional(LSTM(num_channels,
-                                     return_sequences=False, implementation=2, recurrent_activation='sigmoid'),
-                                input_shape=(input_length, input_dim)))
+        model.add(
+            Bidirectional(
+                LSTM(
+                    num_channels,
+                    return_sequences=False,
+                    implementation=2,
+                    recurrent_activation="sigmoid",
+                ),
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)
@@ -1496,52 +1989,68 @@ def test_medium_bidir_random_return_seq_true(self):

         # Define a model
         model = Sequential()
-        model.add(Bidirectional(LSTM(num_channels,
-                                     return_sequences = True, implementation = 2, recurrent_activation = 'sigmoid'),
-                                input_shape=(input_length, input_dim)))
+        model.add(
+            Bidirectional(
+                LSTM(
+                    num_channels,
+                    return_sequences=True,
+                    implementation=2,
+                    recurrent_activation="sigmoid",
+                ),
+                input_shape=(input_length, input_dim),
+            )
+        )

         # Set some random weights
-        model.set_weights([np.random.rand(*w.shape)*0.2-0.1 for w in model.get_weights()])
+        model.set_weights(
+            [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+        )

         # Test the keras model
         self._test_model(model)

-
     def test_bilstm_merge_modes(self):
         # issue 157
         def get_model(input_dim, fc_size, rnn_size, output_dim, merge_mode):
-            input_data = Input(name='the_input', shape=(None, input_dim))
-            x = TimeDistributed(Dense(fc_size,
-                                      name='fc1',
-                                      activation='relu', ))(input_data)
-            x = Bidirectional(LSTM(rnn_size,
-                                   return_sequences=True,
-                                   activation='relu',
-                                   kernel_initializer='he_normal'),
-                              merge_mode=merge_mode)(x)
-            y_pred = TimeDistributed(Dense(output_dim, name="y_pred", activation="softmax"))(x)
+            input_data = Input(name="the_input", shape=(None, input_dim))
+            x = TimeDistributed(Dense(fc_size, name="fc1", activation="relu",))(
+                input_data
+            )
+            x = Bidirectional(
+                LSTM(
+                    rnn_size,
+                    return_sequences=True,
+                    activation="relu",
+                    kernel_initializer="he_normal",
+                ),
+                merge_mode=merge_mode,
+            )(x)
+            y_pred = TimeDistributed(
+                Dense(output_dim, name="y_pred", activation="softmax")
+            )(x)
             model = Model([input_data], [y_pred])
-            model.set_weights([np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()])
+            model.set_weights(
+                [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()]
+            )
             return model

         input_dim = 26
         fc_size = 512
         rnn_size = 512
         output_dim = 29
-        for merge_mode in ['concat','sum','mul','ave']:
+        for merge_mode in ["concat", "sum", "mul", "ave"]:
             model = get_model(input_dim, fc_size, rnn_size, output_dim, merge_mode)
             self._test_model(model)

-
     def test_tiny_conv_elu_random(self):
         np.random.seed(1988)

         # Define a model
         from keras.layers.advanced_activations import ELU
+
         model = Sequential()
-        model.add(Conv2D(input_shape = (10, 10, 3),
-            filters = 3, kernel_size = (5,5)))
+        model.add(Conv2D(input_shape=(10, 10, 3), filters=3, kernel_size=(5, 5)))
         model.add(ELU(alpha=0.8))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1549,15 +2058,18 @@ def test_tiny_conv_elu_random(self):
         # Get the coreml model
         self._test_model(model)

-    def test_tiny_conv_prelu_random(self,
-                                    model_precision=_MLMODEL_FULL_PRECISION):
+    def test_tiny_conv_prelu_random(self, model_precision=_MLMODEL_FULL_PRECISION):
         np.random.seed(1988)

         # Define a model
         from keras.layers.advanced_activations import PReLU
+
         model = Sequential()
-        model.add(Conv2D(input_shape = (10, 10, 3),
-            filters = 3, kernel_size = (5,5), padding = 'same'))
+        model.add(
+            Conv2D(
+                input_shape=(10, 10, 3), filters=3, kernel_size=(5, 5), padding="same"
+            )
+        )
         model.add(PReLU(shared_axes=[1, 2]))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1566,17 +2078,20 @@ def test_tiny_conv_prelu_random(self,
         self._test_model(model, model_precision=model_precision)

     def test_tiny_conv_prelu_random_half_precision(self):
-        return self.test_tiny_conv_prelu_random(
-            model_precision=_MLMODEL_HALF_PRECISION)
+        return self.test_tiny_conv_prelu_random(model_precision=_MLMODEL_HALF_PRECISION)

     def test_tiny_conv_leaky_relu_random(self):
         np.random.seed(1988)

         # Define a model
         from keras.layers.advanced_activations import LeakyReLU
+
         model = Sequential()
-        model.add(Conv2D(input_shape = (10, 10, 3),
-            filters = 3, kernel_size=(5,5), padding = 'same'))
+        model.add(
+            Conv2D(
+                input_shape=(10, 10, 3), filters=3, kernel_size=(5, 5), padding="same"
+            )
+        )
         model.add(LeakyReLU(alpha=0.3))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1589,9 +2104,13 @@ def test_tiny_conv_thresholded_relu_random(self):

         # Define a model
         from keras.layers.advanced_activations import ThresholdedReLU
+
         model = Sequential()
-        model.add(Conv2D(input_shape = (10, 10, 3),
-            filters = 3, kernel_size=(5,5), padding = 'same'))
+        model.add(
+            Conv2D(
+                input_shape=(10, 10, 3), filters=3, kernel_size=(5, 5), padding="same"
+            )
+        )
         model.add(ThresholdedReLU(theta=0.8))
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1605,7 +2124,7 @@ def test_tiny_concat_random(self):
         num_channels = 6

         # Define a model
-        input_tensor = Input(shape = (input_dim, ))
+        input_tensor = Input(shape=(input_dim,))
         x1 = Dense(num_channels)(input_tensor)
         x2 = Dense(num_channels)(x1)
         x3 = Dense(num_channels)(x1)
@@ -1620,7 +2139,6 @@ def test_tiny_concat_random(self):
         # Get the coreml model
         self._test_model(model)

-
     def test_tiny_concat_seq_random(self):
         np.random.seed(1988)
         max_features = 10
@@ -1629,7 +2147,7 @@ def test_tiny_concat_seq_random(self):
         num_channels = 6

         # Define a model
-        input_tensor = Input(shape = (seq_len, ))
+        input_tensor = Input(shape=(seq_len,))
         x1 = Embedding(max_features, embedding_dims)(input_tensor)
         x2 = Embedding(max_features, embedding_dims)(input_tensor)
         x3 = concatenate([x1, x2], axis=1)
@@ -1655,8 +2173,8 @@ def test_lstm_concat_dense_random(self):
         concat = Concatenate(axis=2)([embedding, pos])
         model = LSTM(units, return_sequences=True, stateful=False)(concat)
         model = LSTM(units, return_sequences=False)(model)
-        model = Dense(100, activation='relu')(model)
-        model = Dense(vocab_size, activation='softmax')(model)
+        model = Dense(100, activation="relu")(model)
+        model = Dense(vocab_size, activation="softmax")(model)

         model = Model(inputs=[input, pos], outputs=model)
@@ -1672,7 +2190,7 @@ def test_tiny_add_random(self):
         num_channels = 6

         # Define a model
-        input_tensor = Input(shape = (input_dim, ))
+        input_tensor = Input(shape=(input_dim,))
         x1 = Dense(num_channels)(input_tensor)
         x2 = Dense(num_channels)(x1)
         x3 = Dense(num_channels)(x1)
@@ -1693,7 +2211,7 @@ def test_tiny_mul_random(self):
         num_channels = 6

         # Define a model
-        input_tensor = Input(shape = (input_dim, ))
+        input_tensor = Input(shape=(input_dim,))
         x1 = Dense(num_channels)(input_tensor)
         x2 = Dense(num_channels)(x1)
         x3 = Dense(num_channels)(x1)
@@ -1714,7 +2232,7 @@ def test_tiny_cos_random(self):
         num_channels = 6

         # Define a model
-        input_tensor = Input(shape = (input_dim, ))
+        input_tensor = Input(shape=(input_dim,))
         x1 = Dense(num_channels)(input_tensor)
         x2 = Dense(num_channels)(x1)
         x3 = Dense(num_channels)(x1)
@@ -1733,7 +2251,7 @@ def test_zeropad_simple(self):
         input_shape = (48, 48, 3)
         model = Sequential()
-        model.add(ZeroPadding2D((1,1),input_shape=input_shape))
+        model.add(ZeroPadding2D((1, 1), input_shape=input_shape))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1745,7 +2263,7 @@ def test_zeropad_fancy(self):
         input_shape = (48, 48, 3)
         model = Sequential()
-        model.add(ZeroPadding2D(((2,5),(3,4)),input_shape=input_shape))
+        model.add(ZeroPadding2D(((2, 5), (3, 4)), input_shape=input_shape))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1757,7 +2275,7 @@ def test_crop_simple(self):
         input_shape = (48, 48, 3)
         model = Sequential()
-        model.add(Cropping2D(cropping=((2,5),(2,5)),input_shape=input_shape))
+        model.add(Cropping2D(cropping=((2, 5), (2, 5)), input_shape=input_shape))

         # Set some random weights
         model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()])
@@ -1769,6 +2287,7 @@ def test_tiny_permute(self):
         # When input blob is 3D array (D1, D2, D3), Keras assumes the axes' meaning is
         # (D1=H,D2=W,D3=C), while CoreML assumes (D1=C,D2=H,D3=W)
         import itertools
+
         for permute_order in list(itertools.permutations([1, 2, 3])):
             model = Sequential()
             model.add(Permute(permute_order, input_shape=(4, 3, 2)))
@@ -1776,8 +2295,8 @@ def test_tiny_permute(self):

     def test_reshape_3d(self):
         model = Sequential()
-        model.add(Reshape((10,1,6), input_shape=(5,4,3)))
-        self._test_model(model, mode='linear')
+        model.add(Reshape((10, 1, 6), input_shape=(5, 4, 3)))
+        self._test_model(model, mode="linear")

     def test_tiny_conv_dense_random(self):
         np.random.seed(1988)
@@ -1791,8 +2310,13 @@ def test_tiny_conv_dense_random(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
-            filters = num_kernels, kernel_size=(kernel_height, kernel_width)))
+        model.add(
+            Conv2D(
+                input_shape=input_shape,
+                filters=num_kernels,
+                kernel_size=(kernel_height, kernel_width),
+            )
+        )
         model.add(Dropout(0.5))
         model.add(Flatten())
         model.add(Dense(hidden_dim))
@@ -1815,8 +2339,13 @@ def test_tiny_conv_dropout_random(self):

         # Define a model
         model = Sequential()
-        model.add(Conv2D(input_shape = input_shape,
model.add(Conv2D(input_shape = input_shape, - filters = num_kernels, kernel_size=(kernel_height, kernel_width))) + model.add( + Conv2D( + input_shape=input_shape, + filters=num_kernels, + kernel_size=(kernel_height, kernel_width), + ) + ) model.add(SpatialDropout2D(0.5)) model.add(Flatten()) model.add(Dense(hidden_dim)) @@ -1835,7 +2364,7 @@ def test_tiny_dense_tanh_fused_random(self): # Define a model model = Sequential() - model.add(Dense(hidden_dim, input_shape=(input_dim,), activation='tanh')) + model.add(Dense(hidden_dim, input_shape=(input_dim,), activation="tanh")) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1855,8 +2384,14 @@ def test_tiny_conv_relu_fused_random(self): # Define a model model = Sequential() - model.add(Conv2D(input_shape = input_shape, activation='relu', - filters = num_kernels, kernel_size=(kernel_height, kernel_width))) + model.add( + Conv2D( + input_shape=input_shape, + activation="relu", + filters=num_kernels, + kernel_size=(kernel_height, kernel_width), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1864,7 +2399,6 @@ def test_tiny_conv_relu_fused_random(self): # Get the coreml model self._test_model(model) - def test_tiny_time_distrbuted(self): # as the first layer in a model @@ -1884,11 +2418,19 @@ def test_tiny_sequence_lstm(self, model_precision=_MLMODEL_FULL_PRECISION): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_shape = (input_length, input_dim), - implementation = 1, recurrent_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_shape=(input_length, input_dim), + implementation=1, + recurrent_activation="sigmoid", + ) + ) # Set some random weights - model.set_weights([(np.random.rand(*w.shape)-0.5)*0.2 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) * 0.2 for w in model.get_weights()] + ) # Test the keras model self._test_model(model, delta=1e-4, model_precision=model_precision) @@ -1898,7 +2440,7 @@ def test_tiny_sequence_lstm_half_precision(self): def test_tiny_spatial_bn(self): np.random.seed(1988) - x_in = Input(shape=(7,7,2)) + x_in = Input(shape=(7, 7, 2)) x = ZeroPadding2D(padding=(1, 1))(x_in) x = BatchNormalization(axis=2)(x) model = Model(x_in, x) @@ -1922,19 +2464,19 @@ def test_embedding_fixed_length(self): def test_conv1d_flatten(self, delta=1e-2): model = Sequential() - model.add(AveragePooling1D(2,input_shape=(64,9))) - model.add(Conv1D(16, 1, padding='same', activation='relu', use_bias=False)) + model.add(AveragePooling1D(2, input_shape=(64, 9))) + model.add(Conv1D(16, 1, padding="same", activation="relu", use_bias=False)) model.add(MaxPooling1D(2)) model.add(Flatten()) - model.add(Dense(units=7, activation='softmax', use_bias=False)) + model.add(Dense(units=7, activation="softmax", use_bias=False)) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) self._test_model(model, delta=delta) def test_dense_fused_act_in_td(self): np.random.seed(1988) - x_in = Input(shape=(10,2)) - x = TimeDistributed(Dense(6, activation = 'softmax'))(x_in) + x_in = Input(shape=(10, 2)) + x = TimeDistributed(Dense(6, activation="softmax"))(x_in) model = Model(inputs=[x_in], outputs=[x]) self._test_model(model, delta=1e-4) @@ -1946,7 +2488,14 @@ def test_conv_batch_1d(self): input_length = 10 model = Sequential() - model.add(Embedding(vocabulary_size, embedding_dimension, input_length=input_length, trainable=True)) + model.add( + 
Embedding( + vocabulary_size, + embedding_dimension, + input_length=input_length, + trainable=True, + ) + ) model.add(Conv1D(5, 2)) model.add(BatchNormalization()) @@ -1965,18 +2514,23 @@ def test_lstm_td(self): # Define a model model = Sequential() - model.add(SimpleRNN(num_channels, return_sequences=True, - input_shape=(input_length, input_dim),)) + model.add( + SimpleRNN( + num_channels, + return_sequences=True, + input_shape=(input_length, input_dim), + ) + ) model.add(TimeDistributed(Dense(5))) # Set some random weights - model.set_weights([np.random.rand(*w.shape)*0.2 - 0.1 for w in \ - model.get_weights()]) + model.set_weights( + [np.random.rand(*w.shape) * 0.2 - 0.1 for w in model.get_weights()] + ) # Test the keras model self._test_model(model) - # Making sure that giant channel sizes get handled correctly def test_large_channel_gpu(self): @@ -1985,14 +2539,21 @@ def test_large_channel_gpu(self): kernel_size = 3 model = Sequential() - model.add(Conv2D(input_shape = input_shape, - filters = num_channels, kernel_size = (kernel_size, kernel_size))) + model.add( + Conv2D( + input_shape=input_shape, + filters=num_channels, + kernel_size=(kernel_size, kernel_size), + ) + ) - model.set_weights([(np.random.rand(*w.shape)-0.5)*0.2 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) * 0.2 for w in model.get_weights()] + ) self._test_model(model, delta=1e-2) - @pytest.mark.xfail(raises = Exception) + @pytest.mark.xfail(raises=Exception) def test_large_batch_gpu(self): batch_size = 2049 @@ -2000,24 +2561,27 @@ def test_large_batch_gpu(self): kernel_size = 3 model = Sequential() - model.add(TimeDistributed(Dense(num_channels), input_shape=(batch_size, kernel_size))) + model.add( + TimeDistributed(Dense(num_channels), input_shape=(batch_size, kernel_size)) + ) - model.set_weights([(np.random.rand(*w.shape)-0.5)*0.2 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) * 0.2 for w in model.get_weights()] + ) self._test_model(model, delta=1e-2) -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 class KerasTopologyCorrectnessTest(KerasNumericCorrectnessTest): - def test_dangling_merge_left(self): - x1 = Input(shape=(4,), name = 'input1') - x2 = Input(shape=(5,), name = 'input2') - y1 = Dense(6, name = 'dense')(x2) + x1 = Input(shape=(4,), name="input1") + x2 = Input(shape=(5,), name="input2") + y1 = Dense(6, name="dense")(x2) z = concatenate([x1, y1]) - model = Model(inputs = [x1,x2], outputs = [z]) + model = Model(inputs=[x1, x2], outputs=[z]) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -2025,18 +2589,18 @@ def test_dangling_merge_left(self): def test_dangling_merge_right(self): - x1 = Input(shape=(4,), name = 'input1') - x2 = Input(shape=(5,), name = 'input2') - y1 = Dense(6, name = 'dense')(x2) + x1 = Input(shape=(4,), name="input1") + x2 = Input(shape=(5,), name="input2") + y1 = Dense(6, name="dense")(x2) z = concatenate([y1, x1]) - model = Model(inputs = [x1,x2], outputs = [z]) + model = Model(inputs=[x1, x2], outputs=[z]) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) self._test_model(model) def test_shared_vision(self): - digit_input = Input(shape=(27, 27,1)) + digit_input = Input(shape=(27, 27, 1)) x = Conv2D(64, (3, 3))(digit_input) x = Conv2D(64, (3, 3))(x) out = Flatten()(x) @@ -2044,15 +2608,15 @@ def test_shared_vision(self): vision_model = Model(inputs=[digit_input], outputs=[out]) # then define the tell-digits-apart model - digit_a = Input(shape=(27,27,1)) - digit_b = Input(shape=(27,27,1)) + digit_a = Input(shape=(27, 27, 1)) + digit_b = Input(shape=(27, 27, 1)) # the vision model will be shared, weights and all out_a = vision_model(digit_a) out_b = vision_model(digit_b) concatenated = concatenate([out_a, out_b]) - out = Dense(1, activation='sigmoid')(concatenated) + out = Dense(1, activation="sigmoid")(concatenated) model = Model(inputs=[digit_a, digit_b], outputs=out) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) self._test_model(model) @@ -2071,114 +2635,123 @@ def test_tiny_weight_sharing(self): model = Model(inputs=[x], outputs=[z]) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + self._test_model(model, mode="random", delta=1e-2) def test_tiny_multiple_outputs(self): x = Input(shape=(3,)) y1 = Dense(4)(x) y2 = Dense(5)(x) - model = Model([x], [y1,y2]) + model = Model([x], [y1, y2]) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + self._test_model(model, mode="random", delta=1e-2) def test_intermediate_outputs_dense(self): x = Input(shape=(3,)) - y = Dense(4, name='intermediate_dense_y')(x) - z = Dense(5, name='intermediate_dense_z')(y) - model = Model([x], [y,z]) + y = Dense(4, name="intermediate_dense_y")(x) + z = Dense(5, name="intermediate_dense_z")(y) + model = Model([x], [y, z]) - model.set_weights([np.random.rand(*w.shape) for w in \ - model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) + self._test_model(model, mode="random", delta=1e-2) def test_intermediate_outputs_conv2d(self): - x = Input(shape=(8,8,3)) - y = Conv2D(4, (3,3), name='intermdiate_conv2d_1')(x) - z = Conv2D(5, (3,3), name='intermdiate_conv2d_2')(y) - model = Model([x], [y,z]) + x = Input(shape=(8, 8, 3)) + y = Conv2D(4, (3, 3), name="intermdiate_conv2d_1")(x) + z = Conv2D(5, (3, 3), 
name="intermdiate_conv2d_2")(y) + model = Model([x], [y, z]) - model.set_weights([np.random.rand(*w.shape) for w in \ - model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) + self._test_model(model, mode="random", delta=1e-2) def test_intermediate_outputs_conv2d_fused_act(self): - x = Input(shape=(8,8,3)) - y = Conv2D(4, (3,3), name='intermdiate_conv2d_1_fused', - activation='relu')(x) - z = Conv2D(5, (3,3), name='intermdiate_conv2d_2_fused', - activation='relu')(y) - model = Model([x], [y,z]) + x = Input(shape=(8, 8, 3)) + y = Conv2D(4, (3, 3), name="intermdiate_conv2d_1_fused", activation="relu")(x) + z = Conv2D(5, (3, 3), name="intermdiate_conv2d_2_fused", activation="relu")(y) + model = Model([x], [y, z]) - model.set_weights([np.random.rand(*w.shape) - 0.5 for w in \ - model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + model.set_weights([np.random.rand(*w.shape) - 0.5 for w in model.get_weights()]) + self._test_model(model, mode="random", delta=1e-2) def test_intermediate_outputs_conv1d(self): - x = Input(shape=(10,3)) - y = Conv1D(4, 3, name='intermdiate_conv1d_1')(x) - z = Conv1D(5, 3, name='intermdiate_conv1d_2')(y) - model = Model([x], [y,z]) - model.set_weights([np.random.rand(*w.shape) for w in \ - model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + x = Input(shape=(10, 3)) + y = Conv1D(4, 3, name="intermdiate_conv1d_1")(x) + z = Conv1D(5, 3, name="intermdiate_conv1d_2")(y) + model = Model([x], [y, z]) + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) + self._test_model(model, mode="random", delta=1e-2) def test_intermediate_outputs_conv1d_fused_act(self): - x = Input(shape=(10,3)) - y = Conv1D(4, 3, name='intermdiate_conv1d_1_fused', - activation='relu')(x) - z = Conv1D(5, 3, name='intermdiate_conv1d_2_fused', - activation='relu')(y) - model = Model([x], [y,z]) - model.set_weights([np.random.rand(*w.shape) - 0.5 for w in \ - model.get_weights()]) - self._test_model(model, mode='random', delta=1e-2) + x = Input(shape=(10, 3)) + y = Conv1D(4, 3, name="intermdiate_conv1d_1_fused", activation="relu")(x) + z = Conv1D(5, 3, name="intermdiate_conv1d_2_fused", activation="relu")(y) + model = Model([x], [y, z]) + model.set_weights([np.random.rand(*w.shape) - 0.5 for w in model.get_weights()]) + self._test_model(model, mode="random", delta=1e-2) def test_intermediate_rcnn_1d(self): - x_in = Input(shape=(10,2)) + x_in = Input(shape=(10, 2)) # Conv block 1 - x = Conv1D(3, 3, padding='same', name='interm_rcnn_conv1')(x_in) - x = BatchNormalization(axis=-1, name='interm_rcnn_bn1')(x) - x = Activation('elu')(x) - x = MaxPooling1D(pool_size=2, name='interm_rcnn_pool1')(x) + x = Conv1D(3, 3, padding="same", name="interm_rcnn_conv1")(x_in) + x = BatchNormalization(axis=-1, name="interm_rcnn_bn1")(x) + x = Activation("elu")(x) + x = MaxPooling1D(pool_size=2, name="interm_rcnn_pool1")(x) - out1 = x # out1.shape = (5,3) - x = GRU(6, name='gru1')(x) + out1 = x # out1.shape = (5,3) + x = GRU(6, name="gru1")(x) out2 = x - model = Model(x_in, [out1,out2]) + model = Model(x_in, [out1, out2]) # model = Model(x_in, [out2]) - self._test_model(model, mode='random_zero_mean', delta=1e-2) + self._test_model(model, mode="random_zero_mean", delta=1e-2) def test_tiny_mobilenet_arch(self, model_precision=_MLMODEL_FULL_PRECISION): def ReLU6(x, name): - if keras.__version__ >= _StrictVersion('2.2.1'): - return ReLU(6., name=name)(x) + if 
keras.__version__ >= _StrictVersion("2.2.1"): + return ReLU(6.0, name=name)(x) else: return Activation(relu6, name=name)(x) - img_input = Input(shape=(32,32,3)) - x = Conv2D(4, (3,3), padding='same', use_bias=False, strides=(2,2), name='conv1')(img_input) - x = BatchNormalization(axis=-1, name='conv1_bn')(x) - x = ReLU6(x, name='conv1_relu') - - x = DepthwiseConv2D((3, 3), padding='same', depth_multiplier=1, strides=(1,1), - use_bias=False, name='conv_dw_1')(x) - x = BatchNormalization(axis=-1, name='conv_dw_1_bn')(x) - x = ReLU6(x, name='conv_dw_1_relu') - - x = Conv2D(8, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_1')(x) - x = BatchNormalization(axis=-1, name='conv_pw_1_bn')(x) - x = ReLU6(x, name='conv_pw_1_relu') - - x = DepthwiseConv2D((3, 3), padding='same', depth_multiplier=1, strides=(2,2), - use_bias=False, name='conv_dw_2')(x) - x = BatchNormalization(axis=-1, name='conv_dw_2_bn')(x) - x = ReLU6(x, name='conv_dw_2_relu') - - x = Conv2D(8, (1, 1), padding='same', use_bias=False, strides=(2, 2), name='conv_pw_2')(x) - x = BatchNormalization(axis=-1, name='conv_pw_2_bn')(x) - x = ReLU6(x, name='conv_pw_2_relu') + img_input = Input(shape=(32, 32, 3)) + x = Conv2D( + 4, (3, 3), padding="same", use_bias=False, strides=(2, 2), name="conv1" + )(img_input) + x = BatchNormalization(axis=-1, name="conv1_bn")(x) + x = ReLU6(x, name="conv1_relu") + + x = DepthwiseConv2D( + (3, 3), + padding="same", + depth_multiplier=1, + strides=(1, 1), + use_bias=False, + name="conv_dw_1", + )(x) + x = BatchNormalization(axis=-1, name="conv_dw_1_bn")(x) + x = ReLU6(x, name="conv_dw_1_relu") + + x = Conv2D( + 8, (1, 1), padding="same", use_bias=False, strides=(1, 1), name="conv_pw_1" + )(x) + x = BatchNormalization(axis=-1, name="conv_pw_1_bn")(x) + x = ReLU6(x, name="conv_pw_1_relu") + + x = DepthwiseConv2D( + (3, 3), + padding="same", + depth_multiplier=1, + strides=(2, 2), + use_bias=False, + name="conv_dw_2", + )(x) + x = BatchNormalization(axis=-1, name="conv_dw_2_bn")(x) + x = ReLU6(x, name="conv_dw_2_relu") + + x = Conv2D( + 8, (1, 1), padding="same", use_bias=False, strides=(2, 2), name="conv_pw_2" + )(x) + x = BatchNormalization(axis=-1, name="conv_pw_2_bn")(x) + x = ReLU6(x, name="conv_pw_2_relu") model = Model(inputs=[img_input], outputs=[x]) @@ -2188,29 +2761,33 @@ def test_tiny_mobilenet_arch_half_precision(self): self.test_tiny_mobilenet_arch(model_precision=_MLMODEL_HALF_PRECISION) def test_tiny_xception(self, model_precision=_MLMODEL_FULL_PRECISION): - img_input = Input(shape=(32,32,3)) - x = Conv2D(2, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input) - x = BatchNormalization(name='block1_conv1_bn')(x) - x = Activation('relu', name='block1_conv1_act')(x) - x = Conv2D(4, (3, 3), use_bias=False, name='block1_conv2')(x) - x = BatchNormalization(name='block1_conv2_bn')(x) - x = Activation('relu', name='block1_conv2_act')(x) - - residual = Conv2D(8, (1, 1), strides=(2, 2), - padding='same', use_bias=False)(x) - residual = BatchNormalization()(residual) + img_input = Input(shape=(32, 32, 3)) + x = Conv2D(2, (3, 3), strides=(2, 2), use_bias=False, name="block1_conv1")( + img_input + ) + x = BatchNormalization(name="block1_conv1_bn")(x) + x = Activation("relu", name="block1_conv1_act")(x) + x = Conv2D(4, (3, 3), use_bias=False, name="block1_conv2")(x) + x = BatchNormalization(name="block1_conv2_bn")(x) + x = Activation("relu", name="block1_conv2_act")(x) - x = SeparableConv2D(8, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x) - x = 
BatchNormalization(name='block2_sepconv1_bn')(x) - x = Activation('relu', name='block2_sepconv2_act')(x) - x = SeparableConv2D(8, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x) - x = BatchNormalization(name='block2_sepconv2_bn')(x) + residual = Conv2D(8, (1, 1), strides=(2, 2), padding="same", use_bias=False)(x) + residual = BatchNormalization()(residual) - x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x) + x = SeparableConv2D( + 8, (3, 3), padding="same", use_bias=False, name="block2_sepconv1" + )(x) + x = BatchNormalization(name="block2_sepconv1_bn")(x) + x = Activation("relu", name="block2_sepconv2_act")(x) + x = SeparableConv2D( + 8, (3, 3), padding="same", use_bias=False, name="block2_sepconv2" + )(x) + x = BatchNormalization(name="block2_sepconv2_bn")(x) + + x = MaxPooling2D((3, 3), strides=(2, 2), padding="same", name="block2_pool")(x) x = add([x, residual]) - residual = Conv2D(16, (1, 1), strides=(2, 2), - padding='same', use_bias=False)(x) + residual = Conv2D(16, (1, 1), strides=(2, 2), padding="same", use_bias=False)(x) residual = BatchNormalization()(residual) model = Model(inputs=[img_input], outputs=[residual]) @@ -2226,8 +2803,8 @@ def test_nested_model_giving_output(self): top_model = Sequential() top_model.add(Flatten(input_shape=base_model.output_shape[1:])) - top_model.add(Dense(16, activation='relu')) - top_model.add(Dense(1, activation='sigmoid')) + top_model.add(Dense(16, activation="relu")) + top_model.add(Dense(1, activation="sigmoid")) model = Model(inputs=base_model.input, outputs=top_model(base_model.output)) self._test_model(model) @@ -2237,42 +2814,54 @@ def test_time_distributed_conv(self): model = Sequential() model.add( TimeDistributed( - Conv2D(64, (3, 3), activation='relu'), - input_shape=(1, 30, 30, 3) + Conv2D(64, (3, 3), activation="relu"), input_shape=(1, 30, 30, 3) ) ) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(1, 1)))) - model.add(TimeDistributed(Conv2D(32, (4, 4), activation='relu'))) + model.add(TimeDistributed(Conv2D(32, (4, 4), activation="relu"))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) - model.add(TimeDistributed(Conv2D(32, (4, 4), activation='relu'))) + model.add(TimeDistributed(Conv2D(32, (4, 4), activation="relu"))) model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2)))) model.add(TimeDistributed(Flatten())) model.add(Dropout(0.5)) model.add(LSTM(32, return_sequences=False, dropout=0.5)) - model.add(Dense(10, activation='sigmoid')) + model.add(Dense(10, activation="sigmoid")) self._test_model(model) + @pytest.mark.slow @pytest.mark.keras2 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") class KerasNumericCorrectnessStressTest(KerasNumericCorrectnessTest): """ Unit test class for testing all combinations of a particular layer. 
""" - def _run_test(self, model, param, model_dir = None, delta=1e-2, transpose_keras_result=True, one_dim_seq_flags = None, model_precision=_MLMODEL_FULL_PRECISION): + + def _run_test( + self, + model, + param, + model_dir=None, + delta=1e-2, + transpose_keras_result=True, + one_dim_seq_flags=None, + model_precision=_MLMODEL_FULL_PRECISION, + ): """ Run a test on a particular model """ use_tmp_folder = False if model_dir is None: use_tmp_folder = True model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'keras.mlmodel') + model_path = os.path.join(model_dir, "keras.mlmodel") # Generate some random data nb_inputs = len(model.inputs) if nb_inputs > 1: - input_names = []; input_data = []; coreml_input = {} + input_names = [] + input_data = [] + coreml_input = {} for i in range(nb_inputs): input_shape = [1 if a is None else a for a in model.input_shape[i]] X = _generate_data(input_shape) @@ -2280,17 +2869,23 @@ def _run_test(self, model, param, model_dir = None, delta=1e-2, transpose_keras_ input_names.append(feature_name) input_data.append(X) if one_dim_seq_flags is None: - coreml_input[feature_name] = _keras_transpose(X).astype('f') + coreml_input[feature_name] = _keras_transpose(X).astype("f") else: - coreml_input[feature_name] = _keras_transpose(X, one_dim_seq_flags[i]).astype('f') + coreml_input[feature_name] = _keras_transpose( + X, one_dim_seq_flags[i] + ).astype("f") else: input_shape = [1 if a is None else a for a in model.input_shape] - input_names = ['data'] + input_names = ["data"] input_data = _generate_data(input_shape) if one_dim_seq_flags is None: - coreml_input = {'data': _keras_transpose(input_data).astype('f')} + coreml_input = {"data": _keras_transpose(input_data).astype("f")} else: - coreml_input = {'data': _keras_transpose(input_data, one_dim_seq_flags[0]).astype('f')} + coreml_input = { + "data": _keras_transpose(input_data, one_dim_seq_flags[0]).astype( + "f" + ) + } # Make predictions if transpose_keras_result: @@ -2299,33 +2894,53 @@ def _run_test(self, model, param, model_dir = None, delta=1e-2, transpose_keras_ keras_preds = model.predict(input_data).flatten() # Get the model - coreml_model = _get_coreml_model(model, input_names, ['output'], - model_precision=model_precision) - if is_macos() and macos_version() >= (10, 13): + coreml_model = _get_coreml_model( + model, input_names, ["output"], model_precision=model_precision + ) + if _is_macos() and _macos_version() >= (10, 13): # get prediction - coreml_preds = coreml_model.predict(coreml_input)['output'].flatten() + coreml_preds = coreml_model.predict(coreml_input)["output"].flatten() if use_tmp_folder: shutil.rmtree(model_dir) - self.assertEquals(len(coreml_preds), len(keras_preds), - msg = 'Failed test case %s. Lengths wrong (%s vs %s)' % (param, len(coreml_preds), len(keras_preds))) + self.assertEquals( + len(coreml_preds), + len(keras_preds), + msg="Failed test case %s. Lengths wrong (%s vs %s)" + % (param, len(coreml_preds), len(keras_preds)), + ) for i in range(len(keras_preds)): max_den = max(1.0, keras_preds[i], coreml_preds[i]) - self.assertAlmostEquals(keras_preds[i]/max_den, coreml_preds[i]/max_den, delta = delta, - msg = 'Failed test case %s. Predictions wrong (%s vs %s)' % (param, coreml_preds[i], keras_preds[i])) + self.assertAlmostEquals( + keras_preds[i] / max_den, + coreml_preds[i] / max_den, + delta=delta, + msg="Failed test case %s. 
Predictions wrong (%s vs %s)" + % (param, coreml_preds[i], keras_preds[i]), + ) @pytest.mark.slow def test_activation_layer_params(self): options = dict( - activation = ['tanh', 'relu', 'sigmoid', 'softmax', 'softplus', 'softsign', 'hard_sigmoid', 'elu'] + activation=[ + "tanh", + "relu", + "sigmoid", + "softmax", + "softplus", + "softsign", + "hard_sigmoid", + "elu", + ] ) # Define a function that tests a model num_channels = 10 input_dim = 10 + def build_model(x): model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim)) + model.add(Dense(num_channels, input_dim=input_dim)) model.add(Activation(**dict(zip(options.keys(), x)))) return x, model @@ -2342,16 +2957,26 @@ def build_model(x): @pytest.mark.slow def test_dense_layer_params(self): options = dict( - activation = ['relu', 'softmax', 'tanh', 'sigmoid', 'softplus', 'softsign', 'elu','hard_sigmoid'], - use_bias = [True, False], + activation=[ + "relu", + "softmax", + "tanh", + "sigmoid", + "softplus", + "softsign", + "elu", + "hard_sigmoid", + ], + use_bias=[True, False], ) # Define a function that tests a model input_shape = (10,) num_channels = 10 + def build_model(x): kwargs = dict(zip(options.keys(), x)) model = Sequential() - model.add(Dense(num_channels, input_shape = input_shape, **kwargs)) + model.add(Dense(num_channels, input_shape=input_shape, **kwargs)) return x, model # Iterate through all combinations @@ -2365,9 +2990,7 @@ def build_model(x): @pytest.mark.slow def test_upsample_layer_params(self): - options = dict( - size= [(2,2), (3,3), (4,4), (5,5)] - ) + options = dict(size=[(2, 2), (3, 3), (4, 4), (5, 5)]) np.random.seed(1988) input_dim = 10 @@ -2378,8 +3001,7 @@ def test_upsample_layer_params(self): def build_model(x): kwargs = dict(zip(options.keys(), x)) model = Sequential() - model.add(Conv2D(filters=5, kernel_size=(7,7), - input_shape = input_shape)) + model.add(Conv2D(filters=5, kernel_size=(7, 7), input_shape=input_shape)) model.add(UpSampling2D(**kwargs)) return x, model @@ -2395,19 +3017,24 @@ def build_model(x): @pytest.mark.slow def test_conv_layer_params(self, model_precision=_MLMODEL_FULL_PRECISION): options = dict( - activation = ['relu', 'tanh', 'sigmoid'], # keras does not support softmax on 4-D - use_bias = [True, False], - padding = ['same', 'valid'], - filters = [1, 3, 5], - kernel_size = [[5,5]], # fails when sizes are different + activation=[ + "relu", + "tanh", + "sigmoid", + ], # keras does not support softmax on 4-D + use_bias=[True, False], + padding=["same", "valid"], + filters=[1, 3, 5], + kernel_size=[[5, 5]], # fails when sizes are different ) # Define a function that tests a model input_shape = (10, 10, 1) + def build_model(x): kwargs = dict(zip(options.keys(), x)) model = Sequential() - model.add(Conv2D(input_shape = input_shape, **kwargs)) + model.add(Conv2D(input_shape=input_shape, **kwargs)) return x, model # Iterate through all combinations @@ -2425,16 +3052,15 @@ def test_conv_layer_params_half_precision(self): @pytest.mark.slow def test_dense_elementwise_params(self): - options = dict( - modes = [add, multiply, concatenate, average, maximum] - ) + options = dict(modes=[add, multiply, concatenate, average, maximum]) + def build_model(mode): x1 = Input(shape=(3,)) x2 = Input(shape=(3,)) y1 = Dense(4)(x1) y2 = Dense(4)(x2) z = mode([y1, y2]) - model = Model([x1,x2], z) + model = Model([x1, x2], z) return mode, model product = itertools.product(*options.values()) @@ -2447,51 +3073,53 @@ def test_vgg_16_tiny(self): input_shape = (48, 48, 3) model = Sequential() - 
model.add(ZeroPadding2D((1,1),input_shape=input_shape)) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) + model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) model.add(Flatten()) - model.add(Dense(32, activation='relu')) + model.add(Dense(32, activation="relu")) model.add(Dropout(0.5)) - model.add(Dense(32, activation='relu')) + model.add(Dense(32, activation="relu")) model.add(Dropout(0.5)) - model.add(Dense(1000)) # activation='softmax')) + model.add(Dense(1000)) # activation='softmax')) # Set some random weights - model.set_weights([(np.random.rand(*w.shape)-0.5)*0.2 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) * 0.2 for w in model.get_weights()] + ) # Get the coreml model self._test_model(model) @@ -2500,89 +3128,95 @@ def test_vgg_16_tiny_no_pooling(self): input_shape = (48, 48, 3) model = Sequential() - model.add(ZeroPadding2D((1,1),input_shape=input_shape)) - model.add(Conv2D(32, (3,3), 
activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) + model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) model.add(Flatten()) - model.add(Dense(32, activation='relu')) - #model.add(Dropout(0.5)) - model.add(Dense(32, activation='relu')) - #model.add(Dropout(0.5)) - model.add(Dense(1000)) # activation='softmax')) + model.add(Dense(32, activation="relu")) + # model.add(Dropout(0.5)) + model.add(Dense(32, activation="relu")) + # model.add(Dropout(0.5)) + model.add(Dense(1000)) # activation='softmax')) # Set some random weights - model.set_weights([(np.random.rand(*w.shape)-0.5)*0.2 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) * 0.2 for w in model.get_weights()] + ) # Get the coreml model self._test_model(model) - def test_vgg_16_tiny_no_pooling_no_padding(self, model_precision=_MLMODEL_FULL_PRECISION): + def test_vgg_16_tiny_no_pooling_no_padding( + self, model_precision=_MLMODEL_FULL_PRECISION + ): input_shape = (48, 48, 3) 
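# VGG-16-style stack with every ZeroPadding2D and MaxPooling2D removed:
# thirteen 3x3 ReLU convolutions feed the usual Flatten plus three Dense
# layers, so the test isolates plain convolution arithmetic at this size.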
model = Sequential() - model.add(Conv2D(32, (3,3), activation='relu', input_shape = input_shape)) - model.add(Conv2D(32, (3,3), activation='relu')) + model.add(Conv2D(32, (3, 3), activation="relu", input_shape=input_shape)) + model.add(Conv2D(32, (3, 3), activation="relu")) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) - model.add(Conv2D(32, (3,3), activation='relu')) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) + model.add(Conv2D(32, (3, 3), activation="relu")) model.add(Flatten()) - model.add(Dense(32, activation='relu')) + model.add(Dense(32, activation="relu")) model.add(Dropout(0.5)) - model.add(Dense(32, activation='relu')) + model.add(Dense(32, activation="relu")) model.add(Dropout(0.5)) - model.add(Dense(1000, activation='softmax')) + model.add(Dense(1000, activation="softmax")) # Get the coreml model self._test_model(model, model_precision=model_precision) def test_vgg_16_tiny_no_pooling_no_padding_half_precision(self): - return self.test_vgg_16_tiny_no_pooling_no_padding(model_precision=_MLMODEL_HALF_PRECISION) + return self.test_vgg_16_tiny_no_pooling_no_padding( + model_precision=_MLMODEL_HALF_PRECISION + ) def test_imdb_fasttext_first_2(self): @@ -2602,9 +3236,9 @@ def test_imdb_fasttext_first_2(self): def test_tiny_mcrnn_td(self): model = Sequential() - model.add(Conv2D(3,(1,1), input_shape=(2,4,4), padding='same')) - model.add(AveragePooling2D(pool_size=(2,2))) - model.add(Reshape((2,3))) + model.add(Conv2D(3, (1, 1), input_shape=(2, 4, 4), padding="same")) + model.add(AveragePooling2D(pool_size=(2, 2))) + model.add(Reshape((2, 3))) model.add(TimeDistributed(Dense(5))) self._test_model(model) @@ -2612,96 +3246,96 @@ def test_tiny_mcrnn_td(self): def test_tiny_mcrnn_recurrent(self): model = Sequential() - model.add(Conv2D(3,(1,1), input_shape=(2,4,4), padding='same')) - model.add(AveragePooling2D(pool_size=(2,2))) - model.add(Reshape((2,3))) - model.add(LSTM(5, recurrent_activation = 'sigmoid')) + model.add(Conv2D(3, (1, 1), input_shape=(2, 4, 4), padding="same")) + model.add(AveragePooling2D(pool_size=(2, 2))) + model.add(Reshape((2, 3))) + model.add(LSTM(5, recurrent_activation="sigmoid")) self._test_model(model) def test_tiny_mcrnn_music_tagger(self): - x_in = Input(shape=(4,6,1)) + x_in = Input(shape=(4, 6, 1)) x = ZeroPadding2D(padding=(0, 1))(x_in) - x = BatchNormalization(axis=2, name='bn_0_freq')(x) + x = BatchNormalization(axis=2, name="bn_0_freq")(x) # Conv block 1 - x = Conv2D(2, (3, 3), padding='same', name='conv1')(x) - x = BatchNormalization(axis=3, name='bn1')(x) - x = Activation('elu')(x) - x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x) + 
x = Conv2D(2, (3, 3), padding="same", name="conv1")(x) + x = BatchNormalization(axis=3, name="bn1")(x) + x = Activation("elu")(x) + x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="pool1")(x) # Conv block 2 - x = Conv2D(4, (3, 3), padding='same', name='conv2')(x) - x = BatchNormalization(axis=3, name='bn2')(x) - x = Activation('elu')(x) - x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(x) + x = Conv2D(4, (3, 3), padding="same", name="conv2")(x) + x = BatchNormalization(axis=3, name="bn2")(x) + x = Activation("elu")(x) + x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="pool2")(x) # Should get you (1,1,2,4) x = Reshape((2, 4))(x) - x = GRU(32, return_sequences=True, name='gru1')(x) - x = GRU(32, return_sequences=False, name='gru2')(x) + x = GRU(32, return_sequences=True, name="gru1")(x) + x = GRU(32, return_sequences=False, name="gru2")(x) # Create model. model = Model(x_in, x) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_model(model, mode='random_zero_mean', delta=1e-2) + self._test_model(model, mode="random_zero_mean", delta=1e-2) def test_tiny_apple_manual(self): model = Sequential() - model.add(LSTM(3, input_shape=(4, 5), recurrent_activation='sigmoid')) + model.add(LSTM(3, input_shape=(4, 5), recurrent_activation="sigmoid")) model.add(Dense(5)) - model.add(Activation('softmax')) + model.add(Activation("softmax")) self._test_model(model) def test_tiny_image_captioning_image_branch(self): - img_input_1 = Input(shape=(16,16,3)) - x = Conv2D(2,(3,3))(img_input_1) + img_input_1 = Input(shape=(16, 16, 3)) + x = Conv2D(2, (3, 3))(img_input_1) x = Flatten()(x) img_model = Model(inputs=[img_input_1], outputs=[x]) - img_input = Input(shape=(16,16,3)) + img_input = Input(shape=(16, 16, 3)) x = img_model(img_input) - x = Dense(8, name = 'cap_dense')(x) - x = Reshape((1,8), name = 'cap_reshape')(x) - image_branch = Model(inputs=[img_input],outputs=[x]) + x = Dense(8, name="cap_dense")(x) + x = Reshape((1, 8), name="cap_reshape")(x) + image_branch = Model(inputs=[img_input], outputs=[x]) self._test_model(image_branch) def test_tiny_image_captioning_feature_merge(self): - img_input_1 = Input(shape=(16,16,3)) - x = Conv2D(2,(3,3))(img_input_1) + img_input_1 = Input(shape=(16, 16, 3)) + x = Conv2D(2, (3, 3))(img_input_1) x = Flatten()(x) img_model = Model([img_input_1], [x]) - img_input = Input(shape=(16,16,3)) + img_input = Input(shape=(16, 16, 3)) x = img_model(img_input) - x = Dense(8, name = 'cap_dense')(x) - x = Reshape((1,8), name = 'cap_reshape')(x) + x = Dense(8, name="cap_dense")(x) + x = Reshape((1, 8), name="cap_reshape")(x) - sentence_input = Input(shape=(5,)) # max_length = 5 - y = Embedding(8, 8, name = 'cap_embedding')(sentence_input) - z = concatenate([x,y], axis = 1, name = 'cap_merge') + sentence_input = Input(shape=(5,)) # max_length = 5 + y = Embedding(8, 8, name="cap_embedding")(sentence_input) + z = concatenate([x, y], axis=1, name="cap_merge") combined_model = Model(inputs=[img_input, sentence_input], outputs=[z]) self._test_model(combined_model, one_dim_seq_flags=[False, True]) def test_tiny_image_captioning(self): # use a conv layer as a image feature branch - img_input_1 = Input(shape=(16,16,3)) - x = Conv2D(2,(3,3))(img_input_1) + img_input_1 = Input(shape=(16, 16, 3)) + x = Conv2D(2, (3, 3))(img_input_1) x = Flatten()(x) img_model = Model(inputs=[img_input_1], outputs=[x]) - img_input = Input(shape=(16,16,3)) + img_input = Input(shape=(16, 16, 3)) x = img_model(img_input) - x = Dense(8, name = 
'cap_dense')(x) - x = Reshape((1,8), name = 'cap_reshape')(x) + x = Dense(8, name="cap_dense")(x) + x = Reshape((1, 8), name="cap_reshape")(x) - sentence_input = Input(shape=(5,)) # max_length = 5 - y = Embedding(8, 8, name = 'cap_embedding')(sentence_input) - z = concatenate([x,y], axis = 1, name = 'cap_merge') - z = LSTM(4, return_sequences = True, name = 'cap_lstm')(z) - z = TimeDistributed(Dense(8), name = 'cap_timedistributed')(z) + sentence_input = Input(shape=(5,)) # max_length = 5 + y = Embedding(8, 8, name="cap_embedding")(sentence_input) + z = concatenate([x, y], axis=1, name="cap_merge") + z = LSTM(4, return_sequences=True, name="cap_lstm")(z) + z = TimeDistributed(Dense(8), name="cap_timedistributed")(z) combined_model = Model(inputs=[img_input, sentence_input], outputs=[z]) self._test_model(combined_model, one_dim_seq_flags=[False, True]) @@ -2725,9 +3359,9 @@ def test_tiny_babi_rnn(self): x3 = add([x1, x2]) x3 = LSTM(embed_hidden_size, return_sequences=False)(x3) x3 = Dropout(0.3)(x3) - x3 = Dense(vocab_size, activation='softmax')(x3) + x3 = Dense(vocab_size, activation="softmax")(x3) - model = Model(inputs=[input_tensor_1,input_tensor_2], outputs=[x3]) + model = Model(inputs=[input_tensor_1, input_tensor_2], outputs=[x3]) self._test_model(model, one_dim_seq_flags=[True, True]) @@ -2738,7 +3372,14 @@ def test_clickbait_cnn(self, model_precision=_MLMODEL_FULL_PRECISION): input_length = 20 model = Sequential() - model.add(Embedding(vocabulary_size, embedding_dimension, input_length=input_length, trainable=True)) + model.add( + Embedding( + vocabulary_size, + embedding_dimension, + input_length=input_length, + trainable=True, + ) + ) model.add(Conv1D(32, 2)) model.add(BatchNormalization()) @@ -2759,8 +3400,9 @@ def test_clickbait_cnn(self, model_precision=_MLMODEL_FULL_PRECISION): model.add(BatchNormalization()) model.add(Activation("sigmoid")) - self._test_model(model, one_dim_seq_flags=[True], - model_precision=model_precision) + self._test_model( + model, one_dim_seq_flags=[True], model_precision=model_precision + ) def test_clickbait_cnn_half_precision(self): return self.test_clickbait_cnn(model_precision=_MLMODEL_HALF_PRECISION) @@ -2768,16 +3410,16 @@ def test_clickbait_cnn_half_precision(self): def test_model_with_duplicated_edges(self): # Create a simple model inputs = Input(shape=(20, 20)) - activation = Activation('relu')(inputs) + activation = Activation("relu")(inputs) cropping = Cropping1D(cropping=(1, 1))(activation) - conv1d = Conv1D(20, 3, padding='valid')(activation) + conv1d = Conv1D(20, 3, padding="valid")(activation) ouputs = Add()([conv1d, cropping]) model = Model(inputs, ouputs) self._test_model(model) -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 class KerasBasicConversionTest(KerasNumericCorrectnessTest): def test_float_arraytype_flag(self): @@ -2789,14 +3431,22 @@ def test_float_arraytype_flag(self): model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Convert model from coremltools.converters import keras as keras_converter + coreml_model = keras_converter.convert(model, use_float_arraytype=True) spec = coreml_model.get_spec() from coremltools.proto import Model_pb2 as _Model_pb2 - self.assertEqual(spec.description.input[0].type.multiArrayType.dataType, _Model_pb2.ArrayFeatureType.FLOAT32) - self.assertEqual(spec.description.output[0].type.multiArrayType.dataType, _Model_pb2.ArrayFeatureType.FLOAT32) + + self.assertEqual( + spec.description.input[0].type.multiArrayType.dataType, + _Model_pb2.ArrayFeatureType.FLOAT32, + ) + self.assertEqual( + spec.description.output[0].type.multiArrayType.dataType, + _Model_pb2.ArrayFeatureType.FLOAT32, + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() # suite = unittest.TestSuite() # suite.addTest(KerasBasicNumericCorrectnessTest("test_lstm_concat_dense_random")) diff --git a/coremltools/test/neural_network/test_keras_nonseq.py b/coremltools/test/neural_network/test_keras_nonseq.py index fd669e98d..73ad048e1 100644 --- a/coremltools/test/neural_network/test_keras_nonseq.py +++ b/coremltools/test/neural_network/test_keras_nonseq.py @@ -5,20 +5,21 @@ import unittest import pytest -from coremltools._deps import HAS_KERAS_TF +from coremltools._deps import _HAS_KERAS_TF -if HAS_KERAS_TF: +if _HAS_KERAS_TF: from keras.models import Model from keras.layers import Dense, Input, merge from coremltools.converters import keras -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. Skipping tests.") @pytest.mark.keras1 class KerasNonSequentialModelTest(unittest.TestCase): """ - Unit test class for testing non-sequential Keras models. + Unit test class for testing non-sequential Keras models. 
""" + @classmethod def setUpClass(self): """ @@ -36,27 +37,29 @@ def test_simple_merge(self): x1 = Dense(4)(input_tensor) x2 = Dense(5)(x1) x3 = Dense(6)(x1) - x4 = merge([x2,x3], mode='concat') + x4 = merge([x2, x3], mode="concat") x5 = Dense(7)(x4) model = Model(input=[input_tensor], output=[x5]) - input_names = ['data'] - output_names = ['output'] + input_names = ["data"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEqual(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEqual(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) def test_merge_add(self): """ @@ -69,27 +72,29 @@ def test_merge_add(self): x1 = Dense(4)(input_tensor) x2 = Dense(5)(x1) x3 = Dense(5)(x1) - x4 = merge([x2,x3], mode='sum') + x4 = merge([x2, x3], mode="sum") x5 = Dense(7)(x4) model = Model(input=[input_tensor], output=[x5]) - input_names = ['data'] - output_names = ['output'] + input_names = ["data"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEqual(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEqual(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) def test_merge_multiply(self): """ @@ -102,24 +107,26 @@ def test_merge_multiply(self): x1 = Dense(4)(input_tensor) x2 = Dense(5)(x1) x3 = Dense(5)(x1) - x4 = merge([x2,x3], mode='mul') + x4 = merge([x2, x3], mode="mul") x5 = Dense(7)(x4) model = Model(input=[input_tensor], output=[x5]) - input_names = ['data'] - output_names = ['output'] + input_names = ["data"] + output_names = ["output"] spec = keras.convert(model, input_names, output_names).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) - self.assertTrue(spec.HasField('neuralNetwork')) + self.assertTrue(spec.HasField("neuralNetwork")) # Test the inputs and outputs self.assertEqual(len(spec.description.input), len(input_names)) - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) self.assertEqual(len(spec.description.output), len(output_names)) - self.assertEqual(sorted(output_names), - sorted(map(lambda x: x.name, spec.description.output))) + self.assertEqual( + 
sorted(output_names), sorted(map(lambda x: x.name, spec.description.output)) + ) diff --git a/coremltools/test/neural_network/test_keras_numeric.py b/coremltools/test/neural_network/test_keras_numeric.py index cd0d918b2..9b0f174d0 100644 --- a/coremltools/test/neural_network/test_keras_numeric.py +++ b/coremltools/test/neural_network/test_keras_numeric.py @@ -12,145 +12,197 @@ import numpy as np import pytest -from coremltools._deps import HAS_KERAS_TF -from coremltools.models.utils import macos_version, is_macos +from coremltools._deps import _HAS_KERAS_TF +from coremltools.models.utils import _macos_version, _is_macos -if HAS_KERAS_TF: +if _HAS_KERAS_TF: from keras.models import Sequential, Model - from keras.layers import Dense, Activation, Convolution2D, AtrousConvolution2D, LSTM, \ - ZeroPadding2D, Deconvolution2D, Permute, Convolution1D, AtrousConvolution1D, \ - MaxPooling2D, AveragePooling2D, Flatten, Dropout, UpSampling2D, merge, Merge, Input, GRU, \ - GlobalMaxPooling2D, GlobalMaxPooling1D, GlobalAveragePooling2D, GlobalAveragePooling1D, \ - Cropping1D, Cropping2D, Reshape, AveragePooling1D, MaxPooling1D, RepeatVector, ELU, \ - SimpleRNN, BatchNormalization, Embedding, ZeroPadding1D, UpSampling1D + from keras.layers import ( + Dense, + Activation, + Convolution2D, + AtrousConvolution2D, + LSTM, + ZeroPadding2D, + Deconvolution2D, + Permute, + Convolution1D, + AtrousConvolution1D, + MaxPooling2D, + AveragePooling2D, + Flatten, + Dropout, + UpSampling2D, + merge, + Merge, + Input, + GRU, + GlobalMaxPooling2D, + GlobalMaxPooling1D, + GlobalAveragePooling2D, + GlobalAveragePooling1D, + Cropping1D, + Cropping2D, + Reshape, + AveragePooling1D, + MaxPooling1D, + RepeatVector, + ELU, + SimpleRNN, + BatchNormalization, + Embedding, + ZeroPadding1D, + UpSampling1D, + ) from keras.layers.wrappers import Bidirectional, TimeDistributed def _keras_transpose(x, is_sequence=False): if len(x.shape) == 4: # Keras input shape = [Batch, Height, Width, Channels] - x = np.transpose(x, [0,3,1,2]) + x = np.transpose(x, [0, 3, 1, 2]) return np.expand_dims(x, axis=0) elif len(x.shape) == 3: # Keras input shape = [Batch, (Sequence) Length, Channels] - return np.transpose(x, [1,0,2]) + return np.transpose(x, [1, 0, 2]) elif len(x.shape) == 2: if is_sequence: # (N,S) --> (S,N,1,) return x.reshape(x.shape[::-1] + (1,)) else: # (N,C) --> (N,C,1,1) - return x.reshape((1, ) + x.shape) # Dense + return x.reshape((1,) + x.shape) # Dense elif len(x.shape) == 1: - if is_sequence: # (S) --> (S,N,1,1,1) + if is_sequence: # (S) --> (S,N,1,1,1) return x.reshape((x.shape[0], 1, 1)) - else: + else: return x else: return x + def _get_coreml_model(model, model_path, input_names, output_names): """ Get the coreml model from the Keras model. """ # Convert the model from coremltools.converters import keras as keras_converter + model = keras_converter.convert(model, input_names, output_names) return model -def _generate_data(input_shape, mode = 'random'): +def _generate_data(input_shape, mode="random"): """ Generate some random data according to a shape. 
""" - if mode == 'zeros': + if mode == "zeros": X = np.zeros(input_shape) - elif mode == 'ones': + elif mode == "ones": X = np.ones(input_shape) - elif mode == 'linear': + elif mode == "linear": X = np.array(range(np.product(input_shape))).reshape(input_shape) - elif mode == 'random': + elif mode == "random": X = np.random.rand(*input_shape) - elif mode == 'random_zero_mean': - X = np.random.rand(*input_shape)-0.5 + elif mode == "random_zero_mean": + X = np.random.rand(*input_shape) - 0.5 return X -def conv2d_bn(x, nb_filter, nb_row, nb_col, border_mode='same', subsample=(1, 1), name=None): + +def conv2d_bn( + x, nb_filter, nb_row, nb_col, border_mode="same", subsample=(1, 1), name=None +): """ Utility function to apply conv + BN. """ if name is not None: - bn_name = name + '_bn' - conv_name = name + '_conv' + bn_name = name + "_bn" + conv_name = name + "_conv" else: bn_name = None conv_name = None bn_axis = 3 - x = Convolution2D(nb_filter, nb_row, nb_col, - subsample=subsample, - activation='relu', - border_mode=border_mode, - name=conv_name)(x) + x = Convolution2D( + nb_filter, + nb_row, + nb_col, + subsample=subsample, + activation="relu", + border_mode=border_mode, + name=conv_name, + )(x) x = BatchNormalization(axis=bn_axis, name=bn_name)(x) return x -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.') + +@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. Skipping tests.") @pytest.mark.keras1 class KerasNumericCorrectnessTest(unittest.TestCase): """ Unit test class for testing the Keras converter. """ - def _test_keras_model(self, model, num_samples = 1, mode = 'random', - input_blob = 'data', output_blob = 'output', delta = 1e-2, - model_dir = None, transpose_keras_result = True, - one_dim_seq_flags = None ): + + def _test_keras_model( + self, + model, + num_samples=1, + mode="random", + input_blob="data", + output_blob="output", + delta=1e-2, + model_dir=None, + transpose_keras_result=True, + one_dim_seq_flags=None, + ): # transpose_keras_result: if true, compare the transposed Keras result - # one_dim_seq_flags: a list of same length as the number of inputs in + # one_dim_seq_flags: a list of same length as the number of inputs in # the model; if None, treat all 1D input (if any) as non-sequence # if one_dim_seq_flags[i] is True, it means the ith input, with shape - # (X,) is in fact a sequence of length X. - + # (X,) is in fact a sequence of length X. 
+ # Get the CoreML model use_tmp_folder = False if model_dir is None: use_tmp_folder = True model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'keras.mlmodel') + model_path = os.path.join(model_dir, "keras.mlmodel") # Generate data nb_inputs = len(model.inputs) if nb_inputs > 1: - input_names = []; input_data = []; coreml_input = {} + input_names = [] + input_data = [] + coreml_input = {} for i in range(nb_inputs): - input_shape = [1 if a is None else a for a in \ - model.input_shape[i]] + input_shape = [1 if a is None else a for a in model.input_shape[i]] X = _generate_data(input_shape, mode) feature_name = "data_%s" % i input_names.append(feature_name) input_data.append(X) - if one_dim_seq_flags is None: - coreml_input[feature_name] = _keras_transpose(X - ).astype('f').copy() - else: - coreml_input[feature_name] = _keras_transpose(X, - one_dim_seq_flags[i]).astype('f').copy() + if one_dim_seq_flags is None: + coreml_input[feature_name] = _keras_transpose(X).astype("f").copy() + else: + coreml_input[feature_name] = ( + _keras_transpose(X, one_dim_seq_flags[i]).astype("f").copy() + ) else: input_shape = [1 if a is None else a for a in model.input_shape] - input_names = ['data'] + input_names = ["data"] input_data = _generate_data(input_shape, mode) - if one_dim_seq_flags is None: - coreml_input = {'data': _keras_transpose(input_data).astype( - 'f').copy()} - else: - coreml_input = {'data': _keras_transpose(input_data, - one_dim_seq_flags[0]).astype('f').copy()} + if one_dim_seq_flags is None: + coreml_input = {"data": _keras_transpose(input_data).astype("f").copy()} + else: + coreml_input = { + "data": _keras_transpose(input_data, one_dim_seq_flags[0]) + .astype("f") + .copy() + } # Compile the model - output_names = ['output'+str(i) for i in range(len(model.outputs))] - coreml_model = _get_coreml_model(model, model_path, input_names, - output_names) - - if is_macos() and macos_version() >= (10, 13): - # Assuming coreml model output names are in the same order as Keras + output_names = ["output" + str(i) for i in range(len(model.outputs))] + coreml_model = _get_coreml_model(model, model_path, input_names, output_names) + + if _is_macos() and _macos_version() >= (10, 13): + # Assuming coreml model output names are in the same order as Keras # Output list, put predictions into a list, sorted by output name coreml_preds = coreml_model.predict(coreml_input) c_preds = [coreml_preds[name] for name in output_names] @@ -158,7 +210,7 @@ def _test_keras_model(self, model, num_samples = 1, mode = 'random', # Run Keras predictions keras_preds = model.predict(input_data) k_preds = keras_preds if type(keras_preds) is list else [keras_preds] - + # Compare each output blob for idx, k_pred in enumerate(k_preds): if transpose_keras_result: @@ -170,19 +222,18 @@ def _test_keras_model(self, model, num_samples = 1, mode = 'random', self.assertEquals(len(kp), len(cp)) for i in range(len(kp)): max_den = max(1.0, kp[i], cp[i]) - self.assertAlmostEquals(kp[i]/max_den, - cp[i]/max_den, - delta=delta) + self.assertAlmostEquals( + kp[i] / max_den, cp[i] / max_den, delta=delta + ) # Cleanup files - models on disk no longer useful if use_tmp_folder and os.path.exists(model_dir): shutil.rmtree(model_dir) - -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.') + +@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras1 class KerasBasicNumericCorrectnessTest(KerasNumericCorrectnessTest): - def test_tiny_inner_product_zero_input(self): np.random.seed(1988) input_dim = 2 @@ -190,13 +241,13 @@ def test_tiny_inner_product_zero_input(self): # Define a model model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim)) + model.add(Dense(num_channels, input_dim=input_dim)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, mode = 'zeros') + self._test_keras_model(model, mode="zeros") def test_tiny_inner_product_ones(self): np.random.seed(1988) @@ -205,13 +256,13 @@ def test_tiny_inner_product_ones(self): # Define a model model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim)) + model.add(Dense(num_channels, input_dim=input_dim)) # Set some random weights model.set_weights([np.ones(w.shape) for w in model.get_weights()]) # test the keras model - self._test_keras_model(model, mode = 'ones') + self._test_keras_model(model, mode="ones") def test_tiny_inner_product_random(self): np.random.seed(1988) @@ -220,7 +271,7 @@ def test_tiny_inner_product_random(self): # Define a model model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim)) + model.add(Dense(num_channels, input_dim=input_dim)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -231,12 +282,12 @@ def test_tiny_inner_product_random(self): def test_inner_product_random(self): np.random.seed(1988) input_dim = 100 - input_shape = (input_dim, ) + input_shape = (input_dim,) num_channels = 100 # Define a model model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim)) + model.add(Dense(num_channels, input_dim=input_dim)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -254,9 +305,14 @@ def test_tiny_conv_ones(self): # Define a model model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width)) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + ) + ) # Set some random weights model.set_weights([np.ones(w.shape) for w in model.get_weights()]) @@ -274,15 +330,18 @@ def test_tiny_conv_random(self): # Define a model model = Sequential() - model.add(AtrousConvolution2D(num_kernels, kernel_height, kernel_width, - input_shape = input_shape)) + model.add( + AtrousConvolution2D( + num_kernels, kernel_height, kernel_width, input_shape=input_shape + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model self._test_keras_model(model) - + def test_tiny_atrous_conv_random(self): np.random.seed(1988) input_dim = 8 @@ -293,14 +352,21 @@ def test_tiny_atrous_conv_random(self): # Define a model model = Sequential() - model.add(AtrousConvolution2D(nb_filter=num_kernels, nb_row=kernel_height, nb_col=kernel_width, - input_shape = input_shape, atrous_rate = (2,2))) + model.add( + AtrousConvolution2D( + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + input_shape=input_shape, + atrous_rate=(2, 2), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model) + self._test_keras_model(model) def 
test_tiny_atrous_conv_rect_random(self): np.random.seed(1988) @@ -311,8 +377,15 @@ def test_tiny_atrous_conv_rect_random(self): # Define a model model = Sequential() - model.add(AtrousConvolution2D(nb_filter=num_kernels, nb_row=kernel_height, nb_col=kernel_width, - input_shape = input_shape, atrous_rate = (3,3))) + model.add( + AtrousConvolution2D( + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + input_shape=input_shape, + atrous_rate=(3, 3), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -330,9 +403,15 @@ def test_tiny_conv_rect_kernel_x(self): # Define a model model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width, border_mode = 'same')) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + border_mode="same", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -350,9 +429,15 @@ def test_tiny_conv_rect_kernel_y(self): # Define a model model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width, border_mode = 'valid')) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + border_mode="valid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -370,28 +455,42 @@ def test_tiny_conv_rect_kernel_xy(self): # Define a model model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width, border_mode = 'valid')) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + border_mode="valid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model self._test_keras_model(model) - + def test_tiny_conv_pseudo_1d_x(self): np.random.seed(1988) input_dim = 2 input_length = 5 - filter_length = 1 # 3 + filter_length = 1 # 3 nb_filters = 1 # Define a model model = Sequential() - model.add(Convolution2D(nb_filters, 1, filter_length, input_shape=(1,input_length,input_dim), border_mode='valid')) + model.add( + Convolution2D( + nb_filters, + 1, + filter_length, + input_shape=(1, input_length, input_dim), + border_mode="valid", + ) + ) # Set some random weights model.set_weights([np.ones(w.shape) for w in model.get_weights()]) - self._test_keras_model(model, mode='linear') + self._test_keras_model(model, mode="linear") def test_tiny_conv1d_same_random(self): np.random.seed(1988) @@ -400,8 +499,14 @@ def test_tiny_conv1d_same_random(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length, border_mode='same', - input_shape=(input_length, input_dim))) + model.add( + Convolution1D( + nb_filters, + filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -416,8 +521,14 @@ def test_tiny_conv1d_valid_random(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length, border_mode='valid', - 
input_shape=(input_length, input_dim))) + model.add( + Convolution1D( + nb_filters, + filter_length, + border_mode="valid", + input_shape=(input_length, input_dim), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -434,14 +545,20 @@ def test_tiny_atrous_conv1d_random(self): # Define a model model = Sequential() - model.add(AtrousConvolution1D(nb_filter=num_kernels, filter_length=kernel_length, - input_shape = input_shape, atrous_rate = 2)) + model.add( + AtrousConvolution1D( + nb_filter=num_kernels, + filter_length=kernel_length, + input_shape=input_shape, + atrous_rate=2, + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model) + self._test_keras_model(model) def test_tiny_deconv_random(self): np.random.seed(1988) @@ -455,9 +572,17 @@ def test_tiny_deconv_random(self): # Define a model model = Sequential() - model.add(Deconvolution2D(num_kernels, kernel_width, kernel_height, - input_shape = input_shape, output_shape = output_shape, - border_mode = 'valid', subsample = (2,2))) + model.add( + Deconvolution2D( + num_kernels, + kernel_width, + kernel_height, + input_shape=input_shape, + output_shape=output_shape, + border_mode="valid", + subsample=(2, 2), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -477,9 +602,17 @@ def test_tiny_deconv_random_same_padding(self): # Define a model model = Sequential() - model.add(Deconvolution2D(num_kernels, kernel_width, kernel_height, - input_shape = input_shape, output_shape = output_shape, - border_mode = 'same', subsample = (2,2))) + model.add( + Deconvolution2D( + num_kernels, + kernel_width, + kernel_height, + input_shape=input_shape, + output_shape=output_shape, + border_mode="same", + subsample=(2, 2), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -497,10 +630,15 @@ def test_tiny_conv_upsample_random(self): # Define a model model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width)) - model.add(UpSampling2D(size = (2, 2))) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + ) + ) + model.add(UpSampling2D(size=(2, 2))) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -515,9 +653,9 @@ def test_housenet_random(self): # Define a model model = Sequential() - model.add(Dense(num_hidden, input_dim = num_features)) - model.add(Activation('relu')) - model.add(Dense(1, input_dim = num_features)) + model.add(Dense(num_hidden, input_dim=num_features)) + model.add(Activation("relu")) + model.add(Dense(1, input_dim=num_features)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -533,14 +671,23 @@ def test_tiny_no_sequence_lstm_zeros(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'cpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="cpu", + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - 
self._test_keras_model(model, mode = 'zeros', input_blob = 'data', output_blob = 'output') + self._test_keras_model( + model, mode="zeros", input_blob="data", output_blob="output" + ) def test_tiny_no_sequence_lstm_zeros_gpu(self): np.random.seed(1988) @@ -550,14 +697,23 @@ def test_tiny_no_sequence_lstm_zeros_gpu(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, mode = 'zeros', input_blob = 'data', output_blob = 'output') + self._test_keras_model( + model, mode="zeros", input_blob="data", output_blob="output" + ) def test_tiny_no_sequence_lstm_ones(self): np.random.seed(1988) @@ -567,14 +723,23 @@ def test_tiny_no_sequence_lstm_ones(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'cpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="cpu", + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, mode = 'ones', input_blob = 'data', output_blob = 'output') + self._test_keras_model( + model, mode="ones", input_blob="data", output_blob="output" + ) def test_small_no_sequence_lstm_zeros(self): np.random.seed(1988) @@ -584,14 +749,23 @@ def test_small_no_sequence_lstm_zeros(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, mode = 'zeros', input_blob = 'data', output_blob = 'output') + self._test_keras_model( + model, mode="zeros", input_blob="data", output_blob="output" + ) def test_small_no_sequence_lstm_ones(self): np.random.seed(1988) @@ -601,14 +775,23 @@ def test_small_no_sequence_lstm_ones(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, mode = 'ones', input_blob = 'data', output_blob = 'output') + self._test_keras_model( + model, mode="ones", input_blob="data", output_blob="output" + ) def test_tiny_no_sequence_simple_rnn_random(self): np.random.seed(1988) @@ -619,14 +802,15 @@ def test_tiny_no_sequence_simple_rnn_random(self): # Define a model model = Sequential() - model.add(SimpleRNN(num_channels, input_dim = input_dim, - input_length = input_length)) + model.add( + 
SimpleRNN(num_channels, input_dim=input_dim, input_length=input_length) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_tiny_no_sequence_gru_random(self): np.random.seed(1988) @@ -637,14 +821,20 @@ def test_tiny_no_sequence_gru_random(self): # Define a model model = Sequential() - model.add(GRU(num_channels, input_dim = input_dim, - input_length = input_length, inner_activation = 'sigmoid')) + model.add( + GRU( + num_channels, + input_dim=input_dim, + input_length=input_length, + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_tiny_no_sequence_bidir_random(self): np.random.seed(1988) @@ -655,35 +845,52 @@ def test_tiny_no_sequence_bidir_random(self): # Define a model model = Sequential() - model.add(Bidirectional(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'cpu', inner_activation = 'sigmoid'), - input_shape=(input_length, input_dim))) + model.add( + Bidirectional( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="cpu", + inner_activation="sigmoid", + ), + input_shape=(input_length, input_dim), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_tiny_no_sequence_bidir_random_gpu(self): np.random.seed(1988) input_dim = 1 input_length = 1 num_channels = 1 num_samples = 1 - + # Define a model model = Sequential() - model.add(Bidirectional(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid'), - input_shape=(input_length, input_dim))) - + model.add( + Bidirectional( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ), + input_shape=(input_length, input_dim), + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - - # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + # Test the keras model + self._test_keras_model(model, input_blob="data", output_blob="output") def test_small_no_sequence_lstm_random(self): np.random.seed(1988) @@ -693,14 +900,21 @@ def test_small_no_sequence_lstm_random(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def 
test_small_no_sequence_gru_random(self): np.random.seed(1988) @@ -710,14 +924,20 @@ def test_small_no_sequence_gru_random(self): # Define a model model = Sequential() - model.add(GRU(num_channels, input_dim = input_dim, - input_length = input_length, inner_activation = 'sigmoid')) + model.add( + GRU( + num_channels, + input_dim=input_dim, + input_length=input_length, + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_small_no_sequence_bidir_random(self): np.random.seed(1988) @@ -727,15 +947,24 @@ def test_small_no_sequence_bidir_random(self): # Define a model model = Sequential() - model.add(Bidirectional(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid'), - input_shape=(input_length, input_dim))) + model.add( + Bidirectional( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ), + input_shape=(input_length, input_dim), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_small_no_sequence_simple_rnn_random(self): np.random.seed(1988) @@ -745,14 +974,20 @@ def test_small_no_sequence_simple_rnn_random(self): # Define a model model = Sequential() - model.add(SimpleRNN(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu')) + model.add( + SimpleRNN( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_medium_no_sequence_lstm_random(self): np.random.seed(1988) @@ -762,14 +997,20 @@ def test_medium_no_sequence_lstm_random(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_medium_no_sequence_bidir_random(self): np.random.seed(1988) @@ -779,15 +1020,24 @@ def test_medium_no_sequence_bidir_random(self): # Define a model model = Sequential() - model.add(Bidirectional(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'gpu', inner_activation = 'sigmoid'), - input_shape=(input_length, input_dim))) + model.add( + Bidirectional( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="gpu", + inner_activation="sigmoid", + ), + input_shape=(input_length, input_dim), + ) + ) # Set some random weights 
model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_medium_bidir_random_return_seq_false(self): np.random.seed(1988) @@ -797,155 +1047,221 @@ def test_medium_bidir_random_return_seq_false(self): # Define a model model = Sequential() - model.add(Bidirectional(LSTM(num_channels, input_dim=input_dim, - input_length=input_length, return_sequences=False, - consume_less='gpu', inner_activation='sigmoid'), - input_shape=(input_length, input_dim))) + model.add( + Bidirectional( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + consume_less="gpu", + inner_activation="sigmoid", + ), + input_shape=(input_length, input_dim), + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob='data', output_blob='output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_medium_bidir_random_return_seq_true(self): np.random.seed(1988) input_dim = 7 input_length = 5 num_channels = 10 - + # Define a model model = Sequential() - model.add(Bidirectional(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, return_sequences = True, - consume_less = 'gpu', inner_activation = 'sigmoid'), - input_shape=(input_length, input_dim))) - + model.add( + Bidirectional( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + return_sequences=True, + consume_less="gpu", + inner_activation="sigmoid", + ), + input_shape=(input_length, input_dim), + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - - # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - - + + # Test the keras model + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_lstm_seq(self): np.random.seed(1988) input_dim = 11 input_length = 5 - + # Define a model model = Sequential() - model.add(LSTM(20, input_dim=input_dim, input_length=input_length, return_sequences=False)) - + model.add( + LSTM( + 20, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_lstm_seq_dense(self): np.random.seed(1988) - + input_dim = 5 num_hidden = 12 num_classes = 6 input_length = 3 - + # Define a model model = Sequential() - model.add(LSTM(num_hidden, input_dim=input_dim, input_length=input_length, return_sequences=False)) - model.add(Dense(num_classes, activation='softmax')) - + model.add( + LSTM( + num_hidden, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + ) + ) + model.add(Dense(num_classes, activation="softmax")) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - - - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_lstm_seq_backwards(self): np.random.seed(1988) - + 
input_dim = 11 input_length = 5 - + # Define a model model = Sequential() - model.add(LSTM(20, input_dim=input_dim, input_length=input_length, return_sequences=False, go_backwards=True)) - + model.add( + LSTM( + 20, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + go_backwards=True, + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_rnn_seq(self): np.random.seed(1988) - + input_dim = 11 input_length = 5 - + # Define a model model = Sequential() - model.add(SimpleRNN(20, input_dim=input_dim, input_length=input_length, return_sequences=False)) - + model.add( + SimpleRNN( + 20, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_rnn_seq_backwards(self): np.random.seed(1988) - + input_dim = 11 input_length = 5 - + # Define a model model = Sequential() - model.add(SimpleRNN(20, input_dim=input_dim, input_length=input_length, return_sequences=False, go_backwards=True)) - + model.add( + SimpleRNN( + 20, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + go_backwards=True, + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_gru_seq(self): np.random.seed(1988) - + input_dim = 11 input_length = 5 - + # Define a model model = Sequential() - model.add(GRU(20, input_dim=input_dim, input_length=input_length, return_sequences=False)) - + model.add( + GRU( + 20, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_gru_seq_backwards(self): np.random.seed(1988) - + input_dim = 11 input_length = 5 - + # Define a model model = Sequential() - model.add(GRU(20, input_dim=input_dim, input_length=input_length, return_sequences=False, go_backwards=True)) - + model.add( + GRU( + 20, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + go_backwards=True, + ) + ) + # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') - - + self._test_keras_model(model, input_blob="data", output_blob="output") + def test_medium_no_sequence_simple_rnn_random(self): np.random.seed(1988) input_dim = 10 @@ -954,14 +1270,15 @@ def test_medium_no_sequence_simple_rnn_random(self): # Define a model model = Sequential() - model.add(SimpleRNN(num_channels, input_dim = input_dim, - input_length = input_length)) + model.add( + SimpleRNN(num_channels, input_dim=input_dim, 
input_length=input_length) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output') + self._test_keras_model(model, input_blob="data", output_blob="output") def test_medium_no_sequence_gru_random(self): np.random.seed(1988) @@ -971,8 +1288,14 @@ def test_medium_no_sequence_gru_random(self): # Define a model model = Sequential() - model.add(GRU(num_channels, input_dim = input_dim, - input_length = input_length, inner_activation = 'sigmoid')) + model.add( + GRU( + num_channels, + input_dim=input_dim, + input_length=input_length, + inner_activation="sigmoid", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -991,10 +1314,16 @@ def test_medium_conv_batchnorm_random(self): # Define a model from keras.layers.normalization import BatchNormalization + model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width)) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + ) + ) model.add(BatchNormalization(epsilon=1e-5)) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1007,9 +1336,11 @@ def test_tiny_conv_elu_random(self): # Define a model from keras.layers.advanced_activations import ELU + model = Sequential() - model.add(Convolution2D(input_shape = (10, 10, 3), - nb_filter = 3, nb_row = 5, nb_col = 5)) + model.add( + Convolution2D(input_shape=(10, 10, 3), nb_filter=3, nb_row=5, nb_col=5) + ) model.add(ELU(alpha=0.8)) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1022,9 +1353,17 @@ def test_tiny_conv_prelu_random(self): # Define a model from keras.layers.advanced_activations import PReLU + model = Sequential() - model.add(Convolution2D(input_shape = (10, 10, 3), - nb_filter = 3, nb_row = 5, nb_col = 5, border_mode = 'same')) + model.add( + Convolution2D( + input_shape=(10, 10, 3), + nb_filter=3, + nb_row=5, + nb_col=5, + border_mode="same", + ) + ) model.add(PReLU(shared_axes=[1, 2])) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1037,9 +1376,17 @@ def test_tiny_conv_leaky_relu_random(self): # Define a model from keras.layers.advanced_activations import LeakyReLU + model = Sequential() - model.add(Convolution2D(input_shape = (10, 10, 3), - nb_filter = 3, nb_row = 5, nb_col = 5, border_mode = 'same')) + model.add( + Convolution2D( + input_shape=(10, 10, 3), + nb_filter=3, + nb_row=5, + nb_col=5, + border_mode="same", + ) + ) model.add(LeakyReLU(alpha=0.3)) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1048,16 +1395,25 @@ def test_tiny_conv_leaky_relu_random(self): self._test_keras_model(model) def test_tiny_parametric_softplus_random(self): - input_shape = (8,8,3) # (10,10,3) + input_shape = (8, 8, 3) # (10,10,3) # Define a model from keras.layers.advanced_activations import ParametricSoftplus + model = Sequential() - model.add(ParametricSoftplus(input_shape = input_shape)) + model.add(ParametricSoftplus(input_shape=input_shape)) alpha_per_channel = np.random.rand(3) beta_per_channel = np.random.rand(3) - alphas = np.repeat(alpha_per_channel, input_shape[0] * input_shape[1]).reshape(input_shape[::-1]).transpose((2,1,0)) - betas = np.repeat(beta_per_channel, input_shape[0] * 
input_shape[1]).reshape(input_shape[::-1]).transpose((2,1,0)) + alphas = ( + np.repeat(alpha_per_channel, input_shape[0] * input_shape[1]) + .reshape(input_shape[::-1]) + .transpose((2, 1, 0)) + ) + betas = ( + np.repeat(beta_per_channel, input_shape[0] * input_shape[1]) + .reshape(input_shape[::-1]) + .transpose((2, 1, 0)) + ) model.layers[0].set_weights([alphas, betas]) @@ -1066,22 +1422,38 @@ def test_tiny_parametric_softplus_random(self): def test_tiny_conv_parametric_softplus_random(self): np.random.seed(1988) - input_shape = (8,8,3) # (10,10,3) + input_shape = (8, 8, 3) # (10,10,3) nb_filters = 2 - output_shape = (8,8,2) + output_shape = (8, 8, 2) # Define a model from keras.layers.advanced_activations import ParametricSoftplus + model = Sequential() - model.add(Convolution2D(input_shape = (8, 8, 3), - nb_filter = nb_filters, nb_row = 3, nb_col = 3, border_mode = 'same')) + model.add( + Convolution2D( + input_shape=(8, 8, 3), + nb_filter=nb_filters, + nb_row=3, + nb_col=3, + border_mode="same", + ) + ) model.add(ParametricSoftplus()) # CoreML only takes 1-param per channel, so weights are set differently # model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) alpha_per_channel = np.random.rand(nb_filters) beta_per_channel = np.random.rand(nb_filters) - alphas = np.repeat(alpha_per_channel, output_shape[0] * output_shape[1]).reshape(output_shape[::-1]).transpose((2,1,0)) - betas = np.repeat(beta_per_channel, output_shape[0] * output_shape[1]).reshape(output_shape[::-1]).transpose((2,1,0)) + alphas = ( + np.repeat(alpha_per_channel, output_shape[0] * output_shape[1]) + .reshape(output_shape[::-1]) + .transpose((2, 1, 0)) + ) + betas = ( + np.repeat(beta_per_channel, output_shape[0] * output_shape[1]) + .reshape(output_shape[::-1]) + .transpose((2, 1, 0)) + ) model.layers[1].set_weights([alphas, betas]) # Get the coreml model @@ -1092,6 +1464,7 @@ def test_tiny_dense_parametric_softplus_random(self): # Define a model from keras.layers.advanced_activations import ParametricSoftplus + model = Sequential() model.add(Dense(10, input_shape=(4,))) model.add(ParametricSoftplus()) @@ -1106,9 +1479,17 @@ def test_tiny_conv_thresholded_relu_random(self): # Define a model from keras.layers.advanced_activations import ThresholdedReLU + model = Sequential() - model.add(Convolution2D(input_shape = (10, 10, 3), - nb_filter = 3, nb_row = 5, nb_col = 5, border_mode = 'same')) + model.add( + Convolution2D( + input_shape=(10, 10, 3), + nb_filter=3, + nb_row=5, + nb_col=5, + border_mode="same", + ) + ) model.add(ThresholdedReLU(theta=0.8)) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1122,11 +1503,11 @@ def test_tiny_concat_random(self): num_channels = 6 # Define a model - input_tensor = Input(shape = (input_dim, )) + input_tensor = Input(shape=(input_dim,)) x1 = Dense(num_channels)(input_tensor) x2 = Dense(num_channels)(x1) x3 = Dense(num_channels)(x1) - x4 = merge([x2, x3], mode='concat') + x4 = merge([x2, x3], mode="concat") x5 = Dense(num_channels)(x4) model = Model(input=[input_tensor], output=[x5]) @@ -1145,10 +1526,10 @@ def test_tiny_concat_seq_random(self): num_channels = 6 # Define a model - input_tensor = Input(shape = (seq_len, )) + input_tensor = Input(shape=(seq_len,)) x1 = Embedding(max_features, embedding_dims)(input_tensor) x2 = Embedding(max_features, embedding_dims)(input_tensor) - x3 = merge([x1, x2], mode='concat', concat_axis=1) + x3 = merge([x1, x2], mode="concat", concat_axis=1) model = Model(input=[input_tensor], output=[x3]) 
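The `np.repeat(...).reshape(input_shape[::-1]).transpose((2, 1, 0))` chain used to build the `ParametricSoftplus` weights above deserves a worked example: it broadcasts one alpha (or beta) per channel across the whole spatial grid. A tiny standalone sketch with made-up sizes:

```python
import numpy as np

H, W, C = 2, 2, 3  # stand-ins for an (H, W, C) input_shape
alpha_per_channel = np.array([0.1, 0.2, 0.3])

alphas = (
    np.repeat(alpha_per_channel, H * W)  # (C*H*W,), grouped by channel
    .reshape((C, W, H))                  # i.e. input_shape[::-1]
    .transpose((2, 1, 0))                # back to (H, W, C)
)

assert alphas.shape == (H, W, C)
assert np.allclose(alphas[..., 1], 0.2)  # channel 1 is 0.2 at every position
```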
@@ -1164,11 +1545,11 @@ def test_tiny_add_random(self): num_channels = 6 # Define a model - input_tensor = Input(shape = (input_dim, )) + input_tensor = Input(shape=(input_dim,)) x1 = Dense(num_channels)(input_tensor) x2 = Dense(num_channels)(x1) x3 = Dense(num_channels)(x1) - x4 = merge([x2, x3], mode='sum') + x4 = merge([x2, x3], mode="sum") x5 = Dense(num_channels)(x4) model = Model(input=[input_tensor], output=[x5]) @@ -1185,11 +1566,11 @@ def test_tiny_mul_random(self): num_channels = 6 # Define a model - input_tensor = Input(shape = (input_dim, )) + input_tensor = Input(shape=(input_dim,)) x1 = Dense(num_channels)(input_tensor) x2 = Dense(num_channels)(x1) x3 = Dense(num_channels)(x1) - x4 = merge([x2, x3], mode='mul') + x4 = merge([x2, x3], mode="mul") x5 = Dense(num_channels)(x4) model = Model(input=[input_tensor], output=[x5]) @@ -1206,11 +1587,11 @@ def test_tiny_cos_random(self): num_channels = 6 # Define a model - input_tensor = Input(shape = (input_dim, )) + input_tensor = Input(shape=(input_dim,)) x1 = Dense(num_channels)(input_tensor) x2 = Dense(num_channels)(x1) x3 = Dense(num_channels)(x1) - x4 = merge([x2, x3], mode='cos') + x4 = merge([x2, x3], mode="cos") x5 = Dense(num_channels)(x4) model = Model(input=[input_tensor], output=[x5]) @@ -1222,10 +1603,9 @@ def test_tiny_cos_random(self): self._test_keras_model(model) def test_zeropad_simple(self): - input_shape = (48, 48, 3) model = Sequential() - model.add(ZeroPadding2D((1,1),input_shape=input_shape)) + model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1234,22 +1614,20 @@ def test_zeropad_simple(self): self._test_keras_model(model) def test_zeropad_fancy(self): - input_shape = (48, 48, 3) model = Sequential() - model.add(ZeroPadding2D((2,5),input_shape=input_shape)) + model.add(ZeroPadding2D((2, 5), input_shape=input_shape)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) # Get the coreml model self._test_keras_model(model) - + def test_crop_simple(self): - input_shape = (48, 48, 3) model = Sequential() - model.add(Cropping2D(cropping=((2,5),(2,5)),input_shape=input_shape)) + model.add(Cropping2D(cropping=((2, 5), (2, 5)), input_shape=input_shape)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1272,37 +1650,61 @@ def test_tiny_permute(self): def test_max_pooling_no_overlap(self): # no_overlap: pool_size = strides model = Sequential() - model.add(MaxPooling2D(input_shape=(16,16,3), pool_size=(2, 2), - strides=None, border_mode='valid')) + model.add( + MaxPooling2D( + input_shape=(16, 16, 3), + pool_size=(2, 2), + strides=None, + border_mode="valid", + ) + ) self._test_keras_model(model) def test_max_pooling_overlap_multiple(self): # input shape is multiple of pool_size, strides != pool_size model = Sequential() - model.add(MaxPooling2D(input_shape=(18,18,3), pool_size=(3, 3), - strides=(2,2), border_mode='valid')) + model.add( + MaxPooling2D( + input_shape=(18, 18, 3), + pool_size=(3, 3), + strides=(2, 2), + border_mode="valid", + ) + ) self._test_keras_model(model) def test_max_pooling_overlap_odd(self): model = Sequential() - model.add(MaxPooling2D(input_shape=(16,16,3), pool_size=(3, 3), - strides=(2,2), border_mode='valid')) + model.add( + MaxPooling2D( + input_shape=(16, 16, 3), + pool_size=(3, 3), + strides=(2, 2), + border_mode="valid", + ) + ) self._test_keras_model(model) def 
test_max_pooling_overlap_same(self): model = Sequential() - model.add(MaxPooling2D(input_shape=(16,16,3), pool_size=(3, 3), - strides=(2,2), border_mode='same')) + model.add( + MaxPooling2D( + input_shape=(16, 16, 3), + pool_size=(3, 3), + strides=(2, 2), + border_mode="same", + ) + ) self._test_keras_model(model) def test_global_max_pooling(self): model = Sequential() - model.add(GlobalMaxPooling2D(input_shape=(16,16,3))) + model.add(GlobalMaxPooling2D(input_shape=(16, 16, 3))) self._test_keras_model(model) def test_max_pooling_1d(self): model = Sequential() - model.add(MaxPooling1D(input_shape=(16,3), pool_length=4)) + model.add(MaxPooling1D(input_shape=(16, 3), pool_length=4)) self._test_keras_model(model) def test_global_max_pooling_1d(self): @@ -1312,28 +1714,46 @@ def test_global_max_pooling_1d(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length, border_mode='same', - input_shape=(input_length, input_dim))) + model.add( + Convolution1D( + nb_filters, + filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) model.add(GlobalMaxPooling1D()) self._test_keras_model(model) def test_average_pooling_no_overlap(self): # no_overlap: pool_size = strides model = Sequential() - model.add(AveragePooling2D(input_shape=(16,16,3), pool_size=(2, 2), - strides=None, border_mode='valid')) + model.add( + AveragePooling2D( + input_shape=(16, 16, 3), + pool_size=(2, 2), + strides=None, + border_mode="valid", + ) + ) self._test_keras_model(model, delta=1e-2) def test_average_pooling_inception_config_1(self): # no_overlap: pool_size = strides model = Sequential() - model.add(AveragePooling2D(input_shape=(16,16,3), pool_size=(3,3), - strides=(1,1), border_mode='same')) + model.add( + AveragePooling2D( + input_shape=(16, 16, 3), + pool_size=(3, 3), + strides=(1, 1), + border_mode="same", + ) + ) self._test_keras_model(model, delta=1e-2) def test_global_average_pooling(self): model = Sequential() - model.add(GlobalAveragePooling2D(input_shape=(16,16,3))) + model.add(GlobalAveragePooling2D(input_shape=(16, 16, 3))) self._test_keras_model(model) def test_average_pooling_1d(self): @@ -1343,8 +1763,14 @@ def test_average_pooling_1d(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length, border_mode='same', - input_shape=(input_length, input_dim))) + model.add( + Convolution1D( + nb_filters, + filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) model.add(AveragePooling1D(pool_length=2)) self._test_keras_model(model) @@ -1355,8 +1781,14 @@ def test_global_average_pooling_1d(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length, border_mode='same', - input_shape=(input_length, input_dim))) + model.add( + Convolution1D( + nb_filters, + filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) model.add(GlobalAveragePooling1D()) self._test_keras_model(model) @@ -1372,10 +1804,16 @@ def test_tiny_conv_dense_random(self): # Define a model from keras.layers import Flatten + model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width)) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + ) + ) model.add(Flatten()) model.add(Dense(hidden_dim)) @@ -1393,7 +1831,7 @@ def 
test_tiny_dense_tanh_fused_random(self): # Define a model model = Sequential() - model.add(Dense(hidden_dim, input_shape=(input_dim,), activation='tanh')) + model.add(Dense(hidden_dim, input_shape=(input_dim,), activation="tanh")) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1413,9 +1851,15 @@ def test_tiny_conv_relu_fused_random(self): # Define a model model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_kernels, nb_row = kernel_height, - nb_col = kernel_width, activation='relu')) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_kernels, + nb_row=kernel_height, + nb_col=kernel_width, + activation="relu", + ) + ) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1425,48 +1869,44 @@ def test_tiny_conv_relu_fused_random(self): def test_flatten(self): model = Sequential() - model.add(Flatten(input_shape=(2,2,2))) - self._test_keras_model(model, mode='linear') - + model.add(Flatten(input_shape=(2, 2, 2))) + self._test_keras_model(model, mode="linear") + def test_reshape_3d(self): model = Sequential() - model.add(Reshape((10,1,6), input_shape=(5,4,3))) - self._test_keras_model(model, mode = 'linear') - + model.add(Reshape((10, 1, 6), input_shape=(5, 4, 3))) + self._test_keras_model(model, mode="linear") + def test_embedding(self): - model = Sequential() num_inputs = 10 num_outputs = 3 model.add(Embedding(num_inputs, num_outputs)) - + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + self._test_keras_model(model) - + def test_embedding_seq(self): - model = Sequential() num_inputs = 10 num_outputs = 3 model.add(Embedding(num_inputs, num_outputs, input_length=7)) - + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + self._test_keras_model(model, one_dim_seq_flags=[True]) - + def test_tiny_time_distrbuted(self): - # as the first layer in a model model = Sequential() model.add(TimeDistributed(Dense(8), input_shape=(10, 16))) - + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + self._test_keras_model(model) - + def test_tiny_sequence_lstm(self): - np.random.seed(1988) input_dim = 1 input_length = 2 @@ -1474,31 +1914,46 @@ def test_tiny_sequence_lstm(self): # Define a model model = Sequential() - model.add(LSTM(num_channels, input_dim = input_dim, - input_length = input_length, consume_less = 'cpu', inner_activation = 'sigmoid')) + model.add( + LSTM( + num_channels, + input_dim=input_dim, + input_length=input_length, + consume_less="cpu", + inner_activation="sigmoid", + ) + ) # Set some random weights - model.set_weights([(np.random.rand(*w.shape)-0.5)/5.0 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) / 5.0 for w in model.get_weights()] + ) # Test the keras model - self._test_keras_model(model, input_blob = 'data', output_blob = 'output', delta=1e-4) + self._test_keras_model( + model, input_blob="data", output_blob="output", delta=1e-4 + ) def test_tiny_spatial_bn(self): np.random.seed(1988) - x_in = Input(shape=(7,7,2)) + x_in = Input(shape=(7, 7, 2)) x = ZeroPadding2D(padding=(1, 1))(x_in) x = BatchNormalization(axis=2)(x) model = Model(x_in, x) - - self._test_keras_model(model, input_blob = 'data', output_blob = 'output', delta=1e-2) - + + self._test_keras_model( + model, input_blob="data", output_blob="output", delta=1e-2 + ) + def test_dense_fused_act_in_td(self): np.random.seed(1988) - x_in = 
Input(shape=(10,2)) - x = TimeDistributed(Dense(6, activation = 'softmax'))(x_in) + x_in = Input(shape=(10, 2)) + x = TimeDistributed(Dense(6, activation="softmax"))(x_in) model = Model(x_in, x) - - self._test_keras_model(model, input_blob = 'data', output_blob = 'output', delta=1e-2) + + self._test_keras_model( + model, input_blob="data", output_blob="output", delta=1e-2 + ) def test_tiny_conv_upsample_1d_random(self): np.random.seed(1988) @@ -1507,9 +1962,15 @@ def test_tiny_conv_upsample_1d_random(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length = filter_length, border_mode ='same', - input_shape=(input_length, input_dim))) - model.add(UpSampling1D(length = 2)) + model.add( + Convolution1D( + nb_filters, + filter_length=filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) + model.add(UpSampling1D(length=2)) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1524,9 +1985,15 @@ def test_tiny_conv_crop_1d_random(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length = filter_length, border_mode='same', - input_shape=(input_length, input_dim))) - model.add(Cropping1D(cropping = (2,2))) + model.add( + Convolution1D( + nb_filters, + filter_length=filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) + model.add(Cropping1D(cropping=(2, 2))) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1541,9 +2008,15 @@ def test_tiny_conv_pad_1d_random(self): filter_length = 3 nb_filters = 4 model = Sequential() - model.add(Convolution1D(nb_filters, filter_length = filter_length, border_mode='same', - input_shape=(input_length, input_dim))) - model.add(ZeroPadding1D(padding = (2,2))) + model.add( + Convolution1D( + nb_filters, + filter_length=filter_length, + border_mode="same", + input_shape=(input_length, input_dim), + ) + ) + model.add(ZeroPadding1D(padding=(2, 2))) # Set some random weights model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) @@ -1557,7 +2030,14 @@ def test_conv_batch_1d(self): input_length = 10 model = Sequential() - model.add(Embedding(vocabulary_size, embedding_dimension, input_length=input_length, trainable=True)) + model.add( + Embedding( + vocabulary_size, + embedding_dimension, + input_length=input_length, + trainable=True, + ) + ) model.add(Convolution1D(5, 2)) model.add(BatchNormalization()) @@ -1569,85 +2049,94 @@ def test_conv_batch_1d(self): # Making sure that giant channel sizes get handled correctly def test_large_channel_gpu(self): - input_shape = (20, 20, 3) num_channels = 2049 kernel_size = 3 - + model = Sequential() - model.add(Convolution2D(input_shape = input_shape, - nb_filter = num_channels, nb_row = kernel_size, - nb_col = kernel_size)) + model.add( + Convolution2D( + input_shape=input_shape, + nb_filter=num_channels, + nb_row=kernel_size, + nb_col=kernel_size, + ) + ) - model.set_weights([(np.random.rand(*w.shape)-0.5)/5.0 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) / 5.0 for w in model.get_weights()] + ) - self._test_keras_model(model, input_blob='data', output_blob='output', delta=1e-2) + self._test_keras_model( + model, input_blob="data", output_blob="output", delta=1e-2 + ) - @pytest.mark.xfail(raises = Exception) + @pytest.mark.xfail(raises=Exception) def test_large_batch_gpu(self): - batch_size = 
2049 num_channels = 4 kernel_size = 3 - + model = Sequential() - model.add(TimeDistributed(Dense(num_channels), input_shape=(batch_size, kernel_size))) - - model.set_weights([(np.random.rand(*w.shape)-0.5)/5.0 for w in model.get_weights()]) - - self._test_keras_model(model, input_blob='data', output_blob='output', delta=1e-2) + model.add( + TimeDistributed(Dense(num_channels), input_shape=(batch_size, kernel_size)) + ) + + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) / 5.0 for w in model.get_weights()] + ) + + self._test_keras_model( + model, input_blob="data", output_blob="output", delta=1e-2 + ) -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. Skipping tests.") @pytest.mark.keras1 class KerasTopologyCorrectnessTest(KerasNumericCorrectnessTest): - def test_tiny_sequential_merge(self): - np.random.seed(1988) # Define a model model1 = Sequential() - model1.add(Dense(4, input_dim = 3)) + model1.add(Dense(4, input_dim=3)) model1.add(Dense(4)) model2 = Sequential() - model2.add(Dense(4, input_dim = 3)) + model2.add(Dense(4, input_dim=3)) model2.add(Dense(4)) model3 = Sequential() - model3.add(Merge([model1, model2], mode='concat')) + model3.add(Merge([model1, model2], mode="concat")) # Set some random weights model3.set_weights([np.random.rand(*w.shape) for w in model3.get_weights()]) # Test the keras model self._test_keras_model(model3) - + def test_dangling_merge_left(self): - - x1 = Input(shape=(4,), name = 'input1') - x2 = Input(shape=(5,), name = 'input2') - y1 = Dense(6, name = 'dense')(x2) - z = merge([x1, y1], mode='concat') - model = Model([x1,x2], [z]) - - model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + x1 = Input(shape=(4,), name="input1") + x2 = Input(shape=(5,), name="input2") + y1 = Dense(6, name="dense")(x2) + z = merge([x1, y1], mode="concat") + model = Model([x1, x2], [z]) + + model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) + self._test_keras_model(model) - + def test_dangling_merge_right(self): - - x1 = Input(shape=(4,), name = 'input1') - x2 = Input(shape=(5,), name = 'input2') - y1 = Dense(6, name = 'dense')(x2) - z = merge([y1, x1], mode='concat') - model = Model([x1,x2], [z]) + x1 = Input(shape=(4,), name="input1") + x2 = Input(shape=(5,), name="input2") + y1 = Dense(6, name="dense")(x2) + z = merge([y1, x1], mode="concat") + model = Model([x1, x2], [z]) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - + self._test_keras_model(model) - + def test_shared_vision(self): - digit_input = Input(shape=(27, 27,1)) + digit_input = Input(shape=(27, 27, 1)) x = Convolution2D(64, 3, 3)(digit_input) x = Convolution2D(64, 3, 3)(x) out = Flatten()(x) @@ -1655,15 +2144,15 @@ def test_shared_vision(self): vision_model = Model(digit_input, out) # then define the tell-digits-apart model - digit_a = Input(shape=(27,27,1)) - digit_b = Input(shape=(27,27,1)) + digit_a = Input(shape=(27, 27, 1)) + digit_b = Input(shape=(27, 27, 1)) # the vision model will be shared, weights and all out_a = vision_model(digit_a) out_b = vision_model(digit_b) - concatenated = merge([out_a, out_b], mode='concat') - out = Dense(1, activation='sigmoid')(concatenated) + concatenated = merge([out_a, out_b], mode="concat") + out = Dense(1, activation="sigmoid")(concatenated) model = Model([digit_a, digit_b], out) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) self._test_keras_model(model) @@ -1678,56 +2167,58 @@ def 
test_tiny_weight_sharing(self): y1 = dense(x) y2 = dense(x) y3 = Dense(4)(y2) - z = merge([y1, y3], mode='concat') + z = merge([y1, y3], mode="concat") model = Model(x, z) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_keras_model(model, mode = 'random', delta=1e-2) + self._test_keras_model(model, mode="random", delta=1e-2) + -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. Skipping tests.") @pytest.mark.keras1 class KerasInceptionCorrectnessTest(KerasNumericCorrectnessTest): - def test_inception_conv_stage(self): - input_shape = (299,299,3) + input_shape = (299, 299, 3) img_input = Input(shape=input_shape) channel_axis = 3 inputs = img_input - x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid') - x = conv2d_bn(x, 32, 3, 3, border_mode='valid') + x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode="valid") + x = conv2d_bn(x, 32, 3, 3, border_mode="valid") x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = conv2d_bn(x, 80, 1, 1, border_mode='valid') - x = conv2d_bn(x, 192, 3, 3, border_mode='valid') + x = conv2d_bn(x, 80, 1, 1, border_mode="valid") + x = conv2d_bn(x, 192, 3, 3, border_mode="valid") x = MaxPooling2D((3, 3), strides=(2, 2))(x) - model = Model(inputs, x, name='inception_v3') + model = Model(inputs, x, name="inception_v3") # Set some random weights # use small weights for numerical correctness - model.set_weights([np.random.rand(*w.shape)*1e-3 for w in model.get_weights()]) + model.set_weights( + [np.random.rand(*w.shape) * 1e-3 for w in model.get_weights()] + ) # Get the coreml model self._test_keras_model(model) def test_inception_first_branch(self): - input_shape = (299,299,3) + input_shape = (299, 299, 3) img_input = Input(shape=input_shape) channel_axis = 3 inputs = img_input - x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid') - x = conv2d_bn(x, 32, 3, 3, border_mode='valid') + x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode="valid") + x = conv2d_bn(x, 32, 3, 3, border_mode="valid") x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = conv2d_bn(x, 80, 1, 1, border_mode='valid') - x = conv2d_bn(x, 192, 3, 3, border_mode='valid') + x = conv2d_bn(x, 80, 1, 1, border_mode="valid") + x = conv2d_bn(x, 192, 3, 3, border_mode="valid") x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 @@ -1741,37 +2232,43 @@ def test_inception_first_branch(self): branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")( + x + ) branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(i)) + x = merge( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed" + str(i), + ) - model = Model(inputs, x, name='inception_v3') + model = Model(inputs, x, name="inception_v3") # Set some random weights # use small weights for numerical correctness - model.set_weights([np.random.rand(*w.shape)*1e-3 for w in model.get_weights()]) + model.set_weights( + [np.random.rand(*w.shape) * 1e-3 for w in model.get_weights()] + ) # Get the coreml model self._test_keras_model(model) def 
test_inception_second_branch(self): - input_shape = (299,299,3) + input_shape = (299, 299, 3) img_input = Input(shape=input_shape) channel_axis = 3 inputs = img_input - x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid') - x = conv2d_bn(x, 32, 3, 3, border_mode='valid') + x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode="valid") + x = conv2d_bn(x, 32, 3, 3, border_mode="valid") x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = conv2d_bn(x, 80, 1, 1, border_mode='valid') - x = conv2d_bn(x, 192, 3, 3, border_mode='valid') + x = conv2d_bn(x, 80, 1, 1, border_mode="valid") + x = conv2d_bn(x, 192, 3, 3, border_mode="valid") x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 @@ -1785,25 +2282,33 @@ def test_inception_second_branch(self): branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")( + x + ) branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(i)) + x = merge( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed" + str(i), + ) # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid') + branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode="valid") branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, - subsample=(2, 2), border_mode='valid') + branch3x3dbl = conv2d_bn( + branch3x3dbl, 96, 3, 3, subsample=(2, 2), border_mode="valid" + ) branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed3') + x = merge( + [branch3x3, branch3x3dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed3", + ) # mixed 4: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) @@ -1818,11 +2323,14 @@ def test_inception_second_branch(self): branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed4') + x = merge( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed4", + ) # mixed 5, 6: 17 x 17 x 768 for i in range(2): @@ -1838,12 +2346,16 @@ def test_inception_second_branch(self): branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")( + x + ) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(5 + i)) + x = merge( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed" + str(5 + i), + ) # 
mixed 7: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) @@ -1858,35 +2370,40 @@ def test_inception_second_branch(self): branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed7') + x = merge( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed7", + ) - model = Model(inputs, x, name='inception_v3') + model = Model(inputs, x, name="inception_v3") # Set some random weights # use small weights for numerical correctness - model.set_weights([np.random.rand(*w.shape)*1e-3 for w in model.get_weights()]) + model.set_weights( + [np.random.rand(*w.shape) * 1e-3 for w in model.get_weights()] + ) # Get the coreml model self._test_keras_model(model) def test_inception_no_top(self): - input_shape = (299,299,3) + input_shape = (299, 299, 3) img_input = Input(shape=input_shape) channel_axis = 3 inputs = img_input - x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid') - x = conv2d_bn(x, 32, 3, 3, border_mode='valid') + x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode="valid") + x = conv2d_bn(x, 32, 3, 3, border_mode="valid") x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = conv2d_bn(x, 80, 1, 1, border_mode='valid') - x = conv2d_bn(x, 192, 3, 3, border_mode='valid') + x = conv2d_bn(x, 80, 1, 1, border_mode="valid") + x = conv2d_bn(x, 192, 3, 3, border_mode="valid") x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 @@ -1900,25 +2417,33 @@ def test_inception_no_top(self): branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")( + x + ) branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(i)) + x = merge( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed" + str(i), + ) # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid') + branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode="valid") branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, - subsample=(2, 2), border_mode='valid') + branch3x3dbl = conv2d_bn( + branch3x3dbl, 96, 3, 3, subsample=(2, 2), border_mode="valid" + ) branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed3') + x = merge( + [branch3x3, branch3x3dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed3", + ) # mixed 4: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) @@ -1933,11 +2458,14 @@ def test_inception_no_top(self): branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = 
AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed4') + x = merge( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed4", + ) # mixed 5, 6: 17 x 17 x 768 for i in range(2): @@ -1953,12 +2481,16 @@ def test_inception_no_top(self): branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")( + x + ) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(5 + i)) + x = merge( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed" + str(5 + i), + ) # mixed 7: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) @@ -1973,27 +2505,35 @@ def test_inception_no_top(self): branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed7') + x = merge( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed7", + ) # mixed 8: 8 x 8 x 1280 branch3x3 = conv2d_bn(x, 192, 1, 1) - branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, - subsample=(2, 2), border_mode='valid') + branch3x3 = conv2d_bn( + branch3x3, 320, 3, 3, subsample=(2, 2), border_mode="valid" + ) branch7x7x3 = conv2d_bn(x, 192, 1, 1) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 3, 3, - subsample=(2, 2), border_mode='valid') + branch7x7x3 = conv2d_bn( + branch7x7x3, 192, 3, 3, subsample=(2, 2), border_mode="valid" + ) branch_pool = AveragePooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch7x7x3, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed8') + x = merge( + [branch3x3, branch7x7x3, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed8", + ) # mixed 9: 8 x 8 x 2048 for i in range(2): @@ -2002,34 +2542,47 @@ def test_inception_no_top(self): branch3x3 = conv2d_bn(x, 384, 1, 1) branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = merge([branch3x3_1, branch3x3_2], - mode='concat', concat_axis=channel_axis, - name='mixed9_' + str(i)) + branch3x3 = merge( + [branch3x3_1, branch3x3_2], + mode="concat", + concat_axis=channel_axis, + name="mixed9_" + str(i), + ) branch3x3dbl = conv2d_bn(x, 448, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], - mode='concat', concat_axis=channel_axis) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + branch3x3dbl = merge( + [branch3x3dbl_1, branch3x3dbl_2], + mode="concat", + concat_axis=channel_axis, + ) 
+ + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode="same")( + x + ) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(9 + i)) + x = merge( + [branch1x1, branch3x3, branch3x3dbl, branch_pool], + mode="concat", + concat_axis=channel_axis, + name="mixed" + str(9 + i), + ) - model = Model(inputs, x, name='inception_v3') + model = Model(inputs, x, name="inception_v3") # Set some random weights # use small weights for numerical correctness - model.set_weights([np.random.rand(*w.shape)*1e-3 for w in model.get_weights()]) + model.set_weights( + [np.random.rand(*w.shape) * 1e-3 for w in model.get_weights()] + ) # Get the coreml model self._test_keras_model(model) -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. Skipping tests.') + +@unittest.skipIf(not _HAS_KERAS_TF, "Missing keras. Skipping tests.") @pytest.mark.keras1 @pytest.mark.slow class KerasNumericCorrectnessStressTest(KerasNumericCorrectnessTest): @@ -2037,19 +2590,30 @@ class KerasNumericCorrectnessStressTest(KerasNumericCorrectnessTest): Unit test class for testing all combinations of a particular layer. """ - def _run_test(self, model, param, model_dir = None, delta=1e-2, transpose_keras_result=True, one_dim_seq_flags = None): + + def _run_test( + self, + model, + param, + model_dir=None, + delta=1e-2, + transpose_keras_result=True, + one_dim_seq_flags=None, + ): """ Run a test on a particular model """ use_tmp_folder = False if model_dir is None: use_tmp_folder = True model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'keras.mlmodel') + model_path = os.path.join(model_dir, "keras.mlmodel") # Generate some random data nb_inputs = len(model.inputs) if nb_inputs > 1: - input_names = []; input_data = []; coreml_input = {} + input_names = [] + input_data = [] + coreml_input = {} for i in range(nb_inputs): input_shape = [1 if a is None else a for a in model.input_shape[i]] X = _generate_data(input_shape) @@ -2057,17 +2621,23 @@ def _run_test(self, model, param, model_dir = None, delta=1e-2, transpose_keras_ input_names.append(feature_name) input_data.append(X) if one_dim_seq_flags is None: - coreml_input[feature_name] = _keras_transpose(X).astype('f') + coreml_input[feature_name] = _keras_transpose(X).astype("f") else: - coreml_input[feature_name] = _keras_transpose(X, one_dim_seq_flags[i]).astype('f') + coreml_input[feature_name] = _keras_transpose( + X, one_dim_seq_flags[i] + ).astype("f") else: input_shape = [1 if a is None else a for a in model.input_shape] - input_names = ['data'] + input_names = ["data"] input_data = _generate_data(input_shape) if one_dim_seq_flags is None: - coreml_input = {'data': _keras_transpose(input_data).astype('f')} + coreml_input = {"data": _keras_transpose(input_data).astype("f")} else: - coreml_input = {'data': _keras_transpose(input_data, one_dim_seq_flags[0]).astype('f')} + coreml_input = { + "data": _keras_transpose(input_data, one_dim_seq_flags[0]).astype( + "f" + ) + } # Make predictions if transpose_keras_result: @@ -2076,32 +2646,42 @@ def _run_test(self, model, param, model_dir = None, delta=1e-2, transpose_keras_ keras_preds = model.predict(input_data).flatten() # Get the model - coreml_model = _get_coreml_model(model, model_path, input_names, ['output']) - if is_macos() and macos_version() >= (10, 13): + coreml_model = _get_coreml_model(model, model_path, input_names, ["output"]) + if _is_macos() and _macos_version() >= (10, 13): # 
get prediction - coreml_preds = coreml_model.predict(coreml_input)['output'].flatten() + coreml_preds = coreml_model.predict(coreml_input)["output"].flatten() if use_tmp_folder: shutil.rmtree(model_dir) - self.assertEquals(len(coreml_preds), len(keras_preds), - msg = 'Failed test case %s. Lengths wrong (%s vs %s)' % (param, len(coreml_preds), len(keras_preds))) + self.assertEquals( + len(coreml_preds), + len(keras_preds), + msg="Failed test case %s. Lengths wrong (%s vs %s)" + % (param, len(coreml_preds), len(keras_preds)), + ) for i in range(len(keras_preds)): max_den = max(1.0, keras_preds[i], coreml_preds[i]) - self.assertAlmostEquals(keras_preds[i]/max_den, coreml_preds[i]/max_den, delta = delta, - msg = 'Failed test case %s. Predictions wrong (%s vs %s)' % (param, coreml_preds[i], keras_preds[i])) + self.assertAlmostEquals( + keras_preds[i] / max_den, + coreml_preds[i] / max_den, + delta=delta, + msg="Failed test case %s. Predictions wrong (%s vs %s)" + % (param, coreml_preds[i], keras_preds[i]), + ) @pytest.mark.slow def test_activation_layer_params(self): options = dict( - activation = ['tanh', 'relu', 'sigmoid', 'softmax', 'softplus', 'softsign'] + activation=["tanh", "relu", "sigmoid", "softmax", "softplus", "softsign"] ) # Define a function that tests a model num_channels = 10 input_dim = 10 + def build_model(x): model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim)) + model.add(Dense(num_channels, input_dim=input_dim)) model.add(Activation(**dict(zip(options.keys(), x)))) return x, model @@ -2118,17 +2698,17 @@ def build_model(x): @pytest.mark.slow def test_dense_layer_params(self): options = dict( - activation = ['relu', 'softmax', 'tanh', 'sigmoid'], - bias = [True, False], + activation=["relu", "softmax", "tanh", "sigmoid"], bias=[True, False], ) # Define a function that tests a model input_dim = 10 num_channels = 10 + def build_model(x): kwargs = dict(zip(options.keys(), x)) model = Sequential() - model.add(Dense(num_channels, input_dim = input_dim, **kwargs)) + model.add(Dense(num_channels, input_dim=input_dim, **kwargs)) return x, model # Iterate through all combinations @@ -2142,9 +2722,7 @@ def build_model(x): @pytest.mark.slow def test_upsample_layer_params(self): - options = dict( - size= [(2,2), (3,3), (4,4), (5,5)] - ) + options = dict(size=[(2, 2), (3, 3), (4, 4), (5, 5)]) np.random.seed(1988) input_dim = 10 @@ -2155,8 +2733,9 @@ def test_upsample_layer_params(self): def build_model(x): kwargs = dict(zip(options.keys(), x)) model = Sequential() - model.add(Convolution2D(input_shape = input_shape, nb_row = 7, - nb_col = 7, nb_filter = 5)) + model.add( + Convolution2D(input_shape=input_shape, nb_row=7, nb_col=7, nb_filter=5) + ) model.add(UpSampling2D(**kwargs)) return x, model @@ -2172,20 +2751,25 @@ def build_model(x): @pytest.mark.slow def test_conv_layer_params(self): options = dict( - activation = ['relu', 'tanh', 'sigmoid'], # keras does not support softmax on 4-D - bias = [True, False], - border_mode = ['same', 'valid'], - nb_filter = [1, 3, 5], - nb_row = [5], # fails when sizes are different - nb_col = [5], + activation=[ + "relu", + "tanh", + "sigmoid", + ], # keras does not support softmax on 4-D + bias=[True, False], + border_mode=["same", "valid"], + nb_filter=[1, 3, 5], + nb_row=[5], # fails when sizes are different + nb_col=[5], ) # Define a function that tests a model input_shape = (10, 10, 1) + def build_model(x): kwargs = dict(zip(options.keys(), x)) model = Sequential() - model.add(Convolution2D(input_shape = input_shape, 
**kwargs)) + model.add(Convolution2D(input_shape=input_shape, **kwargs)) return x, model # Iterate through all combinations @@ -2199,16 +2783,15 @@ def build_model(x): @pytest.mark.slow def test_dense_elementwise_params(self): - options = dict( - modes = ['sum', 'mul', 'concat', 'ave', 'cos', 'dot', 'max'] - ) + options = dict(modes=["sum", "mul", "concat", "ave", "cos", "dot", "max"]) + def build_model(mode): x1 = Input(shape=(3,)) x2 = Input(shape=(3,)) y1 = Dense(4)(x1) y2 = Dense(4)(x2) z = merge([y1, y2], mode=mode) - model = Model([x1,x2], z) + model = Model([x1, x2], z) return mode, model product = itertools.product(*options.values()) @@ -2221,51 +2804,53 @@ def test_vgg_16_tiny(self): input_shape = (48, 48, 3) model = Sequential() - model.add(ZeroPadding2D((1,1),input_shape=input_shape)) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) + model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + 
model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) model.add(Flatten()) - model.add(Dense(32, activation='relu')) - #model.add(Dropout(0.5)) - model.add(Dense(32, activation='relu')) - #model.add(Dropout(0.5)) - model.add(Dense(1000)) # activation='softmax')) + model.add(Dense(32, activation="relu")) + # model.add(Dropout(0.5)) + model.add(Dense(32, activation="relu")) + # model.add(Dropout(0.5)) + model.add(Dense(1000)) # activation='softmax')) # Set some random weights - model.set_weights([(np.random.rand(*w.shape)-0.5)/5.0 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) / 5.0 for w in model.get_weights()] + ) # Get the coreml model self._test_keras_model(model) @@ -2274,51 +2859,53 @@ def test_vgg_16_tiny_no_pooling(self): input_shape = (48, 48, 3) model = Sequential() - model.add(ZeroPadding2D((1,1),input_shape=input_shape)) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) - - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1,1))) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(MaxPooling2D((2,2), strides=(2,2))) + model.add(ZeroPadding2D((1, 1), input_shape=input_shape)) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) + + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + 
model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(ZeroPadding2D((1, 1))) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(MaxPooling2D((2, 2), strides=(2, 2))) model.add(Flatten()) - model.add(Dense(32, activation='relu')) - #model.add(Dropout(0.5)) - model.add(Dense(32, activation='relu')) - #model.add(Dropout(0.5)) - model.add(Dense(1000)) # activation='softmax')) + model.add(Dense(32, activation="relu")) + # model.add(Dropout(0.5)) + model.add(Dense(32, activation="relu")) + # model.add(Dropout(0.5)) + model.add(Dense(1000)) # activation='softmax')) # Set some random weights - model.set_weights([(np.random.rand(*w.shape)-0.5)/5.0 for w in model.get_weights()]) + model.set_weights( + [(np.random.rand(*w.shape) - 0.5) / 5.0 for w in model.get_weights()] + ) # Get the coreml model self._test_keras_model(model) @@ -2327,30 +2914,30 @@ def test_vgg_16_tiny_no_pooling_no_padding(self): input_shape = (48, 48, 3) model = Sequential() - model.add(Convolution2D(32, 3, 3, activation='relu', input_shape = input_shape)) - model.add(Convolution2D(32, 3, 3, activation='relu')) + model.add(Convolution2D(32, 3, 3, activation="relu", input_shape=input_shape)) + model.add(Convolution2D(32, 3, 3, activation="relu")) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) - model.add(Convolution2D(32, 3, 3, activation='relu')) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) + model.add(Convolution2D(32, 3, 3, activation="relu")) model.add(Flatten()) - model.add(Dense(32, activation='relu')) + model.add(Dense(32, activation="relu")) model.add(Dropout(0.5)) - model.add(Dense(32, activation='relu')) + model.add(Dense(32, activation="relu")) model.add(Dropout(0.5)) - model.add(Dense(1000, activation='softmax')) + model.add(Dense(1000, activation="softmax")) # Get the coreml model self._test_keras_model(model) @@ -2359,9 +2946,9 @@ def test_vgg_16_tiny_only_conv_dense(self): input_shape = (48, 48, 3) model = Sequential() - model.add(Convolution2D(32, 3, 3, activation='relu', input_shape = input_shape)) + model.add(Convolution2D(32, 3, 3, activation="relu", input_shape=input_shape)) model.add(Flatten()) - model.add(Dense(10, activation = 'softmax')) + model.add(Dense(10, activation="softmax")) # Get the coreml model self._test_keras_model(model) @@ -2384,9 +2971,9 @@ def test_imdb_fasttext_first_2(self): def test_tiny_mcrnn_td(self): model = Sequential() - model.add(Convolution2D(3,1,1, input_shape=(2,4,4), border_mode='same')) - 
model.add(AveragePooling2D(pool_size=(2,2))) - model.add(Reshape((2,3))) + model.add(Convolution2D(3, 1, 1, input_shape=(2, 4, 4), border_mode="same")) + model.add(AveragePooling2D(pool_size=(2, 2))) + model.add(Reshape((2, 3))) model.add(TimeDistributed(Dense(5))) self._test_keras_model(model) @@ -2394,96 +2981,96 @@ def test_tiny_mcrnn_td(self): def test_tiny_mcrnn_recurrent(self): model = Sequential() - model.add(Convolution2D(3,1,1, input_shape=(2,4,4), border_mode='same')) - model.add(AveragePooling2D(pool_size=(2,2))) - model.add(Reshape((2,3))) - model.add(LSTM(5, inner_activation = 'sigmoid')) + model.add(Convolution2D(3, 1, 1, input_shape=(2, 4, 4), border_mode="same")) + model.add(AveragePooling2D(pool_size=(2, 2))) + model.add(Reshape((2, 3))) + model.add(LSTM(5, inner_activation="sigmoid")) self._test_keras_model(model) def test_tiny_mcrnn_music_tagger(self): - x_in = Input(shape=(4,6,1)) + x_in = Input(shape=(4, 6, 1)) x = ZeroPadding2D(padding=(0, 1))(x_in) - x = BatchNormalization(axis=2, name='bn_0_freq')(x) + x = BatchNormalization(axis=2, name="bn_0_freq")(x) # Conv block 1 - x = Convolution2D(2, 3, 3, border_mode='same', name='conv1')(x) - x = BatchNormalization(axis=3, mode=0, name='bn1')(x) + x = Convolution2D(2, 3, 3, border_mode="same", name="conv1")(x) + x = BatchNormalization(axis=3, mode=0, name="bn1")(x) x = ELU()(x) - x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x) + x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="pool1")(x) # Conv block 2 - x = Convolution2D(4, 3, 3, border_mode='same', name='conv2')(x) - x = BatchNormalization(axis=3, mode=0, name='bn2')(x) + x = Convolution2D(4, 3, 3, border_mode="same", name="conv2")(x) + x = BatchNormalization(axis=3, mode=0, name="bn2")(x) x = ELU()(x) - x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(x) + x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="pool2")(x) # Should get you (1,1,2,4) x = Reshape((2, 4))(x) - x = GRU(32, return_sequences=True, name='gru1')(x) - x = GRU(32, return_sequences=False, name='gru2')(x) + x = GRU(32, return_sequences=True, name="gru1")(x) + x = GRU(32, return_sequences=False, name="gru2")(x) # Create model. 
model = Model(x_in, x) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - self._test_keras_model(model, mode='random_zero_mean', delta=1e-2) + self._test_keras_model(model, mode="random_zero_mean", delta=1e-2) def test_tiny_apple_manual(self): model = Sequential() - model.add(LSTM(3, input_shape=(4, 5), inner_activation='sigmoid')) + model.add(LSTM(3, input_shape=(4, 5), inner_activation="sigmoid")) model.add(Dense(5)) - model.add(Activation('softmax')) + model.add(Activation("softmax")) self._test_keras_model(model) def test_tiny_image_captioning_image_branch(self): - img_input_1 = Input(shape=(16,16,3)) - x = Convolution2D(2,3,3)(img_input_1) + img_input_1 = Input(shape=(16, 16, 3)) + x = Convolution2D(2, 3, 3)(img_input_1) x = Flatten()(x) img_model = Model([img_input_1], [x]) - img_input = Input(shape=(16,16,3)) + img_input = Input(shape=(16, 16, 3)) x = img_model(img_input) - x = Dense(8, name = 'cap_dense')(x) - x = Reshape((1,8), name = 'cap_reshape')(x) - image_branch = Model([img_input],[x]) + x = Dense(8, name="cap_dense")(x) + x = Reshape((1, 8), name="cap_reshape")(x) + image_branch = Model([img_input], [x]) self._test_keras_model(image_branch) def test_tiny_image_captioning_feature_merge(self): - img_input_1 = Input(shape=(16,16,3)) - x = Convolution2D(2,3,3)(img_input_1) + img_input_1 = Input(shape=(16, 16, 3)) + x = Convolution2D(2, 3, 3)(img_input_1) x = Flatten()(x) img_model = Model([img_input_1], [x]) - img_input = Input(shape=(16,16,3)) + img_input = Input(shape=(16, 16, 3)) x = img_model(img_input) - x = Dense(8, name = 'cap_dense')(x) - x = Reshape((1,8), name = 'cap_reshape')(x) + x = Dense(8, name="cap_dense")(x) + x = Reshape((1, 8), name="cap_reshape")(x) - sentence_input = Input(shape=(5,)) # max_length = 5 - y = Embedding(8, 8, name = 'cap_embedding')(sentence_input) - z = merge([x,y], mode = 'concat', concat_axis = 1, name = 'cap_merge') + sentence_input = Input(shape=(5,)) # max_length = 5 + y = Embedding(8, 8, name="cap_embedding")(sentence_input) + z = merge([x, y], mode="concat", concat_axis=1, name="cap_merge") combined_model = Model([img_input, sentence_input], [z]) self._test_keras_model(combined_model, one_dim_seq_flags=[False, True]) def test_tiny_image_captioning(self): # use a conv layer as an image feature branch - img_input_1 = Input(shape=(16,16,3)) - x = Convolution2D(2,3,3)(img_input_1) + img_input_1 = Input(shape=(16, 16, 3)) + x = Convolution2D(2, 3, 3)(img_input_1) x = Flatten()(x) img_model = Model([img_input_1], [x]) - img_input = Input(shape=(16,16,3)) + img_input = Input(shape=(16, 16, 3)) x = img_model(img_input) - x = Dense(8, name = 'cap_dense')(x) - x = Reshape((1,8), name = 'cap_reshape')(x) + x = Dense(8, name="cap_dense")(x) + x = Reshape((1, 8), name="cap_reshape")(x) - sentence_input = Input(shape=(5,)) # max_length = 5 - y = Embedding(8, 8, name = 'cap_embedding')(sentence_input) - z = merge([x,y], mode = 'concat', concat_axis = 1, name = 'cap_merge') - z = LSTM(4, return_sequences = True, name = 'cap_lstm')(z) - z = TimeDistributed(Dense(8), name = 'cap_timedistributed')(z) + sentence_input = Input(shape=(5,)) # max_length = 5 + y = Embedding(8, 8, name="cap_embedding")(sentence_input) + z = merge([x, y], mode="concat", concat_axis=1, name="cap_merge") + z = LSTM(4, return_sequences=True, name="cap_lstm")(z) + z = TimeDistributed(Dense(8), name="cap_timedistributed")(z) combined_model = Model([img_input, sentence_input], [z]) self._test_keras_model(combined_model, one_dim_seq_flags=[False, True]) @@ 
-2495,22 +3082,20 @@ def test_tiny_babi_rnn(self): query_maxlen = 5 sentrnn = Sequential() - sentrnn.add(Embedding(vocab_size, embed_hidden_size, - input_length=story_maxlen)) + sentrnn.add(Embedding(vocab_size, embed_hidden_size, input_length=story_maxlen)) sentrnn.add(Dropout(0.3)) qrnn = Sequential() - qrnn.add(Embedding(vocab_size, embed_hidden_size, - input_length=query_maxlen)) + qrnn.add(Embedding(vocab_size, embed_hidden_size, input_length=query_maxlen)) qrnn.add(Dropout(0.3)) qrnn.add(LSTM(embed_hidden_size, return_sequences=False)) qrnn.add(RepeatVector(story_maxlen)) model = Sequential() - model.add(Merge([sentrnn, qrnn], mode='sum')) + model.add(Merge([sentrnn, qrnn], mode="sum")) model.add(LSTM(embed_hidden_size, return_sequences=False)) model.add(Dropout(0.3)) - model.add(Dense(vocab_size, activation='softmax')) + model.add(Dense(vocab_size, activation="softmax")) self._test_keras_model(model, one_dim_seq_flags=[True, True]) @@ -2521,7 +3106,14 @@ def test_clickbait_cnn(self): input_length = 20 model = Sequential() - model.add(Embedding(vocabulary_size, embedding_dimension, input_length=input_length, trainable=True)) + model.add( + Embedding( + vocabulary_size, + embedding_dimension, + input_length=input_length, + trainable=True, + ) + ) model.add(Convolution1D(32, 2)) model.add(BatchNormalization()) diff --git a/coremltools/test/neural_network/test_model.py b/coremltools/test/neural_network/test_model.py index 86ae00214..7b163415f 100644 --- a/coremltools/test/neural_network/test_model.py +++ b/coremltools/test/neural_network/test_model.py @@ -4,36 +4,43 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import coremltools +import pytest import unittest import tempfile import numpy as np from coremltools.proto import Model_pb2 - -from coremltools.models.utils import rename_feature, save_spec, macos_version,\ - _convert_neural_network_spec_weights_to_fp16, is_macos, \ - convert_double_to_float_multiarray_type +import PIL.Image + +from coremltools.models.utils import ( + rename_feature, + save_spec, + _macos_version, + _convert_neural_network_spec_weights_to_fp16, + _is_macos, + convert_double_to_float_multiarray_type, +) from coremltools.models import MLModel, datatypes from coremltools.models.neural_network import NeuralNetworkBuilder +from coremltools.models.neural_network.utils import make_image_input, make_nn_classifier class MLModelTest(unittest.TestCase): - @classmethod def setUpClass(self): spec = Model_pb2.Model() spec.specificationVersion = coremltools.SPECIFICATION_VERSION - features = ['feature_1', 'feature_2'] - output = 'output' + features = ["feature_1", "feature_2"] + output = "output" for f in features: input_ = spec.description.input.add() input_.name = f - input_.type.doubleType.MergeFromString(b'') + input_.type.doubleType.MergeFromString(b"") output_ = spec.description.output.add() output_.name = output - output_.type.doubleType.MergeFromString(b'') + output_.type.doubleType.MergeFromString(b"") lr = spec.glmRegressor lr.offset.append(0.1) @@ -42,14 +49,14 @@ def setUpClass(self): for i in coefs: weights.value.append(i) - spec.description.predictedFeatureName = 'output' + spec.description.predictedFeatureName = "output" self.spec = spec def test_model_creation(self): model = MLModel(self.spec) self.assertIsNotNone(model) - filename = tempfile.mktemp(suffix='.mlmodel') + filename = tempfile.mktemp(suffix=".mlmodel") save_spec(self.spec, filename) model = MLModel(filename) self.assertIsNotNone(model) @@ -58,93 +65,112 @@ 
def test_model_api(self): model = MLModel(self.spec) self.assertIsNotNone(model) - model.author = 'Test author' - self.assertEqual(model.author, 'Test author') - self.assertEqual(model.get_spec().description.metadata.author, 'Test author') + model.author = "Test author" + self.assertEqual(model.author, "Test author") + self.assertEqual(model.get_spec().description.metadata.author, "Test author") - model.license = 'Test license' - self.assertEqual(model.license, 'Test license') - self.assertEqual(model.get_spec().description.metadata.license, 'Test license') + model.license = "Test license" + self.assertEqual(model.license, "Test license") + self.assertEqual(model.get_spec().description.metadata.license, "Test license") + + model.short_description = "Test model" + self.assertEqual(model.short_description, "Test model") + self.assertEqual( + model.get_spec().description.metadata.shortDescription, "Test model" + ) - model.short_description = 'Test model' - self.assertEqual(model.short_description, 'Test model') - self.assertEqual(model.get_spec().description.metadata.shortDescription, 'Test model') + model.version = "1.3" + self.assertEqual(model.version, "1.3") + self.assertEqual(model.get_spec().description.metadata.versionString, "1.3") - model.input_description['feature_1'] = 'This is feature 1' - self.assertEqual(model.input_description['feature_1'], 'This is feature 1') + model.input_description["feature_1"] = "This is feature 1" + self.assertEqual(model.input_description["feature_1"], "This is feature 1") - model.output_description['output'] = 'This is output' - self.assertEqual(model.output_description['output'], 'This is output') + model.output_description["output"] = "This is output" + self.assertEqual(model.output_description["output"], "This is output") - filename = tempfile.mktemp(suffix='.mlmodel') + filename = tempfile.mktemp(suffix=".mlmodel") model.save(filename) loaded_model = MLModel(filename) - self.assertEqual(model.author, 'Test author') - self.assertEqual(model.license, 'Test license') + self.assertEqual(model.author, "Test author") + self.assertEqual(model.license, "Test license") # self.assertEqual(model.short_description, 'Test model') - self.assertEqual(model.input_description['feature_1'], 'This is feature 1') - self.assertEqual(model.output_description['output'], 'This is output') + self.assertEqual(model.input_description["feature_1"], "This is feature 1") + self.assertEqual(model.output_description["output"], "This is output") - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_predict_api(self): model = MLModel(self.spec) - preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0}) + preds = model.predict({"feature_1": 1.0, "feature_2": 1.0}) self.assertIsNotNone(preds) - self.assertEqual(preds['output'], 3.1) + self.assertEqual(preds["output"], 3.1) - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_rename_input(self): - rename_feature( - self.spec, 'feature_1', 'renamed_feature', rename_inputs=True) + rename_feature(self.spec, "feature_1", "renamed_feature", rename_inputs=True) model = MLModel(self.spec) - preds = model.predict({'renamed_feature': 1.0, 'feature_2': 1.0}) + preds = model.predict({"renamed_feature": 1.0, 
"feature_2": 1.0}) self.assertIsNotNone(preds) - self.assertEqual(preds['output'], 3.1) + self.assertEqual(preds["output"], 3.1) # reset the spec for next run - rename_feature( - self.spec, 'renamed_feature', 'feature_1', rename_inputs=True) + rename_feature(self.spec, "renamed_feature", "feature_1", rename_inputs=True) - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_rename_input_bad(self): - rename_feature(self.spec, 'blah', 'bad_name', rename_inputs=True) + rename_feature(self.spec, "blah", "bad_name", rename_inputs=True) model = MLModel(self.spec) - preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0}) + preds = model.predict({"feature_1": 1.0, "feature_2": 1.0}) self.assertIsNotNone(preds) - self.assertEqual(preds['output'], 3.1) + self.assertEqual(preds["output"], 3.1) - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_rename_output(self): rename_feature( - self.spec, 'output', 'renamed_output', - rename_inputs=False, rename_outputs=True) + self.spec, + "output", + "renamed_output", + rename_inputs=False, + rename_outputs=True, + ) model = MLModel(self.spec) - preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0}) + preds = model.predict({"feature_1": 1.0, "feature_2": 1.0}) self.assertIsNotNone(preds) - self.assertEqual(preds['renamed_output'], 3.1) - rename_feature(self.spec, 'renamed_output', 'output', - rename_inputs=False, rename_outputs=True) + self.assertEqual(preds["renamed_output"], 3.1) + rename_feature( + self.spec, + "renamed_output", + "output", + rename_inputs=False, + rename_outputs=True, + ) - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_rename_output_bad(self): rename_feature( - self.spec, 'blah', 'bad_name', - rename_inputs=False, rename_outputs=True) + self.spec, "blah", "bad_name", rename_inputs=False, rename_outputs=True + ) model = MLModel(self.spec) - preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0}) + preds = model.predict({"feature_1": 1.0, "feature_2": 1.0}) self.assertIsNotNone(preds) - self.assertEqual(preds['output'], 3.1) + self.assertEqual(preds["output"], 3.1) - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_future_version(self): self.spec.specificationVersion = 10000 - filename = tempfile.mktemp(suffix='.mlmodel') + filename = tempfile.mktemp(suffix=".mlmodel") save_spec(self.spec, filename, auto_set_specification_version=False) model = MLModel(filename) # this model should exist, but throw an exception when we try to use @@ -154,15 +180,17 @@ def test_future_version(self): try: model.predict({}) except Exception as e: - assert 'Core ML model specification version' in str(e) + assert "Core ML model specification version" in str(e) raise self.spec.specificationVersion = 1 - @unittest.skipUnless(is_macos() and macos_version() < (10, 13), - 'Only supported on macOS 10.13-') + @unittest.skipUnless( + _is_macos() and _macos_version() < (10, 13), "Only supported 
on macOS 10.13-" + ) def test_MLModel_warning(self): self.spec.specificationVersion = 3 import warnings + with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") @@ -175,36 +203,36 @@ def test_MLModel_warning(self): def test_convert_nn_spec_to_half_precision(self): # simple network with quantization layer - input_features = [('data', datatypes.Array(3))] - output_features = [('out', datatypes.Array(3))] + input_features = [("data", datatypes.Array(3))] + output_features = [("out", datatypes.Array(3))] builder = NeuralNetworkBuilder(input_features, output_features) weights = np.random.uniform(-0.5, 0.5, (3, 3)) builder.add_inner_product( - name='inner_product', + name="inner_product", W=weights, b=None, input_channels=3, output_channels=3, has_bias=False, - input_name='data', - output_name='out' + input_name="data", + output_name="out", ) model = MLModel(builder.spec) spec = _convert_neural_network_spec_weights_to_fp16(model.get_spec()) self.assertIsNotNone(spec) # simple network without quantization layer - input_features = [('data', datatypes.Array(3))] - output_features = [('out', datatypes.Array(3))] + input_features = [("data", datatypes.Array(3))] + output_features = [("out", datatypes.Array(3))] builder = NeuralNetworkBuilder(input_features, output_features) builder.add_lrn( - name='lrn', - input_name='data', - output_name='out', + name="lrn", + input_name="data", + output_name="out", alpha=2, beta=3, local_size=1, - k=8 + k=8, ) model = MLModel(builder.spec) spec = _convert_neural_network_spec_weights_to_fp16(model.get_spec()) @@ -219,20 +247,20 @@ def test_downgrade_specification_version(self): # manually set a high specification version self.spec.specificationVersion = 4 - filename = tempfile.mktemp(suffix='.mlmodel') + filename = tempfile.mktemp(suffix=".mlmodel") save_spec(self.spec, filename, auto_set_specification_version=True) model = MLModel(filename) assert model.get_spec().specificationVersion == 1 # simple neural network with only spec 1 layer - input_features = [('data', datatypes.Array(3))] - output_features = [('out', datatypes.Array(3))] + input_features = [("data", datatypes.Array(3))] + output_features = [("out", datatypes.Array(3))] builder = NeuralNetworkBuilder(input_features, output_features) - builder.add_activation('relu', 'RELU', 'data', 'out') + builder.add_activation("relu", "RELU", "data", "out") # set a high specification version builder.spec.specificationVersion = 3 model = MLModel(builder.spec) - filename = tempfile.mktemp(suffix='.mlmodel') + filename = tempfile.mktemp(suffix=".mlmodel") model.save(filename) # load the model back model = MLModel(filename) @@ -240,27 +268,179 @@ def test_downgrade_specification_version(self): # test save without automatic set specification version self.spec.specificationVersion = 3 - filename = tempfile.mktemp(suffix='.mlmodel') + filename = tempfile.mktemp(suffix=".mlmodel") save_spec(self.spec, filename, auto_set_specification_version=False) model = MLModel(filename) # the specification version should be original assert model.get_spec().specificationVersion == 3 def test_multiarray_type_convert_to_float(self): - input_features = [('data', datatypes.Array(2))] - output_features = [('out', datatypes.Array(2))] + input_features = [("data", datatypes.Array(2))] + output_features = [("out", datatypes.Array(2))] builder = NeuralNetworkBuilder(input_features, output_features) - builder.add_ceil('ceil', 'data', 'out') + builder.add_ceil("ceil", "data", 
"out") spec = builder.spec - self.assertEqual(spec.description.input[0].type.multiArrayType.dataType, Model_pb2.ArrayFeatureType.DOUBLE) - self.assertEqual(spec.description.output[0].type.multiArrayType.dataType, Model_pb2.ArrayFeatureType.DOUBLE) + self.assertEqual( + spec.description.input[0].type.multiArrayType.dataType, + Model_pb2.ArrayFeatureType.DOUBLE, + ) + self.assertEqual( + spec.description.output[0].type.multiArrayType.dataType, + Model_pb2.ArrayFeatureType.DOUBLE, + ) convert_double_to_float_multiarray_type(spec) - self.assertEqual(spec.description.input[0].type.multiArrayType.dataType, Model_pb2.ArrayFeatureType.FLOAT32) - self.assertEqual(spec.description.output[0].type.multiArrayType.dataType, Model_pb2.ArrayFeatureType.FLOAT32) + self.assertEqual( + spec.description.input[0].type.multiArrayType.dataType, + Model_pb2.ArrayFeatureType.FLOAT32, + ) + self.assertEqual( + spec.description.output[0].type.multiArrayType.dataType, + Model_pb2.ArrayFeatureType.FLOAT32, + ) + + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) + def test_multiarray_to_image_input_util(self): + H, W, C = 1, 1, 3 + input_features = [("data", datatypes.Array(C, H, W))] + output_features = [("out", datatypes.Array(C, H, W))] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("linear", "LINEAR", "data", "out") + spec = builder.spec + mlmodel = MLModel(spec) + mlmodel = make_image_input( + mlmodel, + "data", + red_bias=-5, + green_bias=-6, + blue_bias=-2.5, + scale=10.0, + image_format="NCHW", + ) + x = np.array([4, 2, 5], dtype=np.uint8) + x = np.reshape(x, (H, W, C)) + pil_img = PIL.Image.fromarray(x) + y = mlmodel.predict({"data": pil_img}, useCPUOnly=True)["out"] + self.assertEqual(y.shape, (C, H, W)) + np.testing.assert_almost_equal(y.flatten(), [35.0, 14.0, 47.5]) + + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) + def test_multiarray_to_image_input_util_transpose_elimination(self): + H, W, C = 1, 1, 3 + input_features = [("data", datatypes.Array(H, W, C))] + output_features = [("out", datatypes.Array(H, W, C))] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_transpose("transpose", [2, 0, 1], "data", "transpose") + builder.add_activation("linear", "LINEAR", "transpose", "out") + spec = builder.spec + mlmodel = MLModel(spec) + mlmodel = make_image_input( + mlmodel, + "data", + red_bias=-5, + green_bias=-6, + blue_bias=-2.5, + scale=10.0, + image_format="NHWC", + ) + x = np.array([4, 2, 5], dtype=np.uint8) + x = np.reshape(x, (H, W, C)) + pil_img = PIL.Image.fromarray(x) + y = mlmodel.predict({"data": pil_img}, useCPUOnly=True)["out"] + self.assertEqual(y.shape, (H, W, C)) + np.testing.assert_almost_equal(y.flatten(), [35.0, 14.0, 47.5]) + + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) + def test_multiarray_to_image_input_util_HWC_format(self): + H, W, C = 1, 1, 3 + input_features = [("data", datatypes.Array(H, W, C))] + output_features = [("out", datatypes.Array(H, W, C))] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("linear", "LINEAR", "data", "out") + spec = builder.spec + mlmodel = MLModel(spec) + mlmodel = make_image_input( + mlmodel, + "data", + red_bias=-5, + green_bias=-6, + 
blue_bias=-2.5, + scale=10.0, + image_format="NHWC", + ) + x = np.array([4, 2, 5], dtype=np.uint8) + x = np.reshape(x, (H, W, C)) + pil_img = PIL.Image.fromarray(x) + y = mlmodel.predict({"data": pil_img}, useCPUOnly=True)["out"] + self.assertEqual(y.shape, (H, W, C)) + np.testing.assert_almost_equal(y.flatten(), [35.0, 14.0, 47.5]) + + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) + def test_nn_classifier_util(self): + input_features = [("data", datatypes.Array(3,))] + output_features = [("out", datatypes.Array(3,))] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("linear", "LINEAR", "data", "out") + spec = builder.spec + mlmodel = MLModel(spec) + mlmodel = make_nn_classifier( + mlmodel, + class_labels=["a", "b", "c"], + predicted_feature_name="out_confidence", + predicted_probabilities_output="out", + ) + out_dict = mlmodel.predict({"data": np.array([4.0, 5.5, 6.0])}, useCPUOnly=True) + self.assertEqual(out_dict["out_confidence"], "c") + self.assertEqual( + mlmodel.get_spec().WhichOneof("Type"), "neuralNetworkClassifier" + ) + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) + def test_nn_classifier_util_file(self): + input_features = [("data", datatypes.Array(3,))] + output_features = [("out", datatypes.Array(3,))] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_activation("linear", "LINEAR", "data", "out") + spec = builder.spec + mlmodel = MLModel(spec) + + class_labels = ["a", "b", "c"] + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt") as f: + f.write("\n".join(class_labels)) + f.flush() + mlmodel = make_nn_classifier( + mlmodel, + class_labels=f.name, + predicted_feature_name="out_confidence", + predicted_probabilities_output="out", + ) + out_dict = mlmodel.predict({"data": np.array([4.0, 5.5, 6.0])}, useCPUOnly=True) + self.assertEqual(out_dict["out_confidence"], "c") + self.assertEqual( + mlmodel.get_spec().WhichOneof("Type"), "neuralNetworkClassifier" + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() # suite = unittest.TestSuite() # suite.addTest(MLModelTest('test_multiarray_type_convert_to_float')) diff --git a/coremltools/test/neural_network/test_multiple_images_preprocessing.py b/coremltools/test/neural_network/test_multiple_images_preprocessing.py index b5f4468ae..c7ec533b0 100644 --- a/coremltools/test/neural_network/test_multiple_images_preprocessing.py +++ b/coremltools/test/neural_network/test_multiple_images_preprocessing.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function as _ import json import os @@ -14,351 +14,176 @@ import pytest import coremltools -from coremltools._deps import HAS_KERAS2_TF -from coremltools.converters import caffe as caffe_converter -from coremltools.models.utils import macos_version, is_macos +from coremltools._deps import _HAS_KERAS2_TF +from coremltools.models.utils import _macos_version, _is_macos -if HAS_KERAS2_TF: +if _HAS_KERAS2_TF: import keras from keras.models import Sequential, Model from keras.layers import Activation, GlobalMaxPooling2D, Input -try: - nets_path = os.environ["CAFFE_MODELS_PATH"] - nets_path = nets_path + '/' -except: - nets_path = None - -FOLDER_NAME = 'multiple_images_preprocessing' +FOLDER_NAME = "multiple_images_preprocessing" def extract_tarfile(input_filename, 
dest_dir):
     with tarfile.open(input_filename, "r:gz") as tar:
         tar.extractall(dest_dir)
-
-
+
+
 def load_mlmodel(model_path):
-    load_args = [' /usr/local/bin/coremltest', 'load', '-modelPath', model_path]
-    print('Loading {}'.format(model_path))
-    process = Popen((" ").join(load_args),
-                    stdin=subprocess.PIPE,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    shell=True)
+    load_args = [" /usr/local/bin/coremltest", "load", "-modelPath", model_path]
+    print("Loading {}".format(model_path))
+    process = Popen(
+        (" ").join(load_args),
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        shell=True,
+    )
     stdout, err = process.communicate()
     if not err:
         return True
     else:
         print(" The error is {}".format(err.decode()))
-        return False
-
+        return False
+
+
 def compare_models(caffe_preds, coreml_preds):
     max_relative_error = 0
     for i in range(len(coreml_preds)):
         max_den = max(1.0, np.abs(caffe_preds[i]), np.abs(coreml_preds[i]))
-        relative_error = np.abs(caffe_preds[i]/max_den - coreml_preds[i]/max_den)
+        relative_error = np.abs(caffe_preds[i] / max_den - coreml_preds[i] / max_den)
         if relative_error > max_relative_error:
             max_relative_error = relative_error
-    print('maximum relative error: ', max_relative_error)
-    #print('caffe preds : ', caffe_preds)
-    #print('coreml preds: ', coreml_preds)
-    return max_relative_error
-
-@unittest.skipIf(nets_path is None, "Unable to find CAFFE_MODELS_PATH")
-class ManyImages(unittest.TestCase):
-    """
-    Unit test case for caffe layers
-    """
-    @classmethod
-    def setUpClass(self):
-        """
-        Set up the unit test by loading common utilities.
-        """
-
-    def _evaluate_and_test_model_meanpreprocessing(self, n):
-
-        failed_tests_load = []
-        failed_tests_conversion = []
-        failed_tests_evaluation = []
-
-        extract_tarfile('{}nets/{}.gz'.format(nets_path, FOLDER_NAME), '{}nets/'.format(nets_path))
-
-        path_prototxt = '{}nets/{}/{}/image{}.prototxt'.format(nets_path, FOLDER_NAME, str(n), str(n))
-        path_caffemodel = '{}nets/{}/{}/image{}.caffemodel'.format(nets_path, FOLDER_NAME, str(n), str(n))
-        path_mlmodel = '{}nets/{}/{}/image{}.mlmodel'.format(nets_path, FOLDER_NAME, str(n), str(n))
-        if n == 1:
-            path_binaryproto = '{}nets/{}/1/mean_binary_proto1.binaryproto'.format(nets_path, FOLDER_NAME)
-        else:
-            path_binaryproto = dict()
-            for i in range(n):
-                path_binaryproto["data{}".format(str(i+1))] = '{}nets/{}/{}/mean_binary_proto{}.binaryproto'.format(nets_path, FOLDER_NAME, str(n), str(i+1))
-
-        image_input_names = []
-        for i in range(n):
-            image_input_names.append("data{}".format(str(i+1)))
-
-        #convert it
-        try:
-            model = caffe_converter.convert((path_caffemodel, path_prototxt, path_binaryproto), image_input_names = image_input_names)
-            model.save(path_mlmodel)
-        except RuntimeError as e:
-            print(e)
-            failed_tests_conversion.append('image mean preprocessing: conversion failure')
-
-        #load it (compile it)
-        load_result = load_mlmodel(path_mlmodel)
-        if load_result is False:
-            failed_tests_load.append('image mean preprocessing: load failure')
-
-
-        #load Caffe's input and output
-        with open('{}nets/{}/{}/input.json'.format(nets_path, FOLDER_NAME, str(n))) as data_file:
-            input_data_dict = json.load(data_file)
-        with open('{}nets/{}/{}/output.json'.format(nets_path, FOLDER_NAME, str(n))) as data_file:
-            output_data_dict = json.load(data_file)
-
-        output_data = np.array(output_data_dict["output_data"])
-
-        coreml_input_dict = dict()
-
-        for i in range(n):
-            input_data = np.array(input_data_dict["input_data{}".format(str(i+1))]).astype(np.uint8)
-            img = PIL.Image.fromarray(np.transpose(input_data[0,:,:,:],[1,2,0]))
-            coreml_input_dict["data{}".format(str(i+1))] = img
-
-        #load and evaluate mlmodel
-        mlmodel = coremltools.models.MLModel(path_mlmodel)
-        if is_macos() and macos_version() >= (10, 13):
-            coreml_out = mlmodel.predict(coreml_input_dict)['output']
-
-            caffe_preds = output_data.flatten()
-            coreml_preds = coreml_out.flatten()
-            if len(caffe_preds) != len(coreml_preds):
-                failed_tests_evaluation.append('single image mean preprocessing: evaluation failure')
-
-            max_relative_error = compare_models(output_data.flatten(), coreml_out.flatten())
-            if max_relative_error > 0.001:
-                failed_tests_evaluation.append('single image mean preprocessing: evaluation failure')
-
-        self.assertEqual(failed_tests_conversion,[])
-        self.assertEqual(failed_tests_load,[])
-        self.assertEqual(failed_tests_evaluation,[])
-        shutil.rmtree('{}nets/{}'.format(nets_path, FOLDER_NAME))
-
-
-    def _evaluate_and_test_model_biasprocessing(self, n, red_bias, green_bias, blue_bias, image_scale, is_bgr):
-
-        failed_tests_load = []
-        failed_tests_conversion = []
-        failed_tests_evaluation = []
-
-        extract_tarfile('{}nets/{}.gz'.format(nets_path, FOLDER_NAME), '{}nets/'.format(nets_path))
-
-        path_prototxt = '{}nets/{}/{}_bias/image{}.prototxt'.format(nets_path, FOLDER_NAME, str(n), str(n))
-        path_caffemodel = '{}nets/{}/{}_bias/image{}.caffemodel'.format(nets_path, FOLDER_NAME, str(n), str(n))
-        path_mlmodel = '{}nets/{}/{}_bias/image{}.mlmodel'.format(nets_path, FOLDER_NAME, str(n), str(n))
-
-        image_input_names = []
-        for i in range(n):
-            image_input_names.append("data{}".format(str(i+1)))
-
-        #convert it
-        try:
-            model = caffe_converter.convert(model = (path_caffemodel, path_prototxt), image_input_names = image_input_names,
-                                            red_bias = red_bias, green_bias = green_bias, blue_bias = blue_bias,
-                                            image_scale = image_scale, is_bgr = is_bgr)
-            model.save(path_mlmodel)
-        except RuntimeError as e:
-            print(e)
-            failed_tests_conversion.append('image bias preprocessing: conversion failure')
-
-        #load it (compile it)
-        load_result = load_mlmodel(path_mlmodel)
-        if load_result is False:
-            failed_tests_load.append('image bias preprocessing: load failure')
-
-        if is_macos() and macos_version() >= (10, 13):
-            #load Caffe's input and output
-            with open('{}nets/{}/{}_bias/input.json'.format(nets_path, FOLDER_NAME, str(n))) as data_file:
-                input_data_dict = json.load(data_file)
-            with open('{}nets/{}/{}_bias/output.json'.format(nets_path, FOLDER_NAME, str(n))) as data_file:
-                output_data_dict = json.load(data_file)
-
-            output_data = np.array(output_data_dict["output_data"])
-
-            coreml_input_dict = dict()
-
-            for i in range(n):
-                input_data = np.array(input_data_dict["input_data{}".format(str(i+1))]).astype(np.uint8)
-                img = PIL.Image.fromarray(np.transpose(input_data[0,:,:,:],[1,2,0]))
-                coreml_input_dict["data{}".format(str(i+1))] = img
-
-            #load and evaluate mlmodel
-            mlmodel = coremltools.models.MLModel(path_mlmodel)
-            coreml_out = mlmodel.predict(coreml_input_dict)['output']
-
-            caffe_preds = output_data.flatten()
-            coreml_preds = coreml_out.flatten()
-            if len(caffe_preds) != len(coreml_preds):
-                failed_tests_evaluation.append('single image bias preprocessing: evaluation failure')
-
-            max_relative_error = compare_models(output_data.flatten(), coreml_out.flatten())
-            if max_relative_error > 0.001:
-                failed_tests_evaluation.append('single image bias preprocessing: evaluation failure')
-
-        self.assertEqual(failed_tests_conversion,[])
-        self.assertEqual(failed_tests_load,[])
-        self.assertEqual(failed_tests_evaluation,[])
-
-        shutil.rmtree('{}nets/{}'.format(nets_path, FOLDER_NAME))
-
-    def test_1_mean_image(self):
-        self._evaluate_and_test_model_meanpreprocessing(1)
-
-    def test_2_mean_images(self):
-        self._evaluate_and_test_model_meanpreprocessing(2)
-
-    def test_3_mean_images(self):
-        self._evaluate_and_test_model_meanpreprocessing(3)
-
-    def test_1_image_bias(self):
-        self._evaluate_and_test_model_biasprocessing(n=1,
-                                                     red_bias = -34, green_bias = -123,
-                                                     blue_bias = -22, image_scale = 0.75, is_bgr = False)
-
-    def test_2_image_bias(self):
-        self._evaluate_and_test_model_biasprocessing(n=2,
-                                                     red_bias = {"data1":-11, "data2":-87},
-                                                     green_bias = {"data1":-56, "data2":-78},
-                                                     blue_bias = {"data1":-122, "data2":-76},
-                                                     image_scale = {"data1":0.5, "data2":0.4},
-                                                     is_bgr = {"data1":False, "data2":True})
-
-
-
-@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.')
+    print("maximum relative error: ", max_relative_error)
+    return max_relative_error
+
+
+@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.")
 @pytest.mark.keras2
-class ManyImagesKeras(unittest.TestCase):
-
+class ManyImagesKeras(unittest.TestCase):
     def test_keras_1_image_bias(self):
-
-        #define Keras model and get prediction
-        input_shape=(100,50,3)
+        # define Keras model and get prediction
+        input_shape = (100, 50, 3)
         model = Sequential()
-        model.add(Activation('linear', input_shape=input_shape))
-
+        model.add(Activation("linear", input_shape=input_shape))
+
         data = np.ones(input_shape)
         keras_input = np.ones(input_shape)
-        data[:,:,0] = 128.0;
-        data[:,:,1] = 27.0;
-        data[:,:,2] = 200.0;
-        red_bias = -12.0;
-        green_bias = -20;
-        blue_bias = -4;
-        keras_input[:,:,0] = data[:,:,0] + red_bias;
-        keras_input[:,:,1] = data[:,:,1] + green_bias;
-        keras_input[:,:,2] = data[:,:,2] + blue_bias;
-
-        keras_preds = model.predict(np.expand_dims(keras_input, axis = 0))
-        keras_preds = np.transpose(keras_preds, [0,3,1,2]).flatten()
-
-        #convert to coreml and get predictions
+        data[:, :, 0] = 128.0
+        data[:, :, 1] = 27.0
+        data[:, :, 2] = 200.0
+        red_bias = -12.0
+        green_bias = -20
+        blue_bias = -4
+        keras_input[:, :, 0] = data[:, :, 0] + red_bias
+        keras_input[:, :, 1] = data[:, :, 1] + green_bias
+        keras_input[:, :, 2] = data[:, :, 2] + blue_bias
+
+        keras_preds = model.predict(np.expand_dims(keras_input, axis=0))
+        keras_preds = np.transpose(keras_preds, [0, 3, 1, 2]).flatten()
+
+        # convert to coreml and get predictions
         model_dir = tempfile.mkdtemp()
-        model_path = os.path.join(model_dir, 'keras.mlmodel')
+        model_path = os.path.join(model_dir, "keras.mlmodel")
         from coremltools.converters import keras as keras_converter
-        coreml_model = keras_converter.convert(model, input_names = ['data'], output_names = ['output'],
-                                               image_input_names = ['data'],
-                                               red_bias = red_bias,
-                                               green_bias = green_bias,
-                                               blue_bias = blue_bias)
-        #coreml_model.save(model_path)
-        #coreml_model = coremltools.models.MLModel(model_path)
-
-        if is_macos() and macos_version() >= (10, 13):
+
+        coreml_model = keras_converter.convert(
+            model,
+            input_names=["data"],
+            output_names=["output"],
+            image_input_names=["data"],
+            red_bias=red_bias,
+            green_bias=green_bias,
+            blue_bias=blue_bias,
+        )
+
+        if _is_macos() and _macos_version() >= (10, 13):
             coreml_input_dict = dict()
             coreml_input_dict["data"] = PIL.Image.fromarray(data.astype(np.uint8))
-            coreml_preds = coreml_model.predict(coreml_input_dict)['output'].flatten()
+            coreml_preds = coreml_model.predict(coreml_input_dict)["output"].flatten()
-
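The assertions that close this test compare the flattened Keras and Core ML outputs via `compare_models`, which reduces the two vectors to a single worst-case relative error. A minimal numpy sketch of that metric on hypothetical values (the arrays below are illustrative, not from the test data):

```python
import numpy as np

# Hypothetical reference and converted-model outputs.
reference = np.array([10.0, 0.5, -3.0])
converted = np.array([10.1, 0.5, -3.2])

# Per-element relative error; the denominator is clamped at 1.0 so that
# near-zero reference values do not inflate the ratio (as in compare_models).
den = np.maximum(1.0, np.maximum(np.abs(reference), np.abs(converted)))
max_relative_error = np.max(np.abs(reference / den - converted / den))
print(max_relative_error)  # 0.0625 here; the tests demand <= 0.001
```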
self.assertEquals(len(keras_preds), len(coreml_preds)) + self.assertEquals(len(keras_preds), len(coreml_preds)) max_relative_error = compare_models(keras_preds, coreml_preds) - self.assertAlmostEquals(max(max_relative_error, .001), .001, delta = 1e-6) - - + self.assertAlmostEquals(max(max_relative_error, 0.001), 0.001, delta=1e-6) + if os.path.exists(model_dir): - shutil.rmtree(model_dir) - - + shutil.rmtree(model_dir) + def test_keras_2_image_bias(self): - - #define Keras model and get prediction - input_shape1 = (100,60,3) - input_shape2 = (23,45,3) - + # define Keras model and get prediction + input_shape1 = (100, 60, 3) + input_shape2 = (23, 45, 3) + data1 = Input(shape=input_shape1) data2 = Input(shape=input_shape2) a_pool = GlobalMaxPooling2D()(data1) b_pool = GlobalMaxPooling2D()(data2) output = keras.layers.add([a_pool, b_pool]) model = Model(inputs=[data1, data2], outputs=output) - + data1 = np.ones(input_shape1) data2 = np.ones(input_shape2) keras_input1 = np.ones(input_shape1) keras_input2 = np.ones(input_shape2) - - data1[:,:,0] = 100.0 - data1[:,:,1] = 79.0 - data1[:,:,2] = 194.0 - - data2[:,:,0] = 130.0 - data2[:,:,1] = 91.0 - data2[:,:,2] = 11.0 - - - red_bias1 = -88.0; - green_bias1 = -2; - blue_bias1 = -40; - - red_bias2 = -100.0; - green_bias2 = -29; - blue_bias2 = -15; - - keras_input1[:,:,0] = data1[:,:,2] + blue_bias1; - keras_input1[:,:,1] = data1[:,:,1] + green_bias1; - keras_input1[:,:,2] = data1[:,:,0] + red_bias1; - - keras_input2[:,:,0] = data2[:,:,0] + red_bias2; - keras_input2[:,:,1] = data2[:,:,1] + green_bias2; - keras_input2[:,:,2] = data2[:,:,2] + blue_bias2; - - keras_preds = model.predict([np.expand_dims(keras_input1, axis = 0), np.expand_dims(keras_input2, axis = 0)]) + + data1[:, :, 0] = 100.0 + data1[:, :, 1] = 79.0 + data1[:, :, 2] = 194.0 + + data2[:, :, 0] = 130.0 + data2[:, :, 1] = 91.0 + data2[:, :, 2] = 11.0 + + red_bias1 = -88.0 + green_bias1 = -2 + blue_bias1 = -40 + + red_bias2 = -100.0 + green_bias2 = -29 + blue_bias2 = -15 + + keras_input1[:, :, 0] = data1[:, :, 2] + blue_bias1 + keras_input1[:, :, 1] = data1[:, :, 1] + green_bias1 + keras_input1[:, :, 2] = data1[:, :, 0] + red_bias1 + + keras_input2[:, :, 0] = data2[:, :, 0] + red_bias2 + keras_input2[:, :, 1] = data2[:, :, 1] + green_bias2 + keras_input2[:, :, 2] = data2[:, :, 2] + blue_bias2 + + keras_preds = model.predict( + [np.expand_dims(keras_input1, axis=0), np.expand_dims(keras_input2, axis=0)] + ) keras_preds = keras_preds.flatten() - - #convert to coreml and get predictions + + # convert to coreml and get predictions model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'keras.mlmodel') + model_path = os.path.join(model_dir, "keras.mlmodel") from coremltools.converters import keras as keras_converter - coreml_model = keras_converter.convert(model, input_names = ['data1', 'data2'], output_names = ['output'], - image_input_names = ['data1', 'data2'], - red_bias = {'data1': red_bias1, 'data2': red_bias2}, - green_bias = {'data1': green_bias1, 'data2': green_bias2}, - blue_bias = {'data1': blue_bias1, 'data2': blue_bias2}, - is_bgr = {'data1': True, 'data2': False}) - #coreml_model.save(model_path) - #coreml_model = coremltools.models.MLModel(model_path) - - if is_macos()and macos_version() >= (10, 13): + + coreml_model = keras_converter.convert( + model, + input_names=["data1", "data2"], + output_names=["output"], + image_input_names=["data1", "data2"], + red_bias={"data1": red_bias1, "data2": red_bias2}, + green_bias={"data1": green_bias1, "data2": green_bias2}, + 
blue_bias={"data1": blue_bias1, "data2": blue_bias2}, + is_bgr={"data1": True, "data2": False}, + ) + + if _is_macos() and _macos_version() >= (10, 13): coreml_input_dict = dict() coreml_input_dict["data1"] = PIL.Image.fromarray(data1.astype(np.uint8)) coreml_input_dict["data2"] = PIL.Image.fromarray(data2.astype(np.uint8)) - coreml_preds = coreml_model.predict(coreml_input_dict)['output'].flatten() + coreml_preds = coreml_model.predict(coreml_input_dict)["output"].flatten() - #compare + # compare self.assertEquals(len(keras_preds), len(coreml_preds)) max_relative_error = compare_models(keras_preds, coreml_preds) - self.assertAlmostEquals(max(max_relative_error, .001), .001, delta = 1e-6) + self.assertAlmostEquals(max(max_relative_error, 0.001), 0.001, delta=1e-6) if os.path.exists(model_dir): shutil.rmtree(model_dir) diff --git a/coremltools/test/neural_network/test_neural_networks.py b/coremltools/test/neural_network/test_neural_networks.py index b75a64642..00ec182bc 100644 --- a/coremltools/test/neural_network/test_neural_networks.py +++ b/coremltools/test/neural_network/test_neural_networks.py @@ -7,18 +7,22 @@ import os import coremltools -from coremltools._deps import HAS_KERAS_TF -from coremltools._deps import HAS_TF -from coremltools.models.utils import _get_custom_layer_names, \ - _replace_custom_layer_name, macos_version, is_macos +from coremltools._deps import _HAS_KERAS_TF, MSG_KERAS1_NOT_FOUND +from coremltools._deps import _HAS_TF, MSG_TF1_NOT_FOUND +from coremltools.models.utils import ( + _get_custom_layer_names, + _replace_custom_layer_name, + _macos_version, + _is_macos, +) from coremltools.proto import Model_pb2 -if HAS_KERAS_TF: +if _HAS_KERAS_TF: from keras.models import Sequential from keras.layers import Dense, LSTM from coremltools.converters import keras as keras_converter -if HAS_TF: +if _HAS_TF: import tensorflow as tf from tensorflow.python.platform import gfile from tensorflow.python.tools import freeze_graph @@ -26,14 +30,13 @@ tf.compat.v1.disable_eager_execution() -@unittest.skipIf(not HAS_KERAS_TF, 'Missing keras. 
Skipping tests.') +@unittest.skipIf(not _HAS_KERAS_TF, MSG_KERAS1_NOT_FOUND) @pytest.mark.keras1 class KerasBasicNumericCorrectnessTest(unittest.TestCase): - def test_classifier(self): np.random.seed(1988) - print('running test classifier') + print("running test classifier") input_dim = 5 num_hidden = 12 @@ -41,22 +44,36 @@ def test_classifier(self): input_length = 3 model = Sequential() - model.add(LSTM(num_hidden, input_dim=input_dim, input_length=input_length, return_sequences=False)) - model.add(Dense(num_classes, activation='softmax')) + model.add( + LSTM( + num_hidden, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + ) + ) + model.add(Dense(num_classes, activation="softmax")) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - input_names = ['input'] - output_names = ['zzzz'] - class_labels = ['a', 'b', 'c', 'd', 'e', 'f'] - predicted_feature_name = 'pf' - coremlmodel = keras_converter.convert(model, input_names, output_names, class_labels=class_labels, predicted_feature_name=predicted_feature_name, predicted_probabilities_output=output_names[0]) - - if is_macos() and macos_version() >= (10, 13): + input_names = ["input"] + output_names = ["zzzz"] + class_labels = ["a", "b", "c", "d", "e", "f"] + predicted_feature_name = "pf" + coremlmodel = keras_converter.convert( + model, + input_names, + output_names, + class_labels=class_labels, + predicted_feature_name=predicted_feature_name, + predicted_probabilities_output=output_names[0], + ) + + if _is_macos() and _macos_version() >= (10, 13): inputs = np.random.rand(input_dim) - outputs = coremlmodel.predict({'input': inputs}) + outputs = coremlmodel.predict({"input": inputs}) # this checks that the dictionary got the right name and type - self.assertEquals(type(outputs[output_names[0]]), type({'a': 0.5})) + self.assertEquals(type(outputs[output_names[0]]), type({"a": 0.5})) def test_classifier_no_name(self): np.random.seed(1988) @@ -67,22 +84,35 @@ def test_classifier_no_name(self): input_length = 3 model = Sequential() - model.add(LSTM(num_hidden, input_dim=input_dim, input_length=input_length, return_sequences=False)) - model.add(Dense(num_classes, activation='softmax')) + model.add( + LSTM( + num_hidden, + input_dim=input_dim, + input_length=input_length, + return_sequences=False, + ) + ) + model.add(Dense(num_classes, activation="softmax")) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) - input_names = ['input'] - output_names = ['zzzz'] - class_labels = ['a', 'b', 'c', 'd', 'e', 'f'] - predicted_feature_name = 'pf' - coremlmodel = keras_converter.convert(model, input_names, output_names, class_labels=class_labels, predicted_feature_name=predicted_feature_name) - - if is_macos() and macos_version() >= (10, 13): + input_names = ["input"] + output_names = ["zzzz"] + class_labels = ["a", "b", "c", "d", "e", "f"] + predicted_feature_name = "pf" + coremlmodel = keras_converter.convert( + model, + input_names, + output_names, + class_labels=class_labels, + predicted_feature_name=predicted_feature_name, + ) + + if _is_macos() and _macos_version() >= (10, 13): inputs = np.random.rand(input_dim) - outputs = coremlmodel.predict({'input': inputs}) + outputs = coremlmodel.predict({"input": inputs}) # this checks that the dictionary got the right name and type - self.assertEquals(type(outputs[output_names[0]]), type({'a': 0.5})) + self.assertEquals(type(outputs[output_names[0]]), type({"a": 0.5})) def test_internal_layer(self): @@ -93,31 +123,33 @@ def 
test_internal_layer(self): num_channels2 = 7 num_channels3 = 5 - w1 = (np.random.rand(input_dim, num_channels1) - 0.5) / 5.0; - w2 = (np.random.rand(num_channels1, num_channels2) - 0.5) / 5.0; - w3 = (np.random.rand(num_channels2, num_channels3) - 0.5) / 5.0; + w1 = (np.random.rand(input_dim, num_channels1) - 0.5) / 5.0 + w2 = (np.random.rand(num_channels1, num_channels2) - 0.5) / 5.0 + w3 = (np.random.rand(num_channels2, num_channels3) - 0.5) / 5.0 - b1 = (np.random.rand(num_channels1, ) - 0.5) / 5.0; - b2 = (np.random.rand(num_channels2, ) - 0.5) / 5.0; - b3 = (np.random.rand(num_channels3, ) - 0.5) / 5.0; + b1 = (np.random.rand(num_channels1,) - 0.5) / 5.0 + b2 = (np.random.rand(num_channels2,) - 0.5) / 5.0 + b3 = (np.random.rand(num_channels3,) - 0.5) / 5.0 model = Sequential() model.add(Dense(num_channels1, input_dim=input_dim)) - model.add(Dense(num_channels2, name='middle_layer')) + model.add(Dense(num_channels2, name="middle_layer")) model.add(Dense(num_channels3)) model.set_weights([w1, b1, w2, b2, w3, b3]) - input_names = ['input'] - output_names = ['output'] + input_names = ["input"] + output_names = ["output"] coreml1 = keras_converter.convert(model, input_names, output_names) # adjust the output parameters of coreml1 to include the intermediate layer spec = coreml1.get_spec() coremlNewOutputs = spec.description.output.add() - coremlNewOutputs.name = 'middle_layer_output' + coremlNewOutputs.name = "middle_layer_output" coremlNewParams = coremlNewOutputs.type.multiArrayType - coremlNewParams.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE') + coremlNewParams.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( + "DOUBLE" + ) coremlNewParams.shape.extend([num_channels2]) coremlfinal = coremltools.models.MLModel(spec) @@ -128,284 +160,68 @@ def test_internal_layer(self): model2.add(Dense(num_channels2)) model2.set_weights([w1, b1, w2, b2]) - coreml2 = keras_converter.convert(model2, input_names, ['output2']) + coreml2 = keras_converter.convert(model2, input_names, ["output2"]) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): # generate input data inputs = np.random.rand(input_dim) - fullOutputs = coremlfinal.predict({'input': inputs}) + fullOutputs = coremlfinal.predict({"input": inputs}) - partialOutput = coreml2.predict({'input': inputs}) + partialOutput = coreml2.predict({"input": inputs}) for i in range(0, num_channels2): - self.assertAlmostEquals(fullOutputs['middle_layer_output'][i], partialOutput['output2'][i], 2) - -# Base class for basic TF conversions -@unittest.skipIf(not HAS_TF, 'Missing TF. Skipping tests.') -class TfConversionTestBase(unittest.TestCase): - def setUp(self): - self.tmp_dir = tempfile.mkdtemp() - _, self.graph_file = tempfile.mkstemp(suffix='.pb', prefix=self.tmp_dir) - _, self.checkpoint_file = tempfile.mkstemp(suffix='.ckpt', prefix=self.tmp_dir) - _, self.class_label_file = tempfile.mkstemp(suffix='.txt', prefix=self.tmp_dir) - _, self.frozen_graph_file = tempfile.mkstemp(suffix='.pb', prefix=self.tmp_dir) - self.image_size = 224 - self._setup_tf_model() - - def tearDown(self): - if os.path.exists(self.tmp_dir): - shutil.rmtree(self.tmp_dir) - - def _setup_tf_model(self): - with open(self.class_label_file, 'w+') as labels_file: - for a in range(10): - labels_file.write(str(a + 1) + "\n") - - with tf.Graph().as_default(): - images = tf.random.uniform(self._get_input_shape(), maxval=1) - - # Create the model. 
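The `test_internal_layer` hunk above also documents a reusable debugging pattern: an intermediate blob can be surfaced by appending it to the spec's declared outputs. A condensed sketch of that pattern, under the assumption that `mlmodel` is an already-converted model whose graph contains a blob named `middle_layer_output` (the name and shape below are illustrative):

```python
import coremltools
from coremltools.proto import FeatureTypes_pb2

# Assumption: `mlmodel` is an existing coremltools.models.MLModel whose
# network produces an intermediate blob called "middle_layer_output".
spec = mlmodel.get_spec()

# Declare the intermediate blob as an additional model output so that
# predict() returns it alongside the regular outputs.
new_output = spec.description.output.add()
new_output.name = "middle_layer_output"
new_output.type.multiArrayType.dataType = (
    FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value("DOUBLE")
)
new_output.type.multiArrayType.shape.extend([7])  # illustrative shape

# Rebuild an MLModel around the edited spec.
patched_model = coremltools.models.MLModel(spec)
```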
- (i_placeholder, probabilities) = self._get_network() - - saver = tf.train.Saver() - init_op = tf.global_variables_initializer() - - with tf.Session() as sess: - sess.run(init_op) - probabilities = sess.run(probabilities, {i_placeholder: images.eval()}) - saver.save(sess, self.checkpoint_file) - - with gfile.GFile(self.graph_file, 'wb') as f: - f.write(sess.graph_def.SerializeToString()) - freeze_graph.freeze_graph(self.graph_file, - '', - True, - self.checkpoint_file, - 'Softmax', - '', - '', - self.frozen_graph_file, - False, - '') - - # Returns (input_layer, output_layer) - def _get_network(self): - raise NotImplementedError - - # Returns something like [batch_size, height, width, channels] or - # [batch_size, channels, height, width] - def _get_input_shape(self): - raise NotImplementedError - -# Converting TF models with convolution layers -@unittest.skipIf(not HAS_TF, 'Missing TF. Skipping tests.') -class TFBasicConversionTest(TfConversionTestBase): - # Basic NN using convolutions - def _get_network(self): - i_placeholder = tf.placeholder(name='input', dtype=tf.float32, shape=self._get_input_shape()) - net = self.my_conv_2d(i_placeholder, [1, 3, 3, 1], 1, 1, 'first') - net = tf.nn.avg_pool2d(net, 224, strides=1, padding='VALID', name='AvgPool_1a') - net = self.my_conv_2d(net, [1, 1, 1, 10], 10, 1, 'fc', activation_fn=None, with_bias_add=False) - net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') - probabilities = tf.nn.softmax(net, name='Softmax') - return (i_placeholder, probabilities) - - def _get_input_shape(self): - return [1, self.image_size, self.image_size, 3] - - def my_conv_2d(self, input, weight_shape, num_filters, strides, name, activation_fn=tf.nn.relu, with_bias_add=True): - my_weights = tf.get_variable(name=name + 'weights', shape=weight_shape) - if with_bias_add: - my_bias = tf.get_variable(name=name + 'bias', shape=num_filters) - my_conv = tf.nn.conv2d(input, my_weights, strides=strides, padding='SAME', name=name) - if with_bias_add: - my_conv = tf.nn.bias_add(my_conv, my_bias) - if (activation_fn != None): - conv_layer_out = activation_fn(my_conv) - else: - conv_layer_out = my_conv - return conv_layer_out - - def test_classifier_with_label_file(self): - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - predicted_feature_name='classLabel', - class_labels=self.class_label_file) - - def test_classifier_with_int_label_list(self): - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - predicted_feature_name='classLabel', - class_labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) - - def test_classifier_with_string_label_list(self): - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - predicted_feature_name='classLabel', - class_labels=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']) - - def test_classifier_without_class_labels(self): - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax']) - - def test_classifier_nhwc(self): - # Test manually specifying the image format. The converter would have - # detected NHWC. 
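The image-format tests that follow exercise the converter's `tf_image_format` argument, which distinguishes TensorFlow's default NHWC layout from the channel-first NCHW layout. A small numpy illustration of the relationship between the two (this is the same `[0, 3, 1, 2]` permutation the Keras tests above apply to their predictions):

```python
import numpy as np

# NHWC: (batch, height, width, channels) -- TensorFlow's default layout.
nhwc = np.zeros((1, 224, 224, 3))

# NCHW: (batch, channels, height, width).
nchw = np.transpose(nhwc, [0, 3, 1, 2])
assert nchw.shape == (1, 3, 224, 224)

# The inverse permutation recovers the original layout.
assert np.transpose(nchw, [0, 2, 3, 1]).shape == nhwc.shape
```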
- coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - tf_image_format='NHWC') - - def test_classifier_nchw(self): - # Expect failure - input dimensions are incompatible with NCHW - with self.assertRaises(ValueError) as e: - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - tf_image_format='NCHW') - -class TFConversionTestWithSimpleModelBase(TfConversionTestBase): - # Create a basic network with no convolution layers; converter is not given hints about the image format - def _get_network(self): - i_placeholder = tf.placeholder(name='input', dtype=tf.float32, shape=self._get_input_shape()) - net = tf.layers.Flatten(name='flatten')(i_placeholder) - net = tf.contrib.slim.fully_connected(net, 256) - net = tf.contrib.slim.dropout(net) - net = tf.contrib.slim.fully_connected(net, 10, activation_fn=None) - probabilities = tf.nn.softmax(net, name='Softmax') - return (i_placeholder, probabilities) - -@unittest.skipIf(not HAS_TF, 'Missing TF. Skipping tests.') -class TFConversionTestWithSimpleNHWCModel(TFConversionTestWithSimpleModelBase): - # Use NHWC format - def _get_input_shape(self): - return [1, self.image_size, self.image_size, 3] - - def test_classifier_no_tf_image_format_selected(self): - # Expect to succeed; model has no convolutions but NHWC should have been - # default - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax']) - - def test_classifier_nhwc(self): - # Manually using the correct format should succeed - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - tf_image_format='NHWC') - - def test_classifier_nchw(self): - # Expect failure - input dimensions are incompatible with NCHW - with self.assertRaises(ValueError) as e: - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 224, 224, 3]}, - image_input_names=['input'], - outputs=['Softmax'], - tf_image_format='NCHW') - -@unittest.skipIf(not HAS_TF, 'Missing TF. Skipping tests.') -class TFConversionTestWithSimpleNCHWModel(TFConversionTestWithSimpleModelBase): - # Use NHWC format - def _get_input_shape(self): - return [1, 3, self.image_size, self.image_size] - - def test_classifier_no_tf_image_format_selected(self): - # Expect to fail. 
Could not find image format in convolution layers and no parameter was given, - # so fall back to NHWC which is incompatible - with self.assertRaises(ValueError) as e: - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 3, 224, 224]}, - image_input_names=['input'], - outputs=['Softmax']) - - def test_classifier_nhwc(self): - # Expect to fail, NHWC is incorrect format - with self.assertRaises(ValueError) as e: - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 3, 224, 224]}, - image_input_names=['input'], - outputs=['Softmax'], - tf_image_format='NHWC') - - def test_classifier_nchw(self): - # Expect success - user selected the correct format - coremltools.converters.tensorflow.convert( - self.frozen_graph_file, - inputs={'input': [1, 3, 224, 224]}, - image_input_names=['input'], - outputs=['Softmax'], - tf_image_format='NCHW') + self.assertAlmostEquals( + fullOutputs["middle_layer_output"][i], + partialOutput["output2"][i], + 2, + ) -class CustomLayerUtilsTest(unittest.TestCase): +class CustomLayerUtilsTest(unittest.TestCase): @classmethod def setUpClass(self): spec = Model_pb2.Model() spec.specificationVersion = coremltools.SPECIFICATION_VERSION - features = ['feature_1', 'feature_2'] - output = 'output' + features = ["feature_1", "feature_2"] + output = "output" for f in features: input_ = spec.description.input.add() input_.name = f - input_.type.doubleType.MergeFromString(b'') + input_.type.doubleType.MergeFromString(b"") output_ = spec.description.output.add() output_.name = output - output_.type.doubleType.MergeFromString(b'') + output_.type.doubleType.MergeFromString(b"") layer = spec.neuralNetwork.layers.add() - layer.name = 'custom1' - layer.input.append('input') - layer.output.append('temp1') - layer.custom.className = 'name1' + layer.name = "custom1" + layer.input.append("input") + layer.output.append("temp1") + layer.custom.className = "name1" layer2 = spec.neuralNetwork.layers.add() - layer2.name = 'custom2' - layer2.input.append('temp1') - layer2.output.append('temp2') - layer2.custom.className = 'name2' + layer2.name = "custom2" + layer2.input.append("temp1") + layer2.output.append("temp2") + layer2.custom.className = "name2" layer3 = spec.neuralNetwork.layers.add() - layer3.name = 'custom3' - layer3.input.append('temp2') - layer3.output.append('output') - layer3.custom.className = 'name1' + layer3.name = "custom3" + layer3.input.append("temp2") + layer3.output.append("output") + layer3.custom.className = "name1" self.spec = spec def test_get_custom_names(self): names = _get_custom_layer_names(self.spec) - self.assertEqual(names, {'name1', 'name2'}) + self.assertEqual(names, {"name1", "name2"}) def test_change_custom_name(self): - _replace_custom_layer_name(self.spec, 'name1', 'notname1') + _replace_custom_layer_name(self.spec, "name1", "notname1") names = _get_custom_layer_names(self.spec) - self.assertEqual(names, {'notname1', 'name2'}) + self.assertEqual(names, {"notname1", "name2"}) # set it back for future tests - _replace_custom_layer_name(self.spec, 'notname1', 'name1') + _replace_custom_layer_name(self.spec, "notname1", "name1") diff --git a/coremltools/test/neural_network/test_nn_builder.py b/coremltools/test/neural_network/test_nn_builder.py index cbde545bf..d7d4b3268 100644 --- a/coremltools/test/neural_network/test_nn_builder.py +++ b/coremltools/test/neural_network/test_nn_builder.py @@ -5,20 +5,22 @@ import coremltools from coremltools.models import datatypes, MLModel from 
coremltools.models.neural_network import NeuralNetworkBuilder -from coremltools.models.neural_network.quantization_utils import \ - _convert_array_to_nbit_quantized_bytes, quantize_weights -from coremltools.models.utils import macos_version, is_macos - +from coremltools.models.neural_network.quantization_utils import ( + _convert_array_to_nbit_quantized_bytes, + quantize_weights, +) +from coremltools.models.utils import _macos_version, _is_macos MIN_MACOS_VERSION_REQUIRED = (10, 13) LAYERS_10_14_MACOS_VERSION = (10, 14) LAYERS_10_15_MACOS_VERSION = (10, 15) -@unittest.skipIf(not is_macos() or macos_version() < LAYERS_10_15_MACOS_VERSION, - 'Only supported on macOS 10.15+') +@unittest.skipIf( + not _is_macos() or _macos_version() < LAYERS_10_15_MACOS_VERSION, + "Only supported on macOS 10.15+", +) class ControlFlowCorrectnessTest(unittest.TestCase): - @classmethod def setup_class(cls): pass @@ -36,208 +38,272 @@ def _test_model(self, model, input_dict, output_ref, delta=1e-2): def test_simple_branch(self): """ Test a simple if-else branch network """ - input_features = [('data', datatypes.Array(3)), ('cond', datatypes.Array(1))] - output_features = [('output', None)] - - builder_top = NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - layer = builder_top.add_branch('branch_layer', 'cond') - - builder_ifbranch = NeuralNetworkBuilder(input_features=None, output_features=None, spec=None, nn_spec=layer.branch.ifBranch) - builder_ifbranch.add_elementwise('mult_layer', input_names=['data'], output_name='output', mode='MULTIPLY', alpha=10) - builder_elsebranch = NeuralNetworkBuilder(input_features=None, output_features=None, spec=None, nn_spec=layer.branch.elseBranch) - builder_elsebranch.add_elementwise('add_layer', input_names=['data'], output_name='output', mode='ADD', alpha=10) - coremltools.models.utils.save_spec(builder_top.spec, '/tmp/simple_branch.mlmodel') + input_features = [("data", datatypes.Array(3)), ("cond", datatypes.Array(1))] + output_features = [("output", None)] + + builder_top = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + layer = builder_top.add_branch("branch_layer", "cond") + + builder_ifbranch = NeuralNetworkBuilder( + input_features=None, + output_features=None, + spec=None, + nn_spec=layer.branch.ifBranch, + ) + builder_ifbranch.add_elementwise( + "mult_layer", + input_names=["data"], + output_name="output", + mode="MULTIPLY", + alpha=10, + ) + builder_elsebranch = NeuralNetworkBuilder( + input_features=None, + output_features=None, + spec=None, + nn_spec=layer.branch.elseBranch, + ) + builder_elsebranch.add_elementwise( + "add_layer", + input_names=["data"], + output_name="output", + mode="ADD", + alpha=10, + ) + coremltools.models.utils.save_spec( + builder_top.spec, "/tmp/simple_branch.mlmodel" + ) mlmodel = MLModel(builder_top.spec) # True branch case - input_dict = {'data': np.array(range(1,4), dtype='float'), 'cond': np.array([1], dtype='float')} - output_ref = {'output': input_dict['data'] * 10} + input_dict = { + "data": np.array(range(1, 4), dtype="float"), + "cond": np.array([1], dtype="float"), + } + output_ref = {"output": input_dict["data"] * 10} self._test_model(mlmodel, input_dict, output_ref) # False branch case - input_dict['cond'] = np.array([0], dtype='float') - output_ref['output'] = input_dict['data'] + 10 + input_dict["cond"] = np.array([0], dtype="float") + output_ref["output"] = input_dict["data"] + 10 self._test_model(mlmodel, input_dict, output_ref) def 
test_simple_loop_fixed_iterations(self): - input_features = [('data', datatypes.Array(1))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(1))] + output_features = [("output", None)] - builder_top = NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder_top.add_copy('copy_1', input_name='data', output_name='output') + builder_top = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder_top.add_copy("copy_1", input_name="data", output_name="output") - loop_layer = builder_top.add_loop('loop_layer') + loop_layer = builder_top.add_loop("loop_layer") loop_layer.loop.maxLoopIterations = 5 - builder_body = NeuralNetworkBuilder(input_features=None, output_features=None, spec=None, nn_spec=loop_layer.loop.bodyNetwork) - builder_body.add_elementwise('add', input_names=['output'], output_name='x', mode='ADD', alpha=2) - - builder_body.add_copy('copy_2', input_name='x', output_name='output') - coremltools.models.utils.save_spec(builder_top.spec, '/tmp/simple_loop_fixed_iterations.mlmodel') + builder_body = NeuralNetworkBuilder( + input_features=None, + output_features=None, + spec=None, + nn_spec=loop_layer.loop.bodyNetwork, + ) + builder_body.add_elementwise( + "add", input_names=["output"], output_name="x", mode="ADD", alpha=2 + ) + + builder_body.add_copy("copy_2", input_name="x", output_name="output") + coremltools.models.utils.save_spec( + builder_top.spec, "/tmp/simple_loop_fixed_iterations.mlmodel" + ) mlmodel = MLModel(builder_top.spec) # True branch case - input_dict = {'data': np.array([0], dtype='float')} - output_ref = {'output': np.array([10], dtype='float')} + input_dict = {"data": np.array([0], dtype="float")} + output_ref = {"output": np.array([10], dtype="float")} self._test_model(mlmodel, input_dict, output_ref) @unittest.skipUnless( - is_macos() and macos_version() >= LAYERS_10_14_MACOS_VERSION, - 'Only supported on macOS 10.14+') + _is_macos() and _macos_version() >= LAYERS_10_14_MACOS_VERSION, + "Only supported on macOS 10.14+", +) class BasicNumericCorrectnessTest_1014NewLayers(unittest.TestCase): - - def build_quant_conv_layer(self, W = None, - quantization_type = 'linear', - nbits = 8, - quant_scale = None, - quant_bias = None, - quant_lut = None): - - input_features = [('data', datatypes.Array(1, 2, 2))] - output_features = [('out', datatypes.Array(2, 1, 1))] + def build_quant_conv_layer( + self, + W=None, + quantization_type="linear", + nbits=8, + quant_scale=None, + quant_bias=None, + quant_lut=None, + ): + input_features = [("data", datatypes.Array(1, 2, 2))] + output_features = [("out", datatypes.Array(2, 1, 1))] builder = NeuralNetworkBuilder(input_features, output_features) - builder.add_convolution(name='conv', - kernel_channels=1, - output_channels=2, - height=2, width=2, - stride_height=1, stride_width=1, - border_mode='valid', groups=1, - W=W, - b=None, has_bias=False, - input_name='data', output_name='out', - quantization_type=quantization_type, - nbits=nbits, - quant_scale=quant_scale, - quant_bias=quant_bias, - quant_lut=quant_lut) + builder.add_convolution( + name="conv", + kernel_channels=1, + output_channels=2, + height=2, + width=2, + stride_height=1, + stride_width=1, + border_mode="valid", + groups=1, + W=W, + b=None, + has_bias=False, + input_name="data", + output_name="out", + quantization_type=quantization_type, + nbits=nbits, + quant_scale=quant_scale, + quant_bias=quant_bias, + quant_lut=quant_lut, + ) return 
MLModel(builder.spec) def test_linear_quant_convolution_8bit(self): - W = np.ones((2,2,1,2), dtype=np.uint8) - W[:,:,:,1] = 2 - mlmodel = self.build_quant_conv_layer(W = W.flatten().tobytes(), - quantization_type = 'linear', - nbits = 8, - quant_scale = [4.0], - quant_bias = [-2.0]) - data = np.ones((1,2,2)) - data_dict = {'data': data} - out = mlmodel.predict(data_dict, useCPUOnly=True)['out'] - expected_out = np.reshape(np.array([8, 24]), (2,1,1)) + W = np.ones((2, 2, 1, 2), dtype=np.uint8) + W[:, :, :, 1] = 2 + mlmodel = self.build_quant_conv_layer( + W=W.flatten().tobytes(), + quantization_type="linear", + nbits=8, + quant_scale=[4.0], + quant_bias=[-2.0], + ) + data = np.ones((1, 2, 2)) + data_dict = {"data": data} + out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] + expected_out = np.reshape(np.array([8, 24]), (2, 1, 1)) self.assertTrue(np.allclose(out, expected_out)) def test_linear_quant_convolution_8bit_vector_scalebias(self): - W = np.ones((2,2,1,2), dtype=np.uint8) - W[:,:,:,1] = 2 - mlmodel = self.build_quant_conv_layer(W = W.flatten().tobytes(), - quantization_type = 'linear', - nbits = 8, - quant_scale = [4.0, 5.0], - quant_bias = [-2.0, 1.0]) - data = np.ones((1,2,2)) - data_dict = {'data': data} - out = mlmodel.predict(data_dict, useCPUOnly=True)['out'] - expected_out = np.reshape(np.array([8, 44]), (2,1,1)) + W = np.ones((2, 2, 1, 2), dtype=np.uint8) + W[:, :, :, 1] = 2 + mlmodel = self.build_quant_conv_layer( + W=W.flatten().tobytes(), + quantization_type="linear", + nbits=8, + quant_scale=[4.0, 5.0], + quant_bias=[-2.0, 1.0], + ) + data = np.ones((1, 2, 2)) + data_dict = {"data": data} + out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] + expected_out = np.reshape(np.array([8, 44]), (2, 1, 1)) self.assertTrue(np.allclose(out, expected_out)) def test_lut_quant_convolution_2bit(self): - W = np.zeros((2,2,1,2), dtype=np.uint8) - W[:,:,:,0] = 0 - W[:,:,:,1] = 2 + W = np.zeros((2, 2, 1, 2), dtype=np.uint8) + W[:, :, :, 0] = 0 + W[:, :, :, 1] = 2 W = _convert_array_to_nbit_quantized_bytes(W.flatten(), 2).tobytes() - mlmodel = self.build_quant_conv_layer(W = W, - quantization_type = 'lut', - nbits = 2, - quant_lut = [10.0, 11.0, -3.0, -1.0]) - data = np.ones((1,2,2)) - data_dict = {'data': data} - out = mlmodel.predict(data_dict, useCPUOnly=True)['out'] - expected_out = np.reshape(np.array([40, -12]), (2,1,1)) + mlmodel = self.build_quant_conv_layer( + W=W, quantization_type="lut", nbits=2, quant_lut=[10.0, 11.0, -3.0, -1.0] + ) + data = np.ones((1, 2, 2)) + data_dict = {"data": data} + out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] + expected_out = np.reshape(np.array([40, -12]), (2, 1, 1)) self.assertTrue(np.allclose(out, expected_out)) def test_linear_quant_inner_product_3bit(self): - W = np.reshape(np.arange(6), (2,3)).astype(np.uint8) - input_features = [('data', datatypes.Array(3))] - output_features = [('probs', None)] + W = np.reshape(np.arange(6), (2, 3)).astype(np.uint8) + input_features = [("data", datatypes.Array(3))] + output_features = [("probs", None)] builder = NeuralNetworkBuilder(input_features, output_features) - builder.add_inner_product(name = 'ip1', - W = _convert_array_to_nbit_quantized_bytes(W.flatten(), 3).tobytes(), - b = None, - input_channels = 3, - output_channels = 2, - has_bias = False, - input_name = 'data', - output_name = 'probs', - quantization_type = 'linear', - nbits = 3, - quant_scale = [11.0, 2.0], - quant_bias = [-2.0, 10.0]) + builder.add_inner_product( + name="ip1", + 
W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 3).tobytes(), + b=None, + input_channels=3, + output_channels=2, + has_bias=False, + input_name="data", + output_name="probs", + quantization_type="linear", + nbits=3, + quant_scale=[11.0, 2.0], + quant_bias=[-2.0, 10.0], + ) mlmodel = MLModel(builder.spec) data = np.array([1.0, 3.0, 5.0]) - data_dict = {'data': data} - probs = mlmodel.predict(data_dict)['probs'] + data_dict = {"data": data} + probs = mlmodel.predict(data_dict)["probs"] expected_out = np.array([125, 170]) self.assertTrue(np.allclose(probs.flatten(), expected_out.flatten())) def test_lut_quant_inner_product_1bit(self): - W = np.zeros((2,3), dtype=np.uint8) - W[0,:] = [0,1,1] - W[1,:] = [1,0,0] - input_features = [('data', datatypes.Array(3))] - output_features = [('probs', None)] + W = np.zeros((2, 3), dtype=np.uint8) + W[0, :] = [0, 1, 1] + W[1, :] = [1, 0, 0] + input_features = [("data", datatypes.Array(3))] + output_features = [("probs", None)] builder = NeuralNetworkBuilder(input_features, output_features) - builder.add_inner_product(name = 'ip1', - W = _convert_array_to_nbit_quantized_bytes(W.flatten(), 1).tobytes(), - b = None, - input_channels = 3, - output_channels = 2, - has_bias = False, - input_name = 'data', - output_name = 'probs', - quantization_type = 'lut', - nbits = 1, - quant_lut = [5.0, -3.0]) + builder.add_inner_product( + name="ip1", + W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 1).tobytes(), + b=None, + input_channels=3, + output_channels=2, + has_bias=False, + input_name="data", + output_name="probs", + quantization_type="lut", + nbits=1, + quant_lut=[5.0, -3.0], + ) mlmodel = MLModel(builder.spec) data = np.array([1.0, 3.0, 5.0]) - data_dict = {'data': data} - probs = mlmodel.predict(data_dict)['probs'] + data_dict = {"data": data} + probs = mlmodel.predict(data_dict)["probs"] expected_out = np.array([-19, 37]) self.assertTrue(np.allclose(probs.flatten(), expected_out.flatten())) @unittest.skipUnless( - is_macos() and macos_version() >= LAYERS_10_15_MACOS_VERSION, - 'Only supported on macOS 10.15+') + _is_macos() and _macos_version() >= LAYERS_10_15_MACOS_VERSION, + "Only supported on macOS 10.15+", +) class BasicNumericCorrectnessTest_1015NewLayers(unittest.TestCase): - def test_linear_quant_batchedmatmul_5bit(self): W = np.zeros((2, 3), dtype=np.uint8) W[0, :] = [31, 20, 11] W[1, :] = [1, 0, 8] quant_scale = np.reshape(np.array([10.0, 2.0, 3.0]), (1, 3)) quant_bias = np.reshape(np.array([-2.0, -10.0, 6.0]), (1, 3)) - W_unquantized = np.broadcast_to(quant_scale, (2, 3)) * W + np.broadcast_to(quant_bias, (2, 3)) + W_unquantized = np.broadcast_to(quant_scale, (2, 3)) * W + np.broadcast_to( + quant_bias, (2, 3) + ) bias = np.array([1.0, 2.0, 3.0]) - input_features = [('data', datatypes.Array(2, 2))] - output_features = [('out', None)] - builder = NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_batched_mat_mul(name='batched_matmul', - input_names=['data'], output_name='out', - weight_matrix_rows=2, weight_matrix_columns=3, - W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 5).tobytes(), - bias=bias, - is_quantized_weight=True, - quantization_type='linear', - nbits=5, - quant_scale=quant_scale.flatten(), - quant_bias=quant_bias.flatten()) + input_features = [("data", datatypes.Array(2, 2))] + output_features = [("out", None)] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_batched_mat_mul( + 
name="batched_matmul", + input_names=["data"], + output_name="out", + weight_matrix_rows=2, + weight_matrix_columns=3, + W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 5).tobytes(), + bias=bias, + is_quantized_weight=True, + quantization_type="linear", + nbits=5, + quant_scale=quant_scale.flatten(), + quant_bias=quant_bias.flatten(), + ) mlmodel = MLModel(builder.spec) data = np.zeros((2, 2), dtype=np.float32) data[0, :] = [5, 6] data[1, :] = [10, 12] - data_dict = {'data': data} - out = mlmodel.predict(data_dict, useCPUOnly=True)['out'] + data_dict = {"data": data} + out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] expected_out = np.matmul(data, W_unquantized) + bias self.assertTrue(out.shape == expected_out.shape) self.assertTrue(np.allclose(out.flatten(), expected_out.flatten())) @@ -247,14 +313,20 @@ def test_linear_quant_batchedmatmul_8bit(self): W = np.random.rand(32, 32) * 2.0 - 1 bias = np.random.rand(32) - input_features = [('data', datatypes.Array(2, 32))] - output_features = [('out', None)] - builder = NeuralNetworkBuilder(input_features, output_features, - disable_rank5_shape_mapping=True) + input_features = [("data", datatypes.Array(2, 32))] + output_features = [("out", None)] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) builder.add_batched_mat_mul( - name='batched_matmul', input_names=['data'], - output_name='out', weight_matrix_rows=32, - weight_matrix_columns=32, W=W, bias=bias) + name="batched_matmul", + input_names=["data"], + output_name="out", + weight_matrix_rows=32, + weight_matrix_columns=32, + W=W, + bias=bias, + ) mlmodel = MLModel(builder.spec) q_mlmodel = quantize_weights(mlmodel, 8) q_spec = q_mlmodel.get_spec() @@ -264,12 +336,11 @@ def test_linear_quant_batchedmatmul_8bit(self): self.assertTrue(len(q_layer.weights.rawValue) > 0) data = np.random.rand(2, 32) - data_dict = {'data': data} - out = q_mlmodel.predict(data_dict, useCPUOnly=True)['out'] + data_dict = {"data": data} + out = q_mlmodel.predict(data_dict, useCPUOnly=True)["out"] expected_out = np.matmul(data, W) + bias self.assertTrue(out.shape == expected_out.shape) - self.assertTrue(np.allclose(out.flatten(), expected_out.flatten(), - atol=0.1)) + self.assertTrue(np.allclose(out.flatten(), expected_out.flatten(), atol=0.1)) def test_lut_quant_embedding_nd_2bit(self): embed_size = 2 @@ -281,24 +352,29 @@ def test_lut_quant_embedding_nd_2bit(self): bias = np.array([1.0, 2.0]) quant_lut = np.array([34.0, 12.0, -6.0, 6.0]) - input_features = [('data', datatypes.Array(4, 1))] - output_features = [('out', None)] - builder = NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_embedding_nd(name='embedding_nd', - input_name='data', - output_name='out', - vocab_size=vocab_size, embedding_size=embed_size, - W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 2).tobytes(), - b=bias, - is_quantized_weight=True, - quantization_type='lut', - nbits=2, - quant_lut=quant_lut) + input_features = [("data", datatypes.Array(4, 1))] + output_features = [("out", None)] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_embedding_nd( + name="embedding_nd", + input_name="data", + output_name="out", + vocab_size=vocab_size, + embedding_size=embed_size, + W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 2).tobytes(), + b=bias, + is_quantized_weight=True, + quantization_type="lut", + nbits=2, + quant_lut=quant_lut, + ) mlmodel = 
MLModel(builder.spec) data = np.reshape(np.array([2.0, 2.0, 1.0, 0.0]), (4, 1)) - data_dict = {'data': data} - out = mlmodel.predict(data_dict, useCPUOnly=True)['out'] + data_dict = {"data": data} + out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] expected_out = np.zeros((4, embed_size), dtype=np.float32) expected_out[0, :] = [quant_lut[W[0, 2]], quant_lut[W[1, 2]]] + bias expected_out[1, :] = [quant_lut[W[0, 2]], quant_lut[W[1, 2]]] + bias @@ -316,130 +392,210 @@ def test_linear_quant_embedding_7bit(self): W[:, 2] = [90, 1] quant_scale = np.reshape(np.array([10.0, 2.0]), (2, 1)) quant_bias = np.reshape(np.array([-2.0, -10.0]), (2, 1)) - W_unquantized = np.broadcast_to(quant_scale, (2, 3)) * W + np.broadcast_to(quant_bias, (2, 3)) + W_unquantized = np.broadcast_to(quant_scale, (2, 3)) * W + np.broadcast_to( + quant_bias, (2, 3) + ) bias = np.reshape(np.array([1.0, 2.0]), (2, 1)) W_unquantized = W_unquantized + np.broadcast_to(bias, (2, 3)) - input_features = [('data', datatypes.Array(4,1,1,1))] - output_features = [('out', None)] - builder = NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_embedding(name='embed', - W = _convert_array_to_nbit_quantized_bytes(W.flatten(), 7).tobytes(), - b = bias, - input_dim = vocab_size, - output_channels = embed_size, - has_bias = True, - input_name = 'data', output_name = 'out', - is_quantized_weight= True, - quantization_type='linear', - nbits = 7, - quant_scale = quant_scale, - quant_bias = quant_bias) + input_features = [("data", datatypes.Array(4, 1, 1, 1))] + output_features = [("out", None)] + builder = NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_embedding( + name="embed", + W=_convert_array_to_nbit_quantized_bytes(W.flatten(), 7).tobytes(), + b=bias, + input_dim=vocab_size, + output_channels=embed_size, + has_bias=True, + input_name="data", + output_name="out", + is_quantized_weight=True, + quantization_type="linear", + nbits=7, + quant_scale=quant_scale, + quant_bias=quant_bias, + ) mlmodel = MLModel(builder.spec) data = np.reshape(np.array([2.0, 2.0, 1.0, 0.0]), (4, 1, 1, 1)) - data_dict = {'data': data} - out = mlmodel.predict(data_dict, useCPUOnly=True)['out'] + data_dict = {"data": data} + out = mlmodel.predict(data_dict, useCPUOnly=True)["out"] self.assertTrue(out.shape == (4, embed_size, 1, 1)) expected_out = np.zeros((4, embed_size), dtype=np.float32) - expected_out[0, :] = W_unquantized[:, 2].flatten() + expected_out[0, :] = W_unquantized[:, 2].flatten() expected_out[1, :] = W_unquantized[:, 2].flatten() expected_out[2, :] = W_unquantized[:, 1].flatten() expected_out[3, :] = W_unquantized[:, 0].flatten() self.assertTrue(np.allclose(out.flatten(), expected_out.flatten())) -@unittest.skipIf(not is_macos() or macos_version() < (10, 13), - 'Only supported on macOS 10.13+') +@unittest.skipIf( + not _is_macos() or _macos_version() < (10, 13), "Only supported on macOS 10.13+" +) class BasicNumericCorrectnessTest(unittest.TestCase): - def _build_nn_with_one_ip_layer(self): - input_features = [('data', datatypes.Array(3))] - output_features = [('out', None)] + input_features = [("data", datatypes.Array(3))] + output_features = [("out", None)] builder = NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) w = np.random.uniform(-0.5, 0.5, (3, 3)) - builder.add_inner_product(name='ip1', - W=w, - b=None, - 
input_channels=3, - output_channels=3, - has_bias=False, - input_name='input', - output_name='hidden') + builder.add_inner_product( + name="ip1", + W=w, + b=None, + input_channels=3, + output_channels=3, + has_bias=False, + input_name="input", + output_name="hidden", + ) return builder def test_undefined_shape_single_output(self): - W = np.ones((3,3)) - input_features = [('data', datatypes.Array(3))] - output_features = [('probs', None)] + W = np.ones((3, 3)) + input_features = [("data", datatypes.Array(3))] + output_features = [("probs", None)] builder = NeuralNetworkBuilder(input_features, output_features) - builder.add_inner_product(name = 'ip1', - W = W, - b = None, - input_channels = 3, - output_channels = 3, - has_bias = False, - input_name = 'data', - output_name = 'probs') + builder.add_inner_product( + name="ip1", + W=W, + b=None, + input_channels=3, + output_channels=3, + has_bias=False, + input_name="data", + output_name="probs", + ) mlmodel = MLModel(builder.spec) data = np.ones((3,)) - data_dict = {'data': data} - probs = mlmodel.predict(data_dict)['probs'] + data_dict = {"data": data} + probs = mlmodel.predict(data_dict)["probs"] self.assertTrue(np.allclose(probs, np.ones(3) * 3)) def test_set_input(self): builder = self._build_nn_with_one_ip_layer() - builder.set_input(input_names=['data_renamed'], input_dims=[(2,)]) + builder.set_input(input_names=["data_renamed"], input_dims=[(2,)]) - self.assertEquals(builder.spec.description.input[0].type.multiArrayType.shape[0], 2) - self.assertEquals(builder.spec.description.input[0].name, 'data_renamed') + self.assertEquals( + builder.spec.description.input[0].type.multiArrayType.shape[0], 2 + ) + self.assertEquals(builder.spec.description.input[0].name, "data_renamed") def test_set_input_fail(self): builder = self._build_nn_with_one_ip_layer() # fails since input_names and input_dims do not have same size with self.assertRaises(ValueError): - builder.set_input(input_names=['data_1', 'data_2'], input_dims=[(3,)]) + builder.set_input(input_names=["data_1", "data_2"], input_dims=[(3,)]) def test_set_output(self): builder = self._build_nn_with_one_ip_layer() - builder.set_output(output_names=['out_renamed'], output_dims=[(2,)]) + builder.set_output(output_names=["out_renamed"], output_dims=[(2,)]) - self.assertEquals(builder.spec.description.output[0].type.multiArrayType.shape[0], 2) - self.assertEquals(builder.spec.description.output[0].name, 'out_renamed') + self.assertEquals( + builder.spec.description.output[0].type.multiArrayType.shape[0], 2 + ) + self.assertEquals(builder.spec.description.output[0].name, "out_renamed") def test_set_output_fail(self): builder = self._build_nn_with_one_ip_layer() # fails since output_names and output_dims do not have same size with self.assertRaises(ValueError): - builder.set_output(output_names=['out_1', 'out_2'], output_dims=[(3,)]) + builder.set_output(output_names=["out_1", "out_2"], output_dims=[(3,)]) def test_invalid_image_preprocessing_params(self): builder = self._build_nn_with_one_ip_layer() - image_input_names = ['input1','input2'] + image_input_names = ["input1", "input2"] with self.assertRaises(ValueError): - image_scale = {'invalid':1./255.} - builder.set_pre_processing_parameters(image_input_names=image_input_names, - image_scale=image_scale) + image_scale = {"invalid": 1.0 / 255.0} + builder.set_pre_processing_parameters( + image_input_names=image_input_names, image_scale=image_scale + ) with self.assertRaises(ValueError): - red_bias = {'invalid':-1} - 
builder.set_pre_processing_parameters(image_input_names=image_input_names,
-                                                  red_bias=red_bias)
+            red_bias = {"invalid": -1}
+            builder.set_pre_processing_parameters(
+                image_input_names=image_input_names, red_bias=red_bias
+            )
         with self.assertRaises(ValueError):
-            blue_bias = {'invalid':-1}
-            builder.set_pre_processing_parameters(image_input_names=image_input_names,
-                                                  blue_bias=blue_bias)
+            blue_bias = {"invalid": -1}
+            builder.set_pre_processing_parameters(
+                image_input_names=image_input_names, blue_bias=blue_bias
+            )
         with self.assertRaises(ValueError):
-            green_bias = {'invalid':-1}
-            builder.set_pre_processing_parameters(image_input_names=image_input_names,
-                                                  green_bias=green_bias)
+            green_bias = {"invalid": -1}
+            builder.set_pre_processing_parameters(
+                image_input_names=image_input_names, green_bias=green_bias
+            )
         with self.assertRaises(ValueError):
-            gray_bias = {'invalid':-1}
-            builder.set_pre_processing_parameters(image_input_names=image_input_names,
-                                                  gray_bias=gray_bias)
+            gray_bias = {"invalid": -1}
+            builder.set_pre_processing_parameters(
+                image_input_names=image_input_names, gray_bias=gray_bias
+            )
         with self.assertRaises(ValueError):
-            is_bgr = {'invalid':False}
-            builder.set_pre_processing_parameters(image_input_names=image_input_names,
-                                                  is_bgr=is_bgr)
+            is_bgr = {"invalid": False}
+            builder.set_pre_processing_parameters(
+                image_input_names=image_input_names, is_bgr=is_bgr
+            )
+
+
+@unittest.skipUnless(
+    _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+)
+class UseFloatArraytypeTest(unittest.TestCase):
+    """Test that the boolean flag `use_float_arraytype` correctly changes the datatype of the
+    network's inputs and outputs and produces a spec that the `MLModel` class can call `predict`
+    with.
+    """
+
+    def _test_use_float_array_helper(self, use_float_arraytype):
+        input_features = [("data", datatypes.Array(3))]
+        output_features = [("probs", None)]
+        builder = NeuralNetworkBuilder(
+            input_features=input_features,
+            output_features=output_features,
+            use_float_arraytype=use_float_arraytype,
+        )
+        weights = np.ones((3, 3))
+        builder.add_inner_product(
+            name="ip1",
+            W=weights,
+            b=None,
+            input_channels=3,
+            output_channels=3,
+            has_bias=False,
+            input_name="data",
+            output_name="probs",
+        )
+        spec = builder.spec
+        array_feature_type = (
+            coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.FLOAT32
+            if use_float_arraytype
+            else coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.DOUBLE
+        )
+        for input in spec.description.input:
+            self.assertEquals(input.type.multiArrayType.dataType, array_feature_type)
+        for output in spec.description.input:
+            self.assertEquals(output.type.multiArrayType.dataType, array_feature_type)
+
+        # Assert that the generated spec is functional
+        mlmodel = MLModel(spec)
+        data = np.ones((3,))
+        data_dict = {"data": data}
+        try:
+            predictions = mlmodel.predict(data_dict)
+        except Exception as e:
+            self.fail(e)
+        self.assertTrue(np.allclose(predictions["probs"], np.ones(3) * 3))
+
+    def test_true_use_float_array(self):
+        # Instruct the builder to use the Float32 datatype for inputs and outputs
+        self._test_use_float_array_helper(True)
+
+    def test_false_use_float_array(self):
+        # Instruct the builder to use its default Double datatype for inputs and outputs
+        self._test_use_float_array_helper(False)
diff --git a/coremltools/test/neural_network/test_numpy_nn_layers.py b/coremltools/test/neural_network/test_numpy_nn_layers.py
index fa93a08cd..22f758677 100644
--- a/coremltools/test/neural_network/test_numpy_nn_layers.py
+++ b/coremltools/test/neural_network/test_numpy_nn_layers.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function as _ import itertools import math @@ -10,34 +10,40 @@ import uuid import pytest from packaging import version +from six import string_types as _string_types import numpy as np -import pytest -import tensorflow as tf +from coremltools._deps import _HAS_TF, MSG_TF1_NOT_FOUND + +if _HAS_TF: + import tensorflow as tf +import torch import coremltools import coremltools.models.datatypes as datatypes +from coremltools.converters.mil.mil.ops.defs._utils import aggregated_pad from coremltools.models import _MLMODEL_FULL_PRECISION, _MLMODEL_HALF_PRECISION from coremltools.models import neural_network as neural_network from coremltools.models.neural_network import flexible_shape_utils -from coremltools.models.utils import macos_version, is_macos +from coremltools.models.utils import _macos_version, _is_macos np.random.seed(10) MIN_MACOS_VERSION_REQUIRED = (10, 13) LAYERS_10_15_MACOS_VERSION = (10, 15) +LAYERS_10_16_MACOS_VERSION = (10, 16) def _get_unary_model_spec(x, mode, alpha=1.0): input_dim = x.shape - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*input_dim))] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*input_dim))] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - builder.add_unary(name='unary', input_name='data', - output_name='output', mode=mode, alpha=alpha) + builder.add_unary( + name="unary", input_name="data", output_name="output", mode=mode, alpha=alpha + ) return builder.spec @@ -48,25 +54,62 @@ def runTest(self): def _compare_shapes(self, np_preds, coreml_preds): return np.squeeze(np_preds).shape == np.squeeze(coreml_preds).shape - def _compare_nd_shapes(self, np_preds, coreml_preds, shape=()): + def _test_shape_equality(self, np_preds, coreml_preds): + np.testing.assert_array_equal( + np.squeeze(coreml_preds).shape, np.squeeze(np_preds).shape + ) + + def _test_nd_shape_equality(self, np_preds, coreml_preds, shape=()): if shape: - return coreml_preds.shape == shape + np.testing.assert_array_equal(coreml_preds.shape, shape) else: # check if shape has 0 valued dimension if np.prod(np_preds.shape) == 0 and np.prod(coreml_preds.shape) == 0: - return True - return coreml_preds.shape == np_preds.shape + return + np.testing.assert_array_equal(coreml_preds.shape, np_preds.shape) - def _compare_predictions(self, np_preds, coreml_preds, delta=.01): + def _compare_predictions(self, np_preds, coreml_preds, delta=0.01): np_preds = np_preds.flatten() coreml_preds = coreml_preds.flatten() - for i in range(len(np_preds)): - max_den = max(1.0, np_preds[i], coreml_preds[i]) - if np.abs( - np_preds[i] / max_den - coreml_preds[i] / max_den) > delta: - return False + max_arr = np.maximum(np.maximum(np_preds, coreml_preds), 1.0) + all_deltas = np.abs(np_preds / max_arr - coreml_preds / max_arr) + max_delta = np.amax(all_deltas) + if max_delta > delta: + return False return True + def _test_predictions( + self, + np_preds, + coreml_preds, + delta=0.01, + test_metric="rel_error", + SNR=30, + PSNR=40, + ): + np_preds = np_preds.flatten() + coreml_preds = coreml_preds.flatten() + if test_metric == "rel_error": + max_arr = np.maximum(np.abs(np_preds), 1.0) + all_deltas = np.abs(np_preds / max_arr - coreml_preds / max_arr) + 
max_delta = np.amax(all_deltas, initial=0) + self.assertLessEqual( + max_delta, + delta, + "Expected %s to be within %s of %s" % (coreml_preds, delta, np_preds), + ) + elif test_metric == "SNR": + noise = np_preds - coreml_preds + noise_var = np.sum(noise ** 2) / len(noise) + 1e-7 + signal_energy = np.sum(np_preds ** 2) / len(np_preds) + max_signal_energy = np.amax(np_preds ** 2) + snr = 10 * np.log10(signal_energy / noise_var) + psnr = 10 * np.log10(max_signal_energy / noise_var) + self.assertGreaterEqual(snr, SNR) + self.assertGreaterEqual(psnr, PSNR) + else: + raise ValueError("Test metric not supported") + @staticmethod def _compare_moments(model, inputs, expected, use_cpu_only=True, num_moments=10): """ @@ -77,7 +120,7 @@ def _compare_moments(model, inputs, expected, use_cpu_only=True, num_moments=10) def get_moment(data, k): return np.mean(np.power(data - np.mean(data), k)) - if isinstance(model, str): + if isinstance(model, _string_types): model = coremltools.models.MLModel(model) model = coremltools.models.MLModel(model, useCPUOnly=use_cpu_only) @@ -88,7 +131,9 @@ def get_moment(data, k): coreml_preds = prediction[output_name] np_moments = [get_moment(np_preds.flatten(), k) for k in range(num_moments)] - coreml_moments = [get_moment(coreml_preds.flatten(), k) for k in range(num_moments)] + coreml_moments = [ + get_moment(coreml_preds.flatten(), k) for k in range(num_moments) + ] np.testing.assert_almost_equal(np_moments, coreml_moments, decimal=2) @@ -96,40 +141,45 @@ def get_moment(data, k): for output_name in expected: expected[output_name] = prediction[output_name] - def _test_model(self, - model, - input, - expected, - model_precision=_MLMODEL_FULL_PRECISION, - useCPUOnly=False, - output_name_shape_dict={}, - validate_shapes_only=False): + def _test_model( + self, + model, + input, + expected, + model_precision=_MLMODEL_FULL_PRECISION, + useCPUOnly=False, + output_name_shape_dict={}, + validate_shapes_only=False, + test_metric="rel_error", + delta=0.01, + SNR=30, + ): model_dir = None # if we're given a path to a model - if isinstance(model, str): + if isinstance(model, _string_types): model = coremltools.models.MLModel(model) # If we're passed in a specification, save out the model # and then load it back up elif isinstance(model, coremltools.proto.Model_pb2.Model): model_dir = tempfile.mkdtemp() - model_name = str(uuid.uuid4()) + '.mlmodel' + model_name = str(uuid.uuid4()) + ".mlmodel" model_path = os.path.join(model_dir, model_name) coremltools.utils.save_spec(model, model_path) model = coremltools.models.MLModel(model, useCPUOnly=useCPUOnly) # If we want to test the half precision case if model_precision == _MLMODEL_HALF_PRECISION: - model = coremltools.utils._convert_neural_network_weights_to_fp16( - model) + model = coremltools.utils._convert_neural_network_weights_to_fp16(model) try: prediction = model.predict(input, useCPUOnly=useCPUOnly) for output_name in expected: if self.__class__.__name__ == "SimpleTest": - assert (self._compare_shapes(expected[output_name], - prediction[output_name])) + self._test_shape_equality( + expected[output_name], prediction[output_name] + ) else: if output_name in output_name_shape_dict: output_shape = output_name_shape_dict[output_name] @@ -138,187 +188,193 @@ def _test_model(self, if len(output_shape) == 0 and len(expected[output_name].shape) == 0: output_shape = (1,) - assert (self._compare_nd_shapes(expected[output_name], - prediction[output_name], - output_shape)) + + self._test_nd_shape_equality( + expected[output_name], 
prediction[output_name], output_shape + ) if not validate_shapes_only: - assert (self._compare_predictions(expected[output_name], - prediction[output_name])) + self._test_predictions( + expected[output_name], + prediction[output_name], + delta=delta, + test_metric=test_metric, + SNR=SNR, + ) finally: # Remove the temporary directory if we created one if model_dir and os.path.exists(model_dir): shutil.rmtree(model_dir) -@unittest.skipIf(not is_macos() or macos_version() < MIN_MACOS_VERSION_REQUIRED, - 'macOS 10.13+ is required. Skipping tests.') +@unittest.skipIf( + not _is_macos() or _macos_version() < MIN_MACOS_VERSION_REQUIRED, + "macOS 10.13+ is required. Skipping tests.", +) class SimpleTest(CorrectnessTest): - def test_tiny_upsample_linear_mode(self): input_dim = (1, 1, 3) # (C,H,W) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_upsample(name='upsample', - scaling_factor_h=2, scaling_factor_w=3, - input_name='data', output_name='output', - mode='BILINEAR') - - input = { - 'data': np.reshape(np.array([1.0, 2.0, 3.0]), (1, 1, 3)) - } + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_upsample( + name="upsample", + scaling_factor_h=2, + scaling_factor_w=3, + input_name="data", + output_name="output", + mode="BILINEAR", + ) + + input = {"data": np.reshape(np.array([1.0, 2.0, 3.0]), (1, 1, 3))} expected = { - 'output': np.array( - [[1, 1.333, 1.666, 2, 2.333, 2.666, 3, 3, 3], - [1, 1.333, 1.6666, 2, 2.33333, 2.6666, 3, 3, 3] - ]) + "output": np.array( + [ + [1, 1.333, 1.666, 2, 2.333, 2.666, 3, 3, 3], + [1, 1.333, 1.6666, 2, 2.33333, 2.6666, 3, 3, 3], + ] + ) } self._test_model(builder.spec, input, expected) - self.assertEquals(len(input_dim), builder._get_rank('output')) + self.assertEquals(len(input_dim), builder._get_rank("output")) def test_LRN(self): input_dim = (1, 3, 3) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*input_dim))] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_lrn(name='lrn', input_name='data', output_name='output', - alpha=2, beta=3, local_size=1, k=8) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*input_dim))] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_lrn( + name="lrn", + input_name="data", + output_name="output", + alpha=2, + beta=3, + local_size=1, + k=8, + ) - input = { - 'data': np.ones((1, 3, 3)) - } - expected = { - 'output': 1e-3 * np.ones((1, 3, 3)) - } + input = {"data": np.ones((1, 3, 3))} + expected = {"output": 1e-3 * np.ones((1, 3, 3))} self._test_model(builder.spec, input, expected) - self.assertEqual(len(input_dim), builder._get_rank('output')) + self.assertEqual(len(input_dim), builder._get_rank("output")) def test_MVN(self): input_dim = (2, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*input_dim))] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_mvn(name='mvn', input_name='data', output_name='output', - across_channels=False, normalize_variance=False) + input_features = [("data", datatypes.Array(*input_dim))] + 
output_features = [("output", datatypes.Array(*input_dim))] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_mvn( + name="mvn", + input_name="data", + output_name="output", + across_channels=False, + normalize_variance=False, + ) - input = { - 'data': np.reshape(np.arange(8, dtype=np.float32), (2, 2, 2)) - } + input = {"data": np.reshape(np.arange(8, dtype=np.float32), (2, 2, 2))} expected = { - 'output': np.reshape(np.arange(8) - np.array( - [1.5, 1.5, 1.5, 1.5, 5.5, 5.5, 5.5, 5.5]), (2, 2, 2)) + "output": np.reshape( + np.arange(8) - np.array([1.5, 1.5, 1.5, 1.5, 5.5, 5.5, 5.5, 5.5]), + (2, 2, 2), + ) } self._test_model(builder.spec, input, expected) def test_L2_normalize(self): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*input_dim))] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*input_dim))] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_l2_normalize(name='mvn', input_name='data', - output_name='output') + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_l2_normalize(name="mvn", input_name="data", output_name="output") - input = { - 'data': np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - } + input = {"data": np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2))} expected = { - 'output': np.reshape(np.arange(4, dtype=np.float32), - (1, 2, 2)) / np.sqrt(14) + "output": np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) + / np.sqrt(14) } self._test_model(builder.spec, input, expected) def test_unary_sqrt(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': np.sqrt(x)} - spec = _get_unary_model_spec(x, 'sqrt') + input = {"data": x} + expected = {"output": np.sqrt(x)} + spec = _get_unary_model_spec(x, "sqrt") self._test_model(spec, input, expected) def test_unary_rsqrt(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': 1 / np.sqrt(x)} - spec = _get_unary_model_spec(x, 'rsqrt') + input = {"data": x} + expected = {"output": 1 / np.sqrt(x)} + spec = _get_unary_model_spec(x, "rsqrt") self._test_model(spec, input, expected) def test_unary_inverse(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': 1 / x} - spec = _get_unary_model_spec(x, 'inverse') + input = {"data": x} + expected = {"output": 1 / x} + spec = _get_unary_model_spec(x, "inverse") self._test_model(spec, input, expected) def test_unary_power(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': x ** 3} - spec = _get_unary_model_spec(x, 'power', 3) + input = {"data": x} + expected = {"output": x ** 3} + spec = _get_unary_model_spec(x, "power", 3) self._test_model(spec, input, expected) def test_unary_exp(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': np.exp(x)} - spec = _get_unary_model_spec(x, 'exp') + input = {"data": x} + expected = {"output": np.exp(x)} + spec = _get_unary_model_spec(x, "exp") self._test_model(spec, input, expected) def test_unary_log(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': np.log(x)} - spec = _get_unary_model_spec(x, 'log') + input = 
{"data": x} + expected = {"output": np.log(x)} + spec = _get_unary_model_spec(x, "log") self._test_model(spec, input, expected) def test_unary_abs(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': np.abs(x)} - spec = _get_unary_model_spec(x, 'abs') + input = {"data": x} + expected = {"output": np.abs(x)} + spec = _get_unary_model_spec(x, "abs") self._test_model(spec, input, expected) def test_unary_threshold(self): x = np.reshape(np.arange(1, 5, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': np.maximum(x, 2)} - spec = _get_unary_model_spec(x, 'threshold', 2) + input = {"data": x} + expected = {"output": np.maximum(x, 2)} + spec = _get_unary_model_spec(x, "threshold", 2) self._test_model(spec, input, expected) def test_split(self): input_dim = (9, 2, 2) x = np.random.rand(*input_dim) - input_features = [('data', datatypes.Array(*input_dim))] + input_features = [("data", datatypes.Array(*input_dim))] output_names = [] output_features = [] for i in range(3): - out = 'out_' + str(i) + out = "out_" + str(i) output_names.append(out) output_features.append((out, None)) - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_split(name='split', input_name='data', - output_names=output_names) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_split(name="split", input_name="data", output_names=output_names) - input = {'data': x} - expected = { - 'out_0': x[0: 3, :, :], - 'out_1': x[3: 6, :, :], - 'out_2': x[6: 9, :, :] - } + input = {"data": x} + expected = {"out_0": x[0:3, :, :], "out_1": x[3:6, :, :], "out_2": x[6:9, :, :]} self._test_model(builder.spec, input, expected) for output_ in output_names: @@ -326,377 +382,449 @@ def test_split(self): def test_scale_constant(self): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_scale(name='scale', W=5, b=45, has_bias=True, - input_name='data', output_name='output') + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_scale( + name="scale", + W=5, + b=45, + has_bias=True, + input_name="data", + output_name="output", + ) x = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': 5 * x + 45} + input = {"data": x} + expected = {"output": 5 * x + 45} self._test_model(builder.spec, input, expected) def test_scale_matrix(self): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) W = np.reshape(np.arange(5, 9), (1, 2, 2)) - builder.add_scale(name='scale', W=W, b=None, has_bias=False, - input_name='data', output_name='output', - shape_scale=[1, 2, 2]) + builder.add_scale( + name="scale", + W=W, + b=None, + has_bias=False, + input_name="data", + output_name="output", + shape_scale=[1, 2, 2], + ) x = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': W * x} + input 
= {"data": x} + expected = {"output": W * x} self._test_model(builder.spec, input, expected) def test_bias_constant(self): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_bias(name='bias', b=45, input_name='data', - output_name='output') + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_bias(name="bias", b=45, input_name="data", output_name="output") x = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': x + 45} + input = {"data": x} + expected = {"output": x + 45} self._test_model(builder.spec, input, expected) def test_bias_matrix(self): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) b = np.reshape(np.arange(5, 9), (1, 2, 2)) - builder.add_bias(name='bias', b=b, input_name='data', - output_name='output', - shape_bias=[1, 2, 2]) + builder.add_bias( + name="bias", + b=b, + input_name="data", + output_name="output", + shape_bias=[1, 2, 2], + ) x = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': x + b} + input = {"data": x} + expected = {"output": x + b} self._test_model(builder.spec, input, expected) def test_load_constant(self, model_precision=_MLMODEL_FULL_PRECISION): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) b = np.reshape(np.arange(5, 9), (1, 2, 2)) - builder.add_load_constant(name='load_constant', output_name='bias', - constant_value=b, shape=[1, 2, 2]) - builder.add_elementwise(name='add', input_names=['data', 'bias'], - output_name='output', mode='ADD') + builder.add_load_constant( + name="load_constant", output_name="bias", constant_value=b, shape=[1, 2, 2] + ) + builder.add_elementwise( + name="add", input_names=["data", "bias"], output_name="output", mode="ADD" + ) x = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': x + b} + input = {"data": x} + expected = {"output": x + b} self._test_model(builder.spec, input, expected, model_precision) - self.assertEqual(len(input_dim), builder._get_rank('output')) + self.assertEqual(len(input_dim), builder._get_rank("output")) def test_load_constant_half_precision(self): self.test_load_constant(model_precision=_MLMODEL_HALF_PRECISION) def test_min(self): input_dim = (1, 2, 2) - input_features = [('data_0', datatypes.Array(*input_dim)), - ('data_1', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [ + ("data_0", datatypes.Array(*input_dim)), + ("data_1", datatypes.Array(*input_dim)), + ] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, 
- output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - builder.add_elementwise(name='min', input_names=['data_0', 'data_1'], - output_name='output', mode='MIN') + builder.add_elementwise( + name="min", + input_names=["data_0", "data_1"], + output_name="output", + mode="MIN", + ) x1 = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) x2 = np.reshape(np.arange(2, 6, dtype=np.float32), (1, 2, 2)) - input = {'data_0': x1, 'data_1': x2} - expected = {'output': np.minimum(x1, x2)} + input = {"data_0": x1, "data_1": x2} + expected = {"output": np.minimum(x1, x2)} self._test_model(builder.spec, input, expected) - self.assertEqual(len(input_dim), builder._get_rank('output')) + self.assertEqual(len(input_dim), builder._get_rank("output")) def test_conv_same_padding(self): input_dim = (10, 15, 15) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) W = np.random.rand(3, 3, 10, 20) - builder.add_convolution(name='conv', kernel_channels=10, - output_channels=20, - height=3, width=3, stride_height=2, - stride_width=2, - border_mode='same', groups=1, - W=W, b=None, has_bias=False, - input_name='data', output_name='output', - same_padding_asymmetry_mode='TOP_LEFT_HEAVY') + builder.add_convolution( + name="conv", + kernel_channels=10, + output_channels=20, + height=3, + width=3, + stride_height=2, + stride_width=2, + border_mode="same", + groups=1, + W=W, + b=None, + has_bias=False, + input_name="data", + output_name="output", + same_padding_asymmetry_mode="TOP_LEFT_HEAVY", + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': np.random.rand(20, 8, 8)} + input = {"data": x} + expected = {"output": np.random.rand(20, 8, 8)} - self._test_model( - builder.spec, input, expected, validate_shapes_only=True) - self.assertEqual(len(input_dim), builder._get_rank('output')) + self._test_model(builder.spec, input, expected, validate_shapes_only=True) + self.assertEqual(len(input_dim), builder._get_rank("output")) def test_deconv_valid_padding(self): input_dim = (10, 15, 15) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) W = np.random.rand(3, 3, 10, 20) - builder.add_convolution(name='deconv', kernel_channels=10, - output_channels=20, - height=3, width=3, stride_height=2, - stride_width=2, - border_mode='valid', groups=1, - W=W, b=None, has_bias=False, - is_deconv=True, - input_name='data', output_name='output', - padding_top=2, padding_bottom=3, - padding_left=2, padding_right=3) + builder.add_convolution( + name="deconv", + kernel_channels=10, + output_channels=20, + height=3, + width=3, + stride_height=2, + stride_width=2, + border_mode="valid", + groups=1, + W=W, + b=None, + has_bias=False, + is_deconv=True, + input_name="data", + output_name="output", + padding_top=2, + padding_bottom=3, + padding_left=2, + padding_right=3, + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': np.random.rand(20, 26, 
26)} + input = {"data": x} + expected = {"output": np.random.rand(20, 26, 26)} - self._test_model( - builder.spec, input, expected, validate_shapes_only=True) + self._test_model(builder.spec, input, expected, validate_shapes_only=True) def test_deconv_non_unit_groups(self): input_dim = (16, 15, 15) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder( - input_features, output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) W = np.random.rand(3, 3, 16, 5) - builder.add_convolution(name='deconv', kernel_channels=16, - output_channels=20, - height=3, width=3, stride_height=2, - stride_width=2, - border_mode='valid', groups=4, - W=W, b=None, has_bias=False, - is_deconv=True, - input_name='data', output_name='output', - padding_top=2, padding_bottom=3, - padding_left=2, padding_right=3) + builder.add_convolution( + name="deconv", + kernel_channels=16, + output_channels=20, + height=3, + width=3, + stride_height=2, + stride_width=2, + border_mode="valid", + groups=4, + W=W, + b=None, + has_bias=False, + is_deconv=True, + input_name="data", + output_name="output", + padding_top=2, + padding_bottom=3, + padding_left=2, + padding_right=3, + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': np.random.rand(20, 26, 26)} + input = {"data": x} + expected = {"output": np.random.rand(20, 26, 26)} - self._test_model( - builder.spec, input, expected, validate_shapes_only=True) + self._test_model(builder.spec, input, expected, validate_shapes_only=True) def test_linear_activation(self): input_dim = (10, 15, 15) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_activation(name='activation', - non_linearity='LINEAR', - input_name='data', - output_name='output', params=[34.0, 67.0]) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_activation( + name="activation", + non_linearity="LINEAR", + input_name="data", + output_name="output", + params=[34.0, 67.0], + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': 34.0 * x + 67.0} + input = {"data": x} + expected = {"output": 34.0 * x + 67.0} self._test_model(builder.spec, input, expected) def test_padding_constant(self): input_dim = (1, 2, 3) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_padding( + name="pad", + left=1, + right=0, + top=2, + bottom=0, + value=-1, + input_name="data", + output_name="output", + ) - builder = neural_network.NeuralNetworkBuilder( - input_features, output_features) - builder.add_padding(name='pad', - left=1, right=0, top=2, bottom=0, - value=-1, - input_name='data', - output_name='output') - - x = np.reshape(np.array([[1, 2, 3], [4, 5, 6]]), (1, 2, 3)).astype( - np.float32) - input = {'data': x} + x = np.reshape(np.array([[1, 2, 3], [4, 5, 6]]), (1, 2, 3)).astype(np.float32) + input = {"data": x} y = np.reshape( - 
np.array([[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, 1, 2, 3], - [-1, 4, 5, 6]]), (1, 4, 4)).astype(np.float32) - expected = {'output': y} + np.array( + [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, 1, 2, 3], [-1, 4, 5, 6]] + ), + (1, 4, 4), + ).astype(np.float32) + expected = {"output": y} self._test_model(builder.spec, input, expected) def test_padding_replication(self): input_dim = (1, 2, 3) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_padding(name='pad', - left=1, top=2, - input_name='data', - output_name='output', padding_type='replication') - - x = np.reshape(np.array([[1, 2, 3], [4, 5, 6]]), (1, 2, 3)).astype( - np.float32) - input = {'data': x} - y = np.reshape(np.array([[1, 1, 2, 3], [1, 1, 2, 3], [1, 1, 2, 3], - [4, 4, 5, 6]]), (1, 4, 4)).astype(np.float32) - expected = {'output': y} + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_padding( + name="pad", + left=1, + top=2, + input_name="data", + output_name="output", + padding_type="replication", + ) + + x = np.reshape(np.array([[1, 2, 3], [4, 5, 6]]), (1, 2, 3)).astype(np.float32) + input = {"data": x} + y = np.reshape( + np.array([[1, 1, 2, 3], [1, 1, 2, 3], [1, 1, 2, 3], [4, 4, 5, 6]]), + (1, 4, 4), + ).astype(np.float32) + expected = {"output": y} self._test_model(builder.spec, input, expected) def test_reshape_target_shape_3(self): input_dim = (1, 2, 5) # (C,H,W) target_dim = (10, 1, 1) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_reshape(name='reshape', input_name='data', - output_name='output', target_shape=target_dim, - mode=0) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_reshape( + name="reshape", + input_name="data", + output_name="output", + target_shape=target_dim, + mode=0, + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': np.reshape(x, (10, 1, 1))} + input = {"data": x} + expected = {"output": np.reshape(x, (10, 1, 1))} self._test_model(builder.spec, input, expected) - self.assertEqual(len(target_dim), builder._get_rank('output')) + self.assertEqual(len(target_dim), builder._get_rank("output")) def test_reshape_target_shape_4(self): input_dim = (1, 2, 5) # (C,H,W) target_dim = (1, 10, 1, 1) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_reshape(name='reshape', input_name='data', - output_name='output', target_shape=target_dim, - mode=0) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_reshape( + name="reshape", + input_name="data", + output_name="output", + target_shape=target_dim, + mode=0, + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': np.reshape(x, (1, 10, 1, 1))} + input = {"data": x} + expected = {"output": np.reshape(x, (1, 10, 1, 1))} self._test_model(builder.spec, input, 
expected) - self.assertEqual(len(target_dim), builder._get_rank('output')) + self.assertEqual(len(target_dim), builder._get_rank("output")) def test_bias_matrix_cpu(self): input_dim = (1, 2, 2) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) b = np.reshape(np.arange(5, 9), (1, 2, 2)) - builder.add_bias(name='bias', b=b, input_name='data', - output_name='output', - shape_bias=[1, 2, 2]) + builder.add_bias( + name="bias", + b=b, + input_name="data", + output_name="output", + shape_bias=[1, 2, 2], + ) x = np.reshape(np.arange(4, dtype=np.float32), (1, 2, 2)) - input = {'data': x} - expected = {'output': x + b} + input = {"data": x} + expected = {"output": x + b} self._test_model(builder.spec, input, expected, useCPUOnly=True) def test_linear_activation_cpu(self): input_dim = (10, 15, 15) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_activation(name='activation', - non_linearity='LINEAR', - input_name='data', - output_name='output', params=[34.0, 67.0]) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_activation( + name="activation", + non_linearity="LINEAR", + input_name="data", + output_name="output", + params=[34.0, 67.0], + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': 34.0 * x + 67.0} + input = {"data": x} + expected = {"output": 34.0 * x + 67.0} self._test_model(builder.spec, input, expected, useCPUOnly=True) -@unittest.skipIf(not is_macos() or macos_version() < LAYERS_10_15_MACOS_VERSION, - 'macOS 10.15+ required. Skipping tests.') +@unittest.skipIf( + not _is_macos() or _macos_version() < LAYERS_10_15_MACOS_VERSION, + "macOS 10.15+ required. 
Skipping tests.", +) class NewLayersSimpleTest(CorrectnessTest): - def test_shape_flexibility_range(self): - input_features = [('data', datatypes.Array(*(3,4)))] - builder = neural_network.NeuralNetworkBuilder(input_features, - [('output', None)], disable_rank5_shape_mapping=True) - builder.add_sin(name='sin', input_name='data', output_name='output') + input_features = [("data", datatypes.Array(*(3, 4)))] + builder = neural_network.NeuralNetworkBuilder( + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) + builder.add_sin(name="sin", input_name="data", output_name="output") spec = builder.spec - flexible_shape_utils.set_multiarray_ndshape_range(spec, feature_name='data', - lower_bounds=[1,1], upper_bounds=[-1,5]) + flexible_shape_utils.set_multiarray_ndshape_range( + spec, feature_name="data", lower_bounds=[1, 1], upper_bounds=[-1, 5] + ) - shapes = [(3,4), (1,5), (60,5), (22,4), (5,3)] + shapes = [(3, 4), (1, 5), (60, 5), (22, 4), (5, 3)] for s in shapes: x = np.random.rand(*s) - expected = {'output': np.sin(x)} - self._test_model(spec, {'data': x}, expected, useCPUOnly=True) + expected = {"output": np.sin(x)} + self._test_model(spec, {"data": x}, expected, useCPUOnly=True) def test_shape_flexibility_enumeration(self, rank=4): default_shape = tuple(np.random.randint(1, 15, size=rank)) - input_features = [('data', datatypes.Array(*default_shape))] + input_features = [("data", datatypes.Array(*default_shape))] builder = neural_network.NeuralNetworkBuilder( input_features=input_features, - output_features=[('output', None)], - disable_rank5_shape_mapping=True) - builder.add_sin(name='sin', input_name='data', output_name='output') + output_features=[("output", None)], + disable_rank5_shape_mapping=True, + ) + builder.add_sin(name="sin", input_name="data", output_name="output") spec = builder.spec - shapes = [tuple(np.random.randint(1, 15, size=rank)), - tuple(np.random.randint(1, 15, size=rank))] + shapes = [ + tuple(np.random.randint(1, 15, size=rank)), + tuple(np.random.randint(1, 15, size=rank)), + ] flexible_shape_utils.add_multiarray_ndshape_enumeration( - spec, feature_name='data', enumerated_shapes=shapes) + spec, feature_name="data", enumerated_shapes=shapes + ) shapes.append(default_shape) for s in shapes: x = np.random.rand(*s) - expected = {'output': np.sin(x)} - self._test_model(spec, {'data': x}, expected, useCPUOnly=True) + expected = {"output": np.sin(x)} + self._test_model(spec, {"data": x}, expected, useCPUOnly=True) def test_shape_flexibility_enumeration_rank3(self): self.test_shape_flexibility_enumeration(rank=3) @@ -707,27 +835,28 @@ def test_shape_flexibility_enumeration_rank2(self): def test_transpose_cpu(self): for rank in range(1, 6): axes = np.random.permutation(rank) - axes = [axis - rank if np.random.choice([True, False]) else axis for axis in axes] + axes = [ + axis - rank if np.random.choice([True, False]) else axis + for axis in axes + ] input_shape = np.random.randint(low=2, high=6, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_transpose(name='TransposeND', - axes=axes, - input_name='data', - output_name='output') + builder.add_transpose( + name="TransposeND", axes=axes, 
input_name="data", output_name="output" + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.transpose(x, axes)} + input = {"data": x} + expected = {"output": np.transpose(x, axes)} self._test_model(builder.spec, input, expected, useCPUOnly=True) - def test_dynamic_weight_conv(self): input_dim = (1, 3, 16, 16) @@ -739,30 +868,31 @@ def test_dynamic_weight_conv(self): output_channels, kernel_channels, height, width = weight_dim input_features = [ - ('input', datatypes.Array(*input_dim)), - ('weight', datatypes.Array(*weight_dim))] - output_features = [('output', None)] + ("input", datatypes.Array(*input_dim)), + ("weight", datatypes.Array(*weight_dim)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, - output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) builder.add_convolution( - name='two_input_conv_layer', + name="two_input_conv_layer", kernel_channels=kernel_channels, output_channels=output_channels, height=height, width=width, stride_height=1, stride_width=1, - border_mode='valid', + border_mode="valid", groups=1, W=None, b=None, has_bias=False, - input_name=['input', 'weight'], - output_name='output') + input_name=["input", "weight"], + output_name="output", + ) # Assigning everything to ones should cover the execution path # and engine failures, but is not a complete check on numerics. @@ -770,8 +900,8 @@ def test_dynamic_weight_conv(self): weight_val = np.ones(weight_dim) expected = np.ones(output_dim) * 27 - feed_dict = {'input': input_val, 'weight': weight_val} - expected = {'output': expected} + feed_dict = {"input": input_val, "weight": weight_val} + expected = {"output": expected} self._test_model(builder.spec, feed_dict, expected, useCPUOnly=True) self._test_model(builder.spec, feed_dict, expected, useCPUOnly=False) @@ -786,216 +916,273 @@ def test_dynamic_weight_deconv(self): output_channels, kernel_channels, height, width = weight_dim input_features = [ - ('data', datatypes.Array(*input_dim)), - ('weight', datatypes.Array(*weight_dim))] - output_features = [('output', None)] + ("data", datatypes.Array(*input_dim)), + ("weight", datatypes.Array(*weight_dim)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, - output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) builder.add_convolution( - name='deconv', + name="deconv", kernel_channels=kernel_channels, output_channels=output_channels, height=height, width=width, stride_height=1, stride_width=1, - border_mode='valid', + border_mode="valid", groups=1, W=None, b=None, has_bias=False, is_deconv=True, - input_name=['data', 'weight'], - output_name='output') + input_name=["data", "weight"], + output_name="output", + ) input_val = np.ones(input_dim) weight_val = np.ones(weight_dim) expected = np.ones(output_dim) * 27 - feed_dict = {'data': input_val, 'weight': weight_val} - expected = {'output': expected} + feed_dict = {"data": input_val, "weight": weight_val} + expected = {"output": expected} self._test_model(builder.spec, feed_dict, expected) def test_batched_mat_mul_cpu(self, cpu_only=True): - a_shapes = [(10,), (4, 10), (10,), (10,), (2, 3), (1, 3, 4), - (1, 3, 1, 2, 3), (2, 3, 1, 3, 4)] - b_shapes = [(10,), (10,), (10, 3), (2, 10, 3), (3, 4), (3, 2, 4, 5), - (1, 4, 3, 2), (2, 1, 2, 4, 5)] - out_shapes = [(1, 1), (4, 1), (1, 3), (2, 1, 3), 
(2, 4), (3, 2, 3, 5), - (1, 3, 4, 2, 2), (2, 3, 2, 3, 5)] + a_shapes = [ + (10,), + (4, 10), + (10,), + (10,), + (2, 3), + (1, 3, 4), + (1, 3, 1, 2, 3), + (2, 3, 1, 3, 4), + ] + b_shapes = [ + (10,), + (10,), + (10, 3), + (2, 10, 3), + (3, 4), + (3, 2, 4, 5), + (1, 4, 3, 2), + (2, 1, 2, 4, 5), + ] + out_shapes = [ + (1, 1), + (4, 1), + (1, 3), + (2, 1, 3), + (2, 4), + (3, 2, 3, 5), + (1, 3, 4, 2, 2), + (2, 3, 2, 3, 5), + ] for a_shape, b_shape, outShape in zip(a_shapes, b_shapes, out_shapes): input_shapes = [a_shape, b_shape] input_features = [ - ('A', datatypes.Array(*input_shapes[0])), - ('B', datatypes.Array(*input_shapes[1])) + ("A", datatypes.Array(*input_shapes[0])), + ("B", datatypes.Array(*input_shapes[1])), ] - output_features = [('output', None)] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_batched_mat_mul(name='batched_mat_mul', - input_names=['A', 'B'], - output_name='output', - transpose_a=False, - transpose_b=False) + builder.add_batched_mat_mul( + name="batched_mat_mul", + input_names=["A", "B"], + output_name="output", + transpose_a=False, + transpose_b=False, + ) a = np.random.rand(*input_shapes[0]) b = np.random.rand(*input_shapes[1]) - input_ = {'A': a, 'B': b} - expected = {'output': np.array(np.matmul(a, b))} - shape_dict = {'output': outShape} - self._test_model(builder.spec, input_, expected, useCPUOnly=cpu_only, - output_name_shape_dict=shape_dict) - self.assertEqual(len(outShape), builder._get_rank('output')) + input_ = {"A": a, "B": b} + expected = {"output": np.array(np.matmul(a, b))} + shape_dict = {"output": outShape} + self._test_model( + builder.spec, + input_, + expected, + useCPUOnly=cpu_only, + output_name_shape_dict=shape_dict, + ) + self.assertEqual(len(outShape), builder._get_rank("output")) def test_batched_mat_mul_gpu(self): self.test_batched_mat_mul_cpu(cpu_only=False) def test_batched_mat_mul_with_transposes_cpu(self, cpu_only=True): - for transpose_a, transpose_b in itertools.product([True, False], - [True, False]): + for transpose_a, transpose_b in itertools.product([True, False], [True, False]): a_shape = (3, 4) b_shape = (4, 5) a_shape = a_shape[::-1] if transpose_a else a_shape b_shape = b_shape[::-1] if transpose_b else b_shape input_shapes = [a_shape, b_shape] input_features = [ - ('A', datatypes.Array(*input_shapes[0])), - ('B', datatypes.Array(*input_shapes[1])) + ("A", datatypes.Array(*input_shapes[0])), + ("B", datatypes.Array(*input_shapes[1])), ] - output_features = [('output', None)] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True ) builder.add_batched_mat_mul( - name='BatchedMatMul', input_names=['A', 'B'], - output_name='output', transpose_a=transpose_a, - transpose_b=transpose_b + name="BatchedMatMul", + input_names=["A", "B"], + output_name="output", + transpose_a=transpose_a, + transpose_b=transpose_b, ) a = np.random.rand(*input_shapes[0]) b = np.random.rand(*input_shapes[1]) - inputs = {'A': a, 'B': b} + inputs = {"A": a, "B": b} a = a.T if transpose_a else a b = b.T if transpose_b else b - expected = {'output': np.matmul(a, b)} + expected = {"output": np.matmul(a, b)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) def 
test_batched_mat_mul_with_transposes_gpu(self): self.test_batched_mat_mul_with_transposes_cpu(cpu_only=False) - def test_batched_mat_mul_single_input_cpu(self, - model_precision=_MLMODEL_FULL_PRECISION, - cpu_only=True): + def test_batched_mat_mul_single_input_cpu( + self, model_precision=_MLMODEL_FULL_PRECISION, cpu_only=True + ): X1 = 11 X2 = 23 W = np.random.rand(X1, X2) bias = np.random.rand(X2) - input_shapes = [(X1,), (5, X1), (2, 3, X1), (4, 1, X1), (12, 5, 8, X1), - (2, 3, 1, 5, X1)] + input_shapes = [ + (X1,), + (5, X1), + (2, 3, X1), + (4, 1, X1), + (12, 5, 8, X1), + (2, 3, 1, 5, X1), + ] for input_shape in input_shapes: x = np.random.rand(*input_shape) np_out = np.matmul(x, W) + bias - expected = {'output': np_out} + expected = {"output": np_out} - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_batched_mat_mul(name='batched_mat_mul', - input_names=['data'], - output_name='output', - weight_matrix_rows=X1, - weight_matrix_columns=X2, - W=W, bias=bias) - inputs = {'data': x} + builder.add_batched_mat_mul( + name="batched_mat_mul", + input_names=["data"], + output_name="output", + weight_matrix_rows=X1, + weight_matrix_columns=X2, + W=W, + bias=bias, + ) + inputs = {"data": x} self._test_model( - builder.spec, inputs, expected, - model_precision=model_precision, useCPUOnly=cpu_only) + builder.spec, + inputs, + expected, + model_precision=model_precision, + useCPUOnly=cpu_only, + ) def test_batched_mat_mul_single_input_half_precision_cpu(self): self.test_batched_mat_mul_single_input_cpu( - model_precision=_MLMODEL_HALF_PRECISION, - cpu_only=True) + model_precision=_MLMODEL_HALF_PRECISION, cpu_only=True + ) def test_batched_mat_mul_single_input_gpu(self): - self.test_batched_mat_mul_single_input_cpu(model_precision=_MLMODEL_FULL_PRECISION, cpu_only=False) + self.test_batched_mat_mul_single_input_cpu( + model_precision=_MLMODEL_FULL_PRECISION, cpu_only=False + ) def test_embedding_nd_cpu( - self, model_precision=_MLMODEL_FULL_PRECISION, use_cpu_only=True): + self, model_precision=_MLMODEL_FULL_PRECISION, use_cpu_only=True + ): vocab_size = 10 embedding_size = 19 W = np.random.rand(embedding_size, vocab_size) - input_shapes = [(5, 1), (2, 3, 1), (4, 1, 1), (12, 5, 8, 1), - (2, 3, 1, 5, 1)] + input_shapes = [(5, 1), (2, 3, 1), (4, 1, 1), (12, 5, 8, 1), (2, 3, 1, 5, 1)] for input_shape in input_shapes: x = np.random.randint(vocab_size, size=input_shape) np_out = np.take(np.transpose(W), np.squeeze(x, axis=-1), axis=0) - expected = {'output': np_out} + expected = {"output": np_out} - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_embedding_nd(name='embedding_nd', - input_name='data', - output_name='output', - vocab_size=vocab_size, - embedding_size=embedding_size, - W=W) + builder.add_embedding_nd( + name="embedding_nd", + input_name="data", + output_name="output", + vocab_size=vocab_size, + 
embedding_size=embedding_size, + W=W, + ) - input = {'data': x.astype(np.float32)} + input = {"data": x.astype(np.float32)} self._test_model( - builder.spec, input, expected, - model_precision=model_precision, useCPUOnly=use_cpu_only) + builder.spec, + input, + expected, + model_precision=model_precision, + useCPUOnly=use_cpu_only, + ) def test_embedding_nd_half_precision_cpu(self): self.test_embedding_nd_cpu( - model_precision=_MLMODEL_HALF_PRECISION, use_cpu_only=True) + model_precision=_MLMODEL_HALF_PRECISION, use_cpu_only=True + ) def test_embedding_nd_GPU(self): self.test_embedding_nd_cpu( - model_precision=_MLMODEL_FULL_PRECISION, use_cpu_only=False) + model_precision=_MLMODEL_FULL_PRECISION, use_cpu_only=False + ) def test_embedding_nd_half_precision_GPU(self): self.test_embedding_nd_cpu( - model_precision=_MLMODEL_HALF_PRECISION, use_cpu_only=False) + model_precision=_MLMODEL_HALF_PRECISION, use_cpu_only=False + ) def test_softmax_nan_bug_cpu(self): - input_shape = [2,2] - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] - for axis in [0,1]: + input_shape = [2, 2] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] + for axis in [0, 1]: builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_softmax_nd(name='softmax_nd', input_name='data', - output_name='output', axis=axis) + builder.add_softmax_nd( + name="softmax_nd", input_name="data", output_name="output", axis=axis + ) - x = np.array([[0.5, 0.5],[1e8, 1e8]]) - input = {'data': x} + x = np.array([[0.5, 0.5], [1e8, 1e8]]) + input = {"data": x} y = np.exp(x - np.max(x, axis=axis, keepdims=True)) y = y / np.sum(y, axis=axis, keepdims=True) - expected = {'output': y} + expected = {"output": y} self._test_model(builder.spec, input, expected, useCPUOnly=True) @@ -1003,21 +1190,25 @@ def test_softmax_nd_cpu(self, cpu_only=True): for rank in range(1, 6): for axis in range(-rank, rank): input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_softmax_nd(name='softmax_nd', input_name='data', - output_name='output', axis=axis) + builder.add_softmax_nd( + name="softmax_nd", + input_name="data", + output_name="output", + axis=axis, + ) x = np.random.rand(*input_shape) - input = {'data': x} + input = {"data": x} y = np.exp(x - np.max(x, axis=axis, keepdims=True)) y = y / np.sum(y, axis=axis, keepdims=True) - expected = {'output': y} + expected = {"output": y} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -1038,23 +1229,28 @@ def test_concat_nd_cpu(self, cpu_only=True): input_shapes[-1][axis] = np.random.choice(range(2, 8)) output_shape[axis] += input_shapes[-1][axis] for i, input_dim in enumerate(input_shapes): - input_name = 'input_%s' % str(i) + input_name = "input_%s" % str(i) input_names.append(input_name) input_features.append((input_name, datatypes.Array(*input_dim))) - output_features = [('output', None)] + output_features = [("output", None)] - builder = 
neural_network.NeuralNetworkBuilder(input_features, output_features, - disable_rank5_shape_mapping=True) + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_concat_nd(name='concat_nd', input_names=input_names, - output_name='output', axis=axis) + builder.add_concat_nd( + name="concat_nd", + input_names=input_names, + output_name="output", + axis=axis, + ) input_tensors = [] for input_dim in input_shapes: input_tensors.append(np.random.rand(*input_dim)) input = dict(zip(input_names, input_tensors)) - expected = {'output': np.concatenate(input_tensors, axis)} + expected = {"output": np.concatenate(input_tensors, axis)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -1067,18 +1263,19 @@ def test_fill_like_cpu(self, cpu_only=True): target_shape = np.random.randint(low=2, high=6, size=rank) value = float(np.random.rand()) - input_features = [('tensor', datatypes.Array(*target_shape))] + input_features = [("tensor", datatypes.Array(*target_shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_fill_like(name='fill_like', input_name='tensor', - output_name='output', value=value) + builder.add_fill_like( + name="fill_like", input_name="tensor", output_name="output", value=value + ) tensor = np.random.rand(*target_shape) - input = {'tensor': tensor} - expected = {'output': np.zeros(target_shape) + value} + input = {"tensor": tensor} + expected = {"output": np.zeros(target_shape) + value} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -1090,23 +1287,27 @@ def test_fill_static_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] value = float(np.random.rand()) builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) - builder.add_fill_static(name='fill_static', output_name='tmp', - output_shape=list(shape), value=value) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) + builder.add_fill_static( + name="fill_static", + output_name="tmp", + output_shape=list(shape), + value=value, + ) - builder.add_elementwise('add_layer', ['data', 'tmp'], 'output', mode='ADD') + builder.add_elementwise("add_layer", ["data", "tmp"], "output", mode="ADD") data = np.random.rand(*shape) - input = {'data': data} - expected = {'output': data + value} + input = {"data": data} + expected = {"output": data + value} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(len(shape), builder._get_rank('output')) + self.assertEqual(len(shape), builder._get_rank("output")) def test_fill_static_gpu(self): self.test_fill_static_cpu(cpu_only=False) @@ -1117,20 +1318,24 @@ def test_fill_dynamic_cpu(self, cpu_only=True): input_shape = np.random.randint(low=2, high=8, size=rank) value = float(np.random.rand()) - input_features = [('shape', datatypes.Array(len(input_shape)))] + input_features = [("shape", datatypes.Array(len(input_shape)))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - 
builder.add_fill_dynamic(name='fill_dynamic', input_name='shape', - output_name='output', value=value) + builder.add_fill_dynamic( + name="fill_dynamic", + input_name="shape", + output_name="output", + value=value, + ) - input = {'shape': np.array(input_shape, dtype='float')} - expected = {'output': np.zeros(input_shape) + value} + input = {"shape": np.array(input_shape, dtype="float")} + expected = {"output": np.zeros(input_shape) + value} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(builder._get_rank('output'), -1) + self.assertEqual(builder._get_rank("output"), -1) def test_fill_dynamic_gpu(self): self.test_fill_dynamic_cpu(cpu_only=False) @@ -1143,24 +1348,32 @@ def test_broadcast_to_like_cpu(self, cpu_only=True): input_shape = np.where(mask, 1, input_shape) target_rank = np.random.randint(low=rank, high=6) - target_shape = [np.random.randint(low=2, high=8) if (-i > rank or input_shape[i] == 1) - else input_shape[i] for i in range(-1, -target_rank - 1, -1)][::-1] + target_shape = [ + np.random.randint(low=2, high=8) + if (-i > rank or input_shape[i] == 1) + else input_shape[i] + for i in range(-1, -target_rank - 1, -1) + ][::-1] - input_features = [('data', datatypes.Array(*input_shape)), - ('tensor', datatypes.Array(*target_shape))] + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("tensor", datatypes.Array(*target_shape)), + ] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_broadcast_to_like(name='broadcast_to_like', - input_names=['data', 'tensor'], - output_name='output') + builder.add_broadcast_to_like( + name="broadcast_to_like", + input_names=["data", "tensor"], + output_name="output", + ) data = np.random.rand(*input_shape) tensor = np.random.rand(*target_shape) - inputs = {'data': data, 'tensor': tensor} - expected = {'output': np.broadcast_to(data, target_shape)} + inputs = {"data": data, "tensor": tensor} + expected = {"output": np.broadcast_to(data, target_shape)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -1175,26 +1388,32 @@ def test_broadcast_to_static_cpu(self, cpu_only=True): input_shape = np.where(mask, 1, input_shape) target_rank = np.random.randint(low=rank, high=6) - target_shape = [np.random.randint(low=2, high=8) if (-i > rank or input_shape[i] == 1) - else input_shape[i] for i in range(-1, -target_rank - 1, -1)][::-1] + target_shape = [ + np.random.randint(low=2, high=8) + if (-i > rank or input_shape[i] == 1) + else input_shape[i] + for i in range(-1, -target_rank - 1, -1) + ][::-1] - input_features = [('data', datatypes.Array(*input_shape))] + input_features = [("data", datatypes.Array(*input_shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_broadcast_to_static(name='broadcast_to_static', - input_name='data', - output_name='output', - output_shape=list(target_shape)) + builder.add_broadcast_to_static( + name="broadcast_to_static", + input_name="data", + output_name="output", + output_shape=list(target_shape), + ) data = np.random.rand(*input_shape) - input = {'data': data} - expected = {'output': np.broadcast_to(data, target_shape)} + input = {"data": data} + expected = {"output": np.broadcast_to(data, target_shape)} self._test_model(builder.spec, 
input, expected, useCPUOnly=cpu_only) - self.assertEqual(target_rank, builder._get_rank('output')) + self.assertEqual(target_rank, builder._get_rank("output")) def test_broadcast_to_static_gpu(self): self.test_broadcast_to_static_cpu(cpu_only=False) @@ -1207,26 +1426,34 @@ def test_broadcast_to_dynamic_cpu(self, cpu_only=True): input_shape = np.where(mask, 1, input_shape) target_rank = np.random.randint(low=rank, high=6) - target_shape = [np.random.randint(low=2, high=8) if (-i > rank or input_shape[i] == 1) - else input_shape[i] for i in range(-1, -target_rank - 1, -1)][::-1] + target_shape = [ + np.random.randint(low=2, high=8) + if (-i > rank or input_shape[i] == 1) + else input_shape[i] + for i in range(-1, -target_rank - 1, -1) + ][::-1] - input_features = [('data', datatypes.Array(*input_shape)), - ('shape', datatypes.Array(len(target_shape)))] + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("shape", datatypes.Array(len(target_shape))), + ] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_broadcast_to_dynamic(name='broadcast_to_dynamic', - input_names=['data', 'shape'], - output_name='output') + builder.add_broadcast_to_dynamic( + name="broadcast_to_dynamic", + input_names=["data", "shape"], + output_name="output", + ) data = np.random.rand(*input_shape) - inputs = {'data': data, 'shape': np.array(target_shape, dtype='float')} - expected = {'output': np.broadcast_to(data, target_shape)} + inputs = {"data": data, "shape": np.array(target_shape, dtype="float")} + expected = {"output": np.broadcast_to(data, target_shape)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(builder._get_rank('output'), -1) + self.assertEqual(builder._get_rank("output"), -1) def test_broadcast_to_dynamic_gpu(self): self.test_broadcast_to_dynamic_cpu(cpu_only=False) @@ -1241,89 +1468,109 @@ def test_unknown_rank(self, cpu_only=True): input_shape = np.where(mask, 1, input_shape) target_rank = np.random.randint(low=rank, high=6) - target_shape = [np.random.randint(low=2, high=8) if (-i > rank or input_shape[i] == 1) - else input_shape[i] for i in range(-1, -target_rank - 1, -1)][::-1] + target_shape = [ + np.random.randint(low=2, high=8) + if (-i > rank or input_shape[i] == 1) + else input_shape[i] + for i in range(-1, -target_rank - 1, -1) + ][::-1] - input_features = [('x', datatypes.Array(*input_shape)), - ('shape', datatypes.Array(len(target_shape)))] + input_features = [ + ("x", datatypes.Array(*input_shape)), + ("shape", datatypes.Array(len(target_shape))), + ] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_broadcast_to_dynamic(name='broadcast_to_dynamic', - input_names=['x', 'shape'], - output_name='y') + builder.add_broadcast_to_dynamic( + name="broadcast_to_dynamic", input_names=["x", "shape"], output_name="y" + ) condition = np.random.randint(0, 2, input_shape).astype(np.float32) - builder.add_load_constant_nd(name='load_constant_condition', - output_name='condition', - constant_value=condition, - shape=input_shape) - - builder.add_where_broadcastable(name='where', - input_names=['condition', 'x', 'y'], - output_name='output') + builder.add_load_constant_nd( + name="load_constant_condition", + output_name="condition", + 
constant_value=condition, + shape=input_shape, + ) - self.assertEqual(builder._get_rank('output'), -1) + builder.add_where_broadcastable( + name="where", input_names=["condition", "x", "y"], output_name="output" + ) + self.assertEqual(builder._get_rank("output"), -1) def test_trigonometry_cpu(self, cpu_only=True): - ops = ['sin', 'cos', 'tan', - 'asin', 'acos', 'atan', - 'sinh', 'cosh', 'tanh', - 'asinh', 'acosh', 'atanh'] + ops = [ + "sin", + "cos", + "tan", + "asin", + "acos", + "atan", + "sinh", + "cosh", + "tanh", + "asinh", + "acosh", + "atanh", + ] for op in ops: for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) x = np.random.rand(*shape) - if op == 'sin': - builder.add_sin(name=op, input_name='data', output_name='output') - expected = {'output': np.sin(x)} - elif op == 'cos': - builder.add_cos(name=op, input_name='data', output_name='output') - expected = {'output': np.cos(x)} - elif op == 'tan': - builder.add_tan(name=op, input_name='data', output_name='output') - expected = {'output': np.tan(x)} - elif op == 'asin': - builder.add_asin(name=op, input_name='data', output_name='output') - expected = {'output': np.arcsin(x)} - elif op == 'acos': - builder.add_acos(name=op, input_name='data', output_name='output') - expected = {'output': np.arccos(x)} - elif op == 'atan': - builder.add_atan(name=op, input_name='data', output_name='output') - expected = {'output': np.arctan(x)} - elif op == 'sinh': - builder.add_sinh(name=op, input_name='data', output_name='output') - expected = {'output': np.sinh(x)} - elif op == 'cosh': - builder.add_cosh(name=op, input_name='data', output_name='output') - expected = {'output': np.cosh(x)} - elif op == 'tanh': - builder.add_tanh(name=op, input_name='data', output_name='output') - expected = {'output': np.tanh(x)} - elif op == 'asinh': - builder.add_asinh(name=op, input_name='data', output_name='output') - expected = {'output': np.arcsinh(x)} - elif op == 'acosh': + if op == "sin": + builder.add_sin(name=op, input_name="data", output_name="output") + expected = {"output": np.sin(x)} + elif op == "cos": + builder.add_cos(name=op, input_name="data", output_name="output") + expected = {"output": np.cos(x)} + elif op == "tan": + builder.add_tan(name=op, input_name="data", output_name="output") + expected = {"output": np.tan(x)} + elif op == "asin": + builder.add_asin(name=op, input_name="data", output_name="output") + expected = {"output": np.arcsin(x)} + elif op == "acos": + builder.add_acos(name=op, input_name="data", output_name="output") + expected = {"output": np.arccos(x)} + elif op == "atan": + builder.add_atan(name=op, input_name="data", output_name="output") + expected = {"output": np.arctan(x)} + elif op == "sinh": + builder.add_sinh(name=op, input_name="data", output_name="output") + expected = {"output": np.sinh(x)} + elif op == "cosh": + builder.add_cosh(name=op, input_name="data", output_name="output") + expected = {"output": np.cosh(x)} + elif op == "tanh": + builder.add_tanh(name=op, input_name="data", output_name="output") + expected = {"output": np.tanh(x)} + elif op == "asinh": + builder.add_asinh(name=op, input_name="data", output_name="output") + expected = {"output": np.arcsinh(x)} + elif op == "acosh": x = 
np.random.choice([10, np.e, 1], tuple(shape)).astype(np.float32) - builder.add_acosh(name=op, input_name='data', output_name='output') - expected = {'output': np.arccosh(x)} - elif op == 'atanh': - builder.add_atanh(name=op, input_name='data', output_name='output') - expected = {'output': np.arctanh(x)} - - self._test_model(builder.spec, {'data': x}, expected, useCPUOnly=cpu_only) + builder.add_acosh(name=op, input_name="data", output_name="output") + expected = {"output": np.arccosh(x)} + elif op == "atanh": + builder.add_atanh(name=op, input_name="data", output_name="output") + expected = {"output": np.arctanh(x)} + + self._test_model( + builder.spec, {"data": x}, expected, useCPUOnly=cpu_only + ) def test_trigonometry_gpu(self): self.test_trigonometry_cpu(cpu_only=False) @@ -1331,16 +1578,16 @@ def test_trigonometry_gpu(self): def test_exp2_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) - builder.add_exp2(name='exp2', input_name='data', output_name='output') + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) + builder.add_exp2(name="exp2", input_name="data", output_name="output") x = np.random.rand(*shape) - input = {'data': x} - expected = {'output': np.exp2(x)} + input = {"data": x} + expected = {"output": np.exp2(x)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -1348,11 +1595,27 @@ def test_exp2_gpu(self): self.test_exp2_cpu(cpu_only=False) def test_elementwise_binary_cpu(self, cpu_only=True): - input_names = ['A', 'B'] - test_cases = ['greater', 'less', 'equal', 'not_equal', 'greater_equal', - 'less_equal', 'logical_and', 'logical_or', 'logical_xor', - 'add', 'subtract', 'multiply', 'divide', 'power', - 'maximum', 'minimum', 'floor_divide', 'mod'] + input_names = ["A", "B"] + test_cases = [ + "greater", + "less", + "equal", + "not_equal", + "greater_equal", + "less_equal", + "logical_and", + "logical_or", + "logical_xor", + "add", + "subtract", + "multiply", + "divide", + "power", + "maximum", + "minimum", + "floor_divide", + "mod", + ] for test_case in test_cases: for _ in range(10): rank_a = np.random.randint(low=1, high=6) @@ -1365,134 +1628,202 @@ def test_elementwise_binary_cpu(self, cpu_only=True): for i in range(-1, -rank_out - 1, -1): dims = [] - if -i <= rank_a: dims.append(shape_a[i]) - if -i <= rank_b: dims.append(shape_b[i]) + if -i <= rank_a: + dims.append(shape_a[i]) + if -i <= rank_b: + dims.append(shape_b[i]) dim = np.random.choice(dims) - if -i <= rank_a: shape_a[i] = np.random.choice([1, dim]) - if -i <= rank_b: shape_b[i] = np.random.choice([1, dim]) + if -i <= rank_a: + shape_a[i] = np.random.choice([1, dim]) + if -i <= rank_b: + shape_b[i] = np.random.choice([1, dim]) input_shapes = [shape_a, shape_b] - input_features = [('A', datatypes.Array(*input_shapes[0])), - ('B', datatypes.Array(*input_shapes[1]))] + input_features = [ + ("A", datatypes.Array(*input_shapes[0])), + ("B", datatypes.Array(*input_shapes[1])), + ] - builder = neural_network.NeuralNetworkBuilder(input_features, [ - ('output', None)], disable_rank5_shape_mapping=True) + builder = neural_network.NeuralNetworkBuilder( + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) func = getattr(np, test_case) - if test_case == 'greater': - 
builder.add_greater_than(test_case, input_names=input_names, - output_name='output') - elif test_case == 'less': - builder.add_less_than(test_case, input_names=input_names, - output_name='output') - elif test_case == 'equal': - builder.add_equal(test_case, input_names=input_names, - output_name='output') - elif test_case == 'not_equal': - builder.add_not_equal(test_case, input_names=input_names, - output_name='output') - elif test_case == 'greater_equal': - builder.add_greater_than(test_case, input_names=input_names, - output_name='output', - use_greater_than_equal=True) - elif test_case == 'less_equal': - builder.add_less_than(test_case, input_names=input_names, - output_name='output', - use_less_than_equal=True) - elif test_case == 'logical_and': - builder.add_logical(test_case, input_names=input_names, - output_name='output', mode='AND') - elif test_case == 'logical_or': - builder.add_logical(test_case, input_names=input_names, - output_name='output', mode='OR') - elif test_case == 'logical_xor': - builder.add_logical(test_case, input_names=input_names, - output_name='output', mode='XOR') - elif test_case == 'add': - builder.add_add_broadcastable(test_case, input_names=input_names, - output_name='output') - elif test_case == 'subtract': - builder.add_subtract_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'multiply': - builder.add_multiply_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'divide': - builder.add_divide_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'power': - builder.add_pow_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'maximum': - builder.add_max_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'minimum': - builder.add_min_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'floor_divide': - builder.add_floor_div_broadcastable(test_case, - input_names=input_names, - output_name='output') - elif test_case == 'mod': - builder.add_mod_broadcastable(test_case, - input_names=input_names, - output_name='output') + if test_case == "greater": + builder.add_greater_than( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "less": + builder.add_less_than( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "equal": + builder.add_equal( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "not_equal": + builder.add_not_equal( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "greater_equal": + builder.add_greater_than( + test_case, + input_names=input_names, + output_name="output", + use_greater_than_equal=True, + ) + elif test_case == "less_equal": + builder.add_less_than( + test_case, + input_names=input_names, + output_name="output", + use_less_than_equal=True, + ) + elif test_case == "logical_and": + builder.add_logical( + test_case, + input_names=input_names, + output_name="output", + mode="AND", + ) + elif test_case == "logical_or": + builder.add_logical( + test_case, + input_names=input_names, + output_name="output", + mode="OR", + ) + elif test_case == "logical_xor": + builder.add_logical( + test_case, + input_names=input_names, + output_name="output", + mode="XOR", + ) + elif test_case == "add": + builder.add_add_broadcastable( + test_case, input_names=input_names, 
output_name="output" + ) + elif test_case == "subtract": + builder.add_subtract_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "multiply": + builder.add_multiply_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "divide": + builder.add_divide_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "power": + builder.add_pow_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "maximum": + builder.add_max_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "minimum": + builder.add_min_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "floor_divide": + builder.add_floor_div_broadcastable( + test_case, input_names=input_names, output_name="output" + ) + elif test_case == "mod": + builder.add_mod_broadcastable( + test_case, input_names=input_names, output_name="output" + ) a = np.random.rand(*input_shapes[0]) b = np.random.rand(*input_shapes[1]) - input = {'A': a, 'B': b} - expected = {'output': func(a, b, dtype=np.float32)} + input = {"A": a, "B": b} + expected = {"output": func(a, b, dtype=np.float32)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_elementwise_binary_gpu(self): self.test_elementwise_binary_cpu(cpu_only=False) def test_elementwise_boolean_unary_cpu(self, cpu_only=True): - input_names = ['input'] - shapes = [(1, 2, 3, 1), (3, 1, 2, 1, 2), (1, 2, 1, 3), (2, 3), - (2, 1, 1), (2, 3, 4), (2, 4), (1,), (1,)] - test_cases = ['greater', 'less', 'equal', 'not_equal', 'greater_equal', - 'less_equal'] + input_names = ["input"] + shapes = [ + (1, 2, 3, 1), + (3, 1, 2, 1, 2), + (1, 2, 1, 3), + (2, 3), + (2, 1, 1), + (2, 3, 4), + (2, 4), + (1,), + (1,), + ] + test_cases = [ + "greater", + "less", + "equal", + "not_equal", + "greater_equal", + "less_equal", + ] for test_case in test_cases: for shape in shapes: - input_features = [('input', datatypes.Array(*shape))] + input_features = [("input", datatypes.Array(*shape))] b = np.random.rand() builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) func = getattr(np, test_case) - if test_case == 'greater': - builder.add_greater_than(test_case, input_names=input_names, - output_name='output', alpha=b) - elif test_case == 'less': - builder.add_less_than(test_case, input_names=input_names, - output_name='output', alpha=b) - elif test_case == 'equal': - builder.add_equal(test_case, input_names=input_names, - output_name='output', alpha=b) - elif test_case == 'not_equal': - builder.add_not_equal(test_case, input_names=input_names, - output_name='output', alpha=b) - elif test_case == 'greater_equal': - builder.add_greater_than(test_case, input_names=input_names, - output_name='output', - use_greater_than_equal=True, - alpha=b) - elif test_case == 'less_equal': - builder.add_less_than(test_case, input_names=input_names, - output_name='output', - use_less_than_equal=True, alpha=b) + if test_case == "greater": + builder.add_greater_than( + test_case, + input_names=input_names, + output_name="output", + alpha=b, + ) + elif test_case == "less": + builder.add_less_than( + test_case, + input_names=input_names, + output_name="output", + alpha=b, + ) + elif test_case == "equal": + builder.add_equal( + test_case, + 
input_names=input_names, + output_name="output", + alpha=b, + ) + elif test_case == "not_equal": + builder.add_not_equal( + test_case, + input_names=input_names, + output_name="output", + alpha=b, + ) + elif test_case == "greater_equal": + builder.add_greater_than( + test_case, + input_names=input_names, + output_name="output", + use_greater_than_equal=True, + alpha=b, + ) + elif test_case == "less_equal": + builder.add_less_than( + test_case, + input_names=input_names, + output_name="output", + use_less_than_equal=True, + alpha=b, + ) a = np.random.rand(*shape) - input = {'input': a} - expected = {'output': func(a, b, dtype=np.float32)} + input = {"input": a} + expected = {"output": func(a, b, dtype=np.float32)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -1500,20 +1831,30 @@ def test_elementwise_boolean_unary_gpu(self): self.test_elementwise_boolean_unary_cpu(cpu_only=False) def test_logical_not_cpu(self, cpu_only=True): - input_names = ['input'] - shapes = [(1, 2, 3, 1), (3, 1, 2, 1, 2), (1, 2, 1, 3), (2, 3), - (2, 1, 1), (2, 3, 4), (2, 4), (1,), (1,)] + input_names = ["input"] + shapes = [ + (1, 2, 3, 1), + (3, 1, 2, 1, 2), + (1, 2, 1, 3), + (2, 3), + (2, 1, 1), + (2, 3, 4), + (2, 4), + (1,), + (1,), + ] for shape in shapes: - input_features = [('input', datatypes.Array(*shape))] + input_features = [("input", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) - builder.add_logical('logical_not', input_names=input_names, - output_name='output', mode='NOT') + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) + builder.add_logical( + "logical_not", input_names=input_names, output_name="output", mode="NOT" + ) a = np.random.rand(*shape) - input = {'input': a} - expected = {'output': np.logical_not(a)} + input = {"input": a} + expected = {"output": np.logical_not(a)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -1528,27 +1869,30 @@ def test_stack_cpu(self, cpu_only=True): input_features = [] input_names = [] for i in range(n_inputs): - input_name = 'input_%s' % str(i) + input_name = "input_%s" % str(i) input_names.append(input_name) - input_features.append( - (input_name, datatypes.Array(*input_shape))) - output_features = [('output', None)] + input_features.append((input_name, datatypes.Array(*input_shape))) + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_stack(name='stack', input_names=input_names, - output_name='output', axis=axis) + builder.add_stack( + name="stack", + input_names=input_names, + output_name="output", + axis=axis, + ) input_tensors = [] for _ in range(n_inputs): input_tensors.append(np.random.rand(*input_shape)) input = dict(zip(input_names, input_tensors)) - expected = {'output': np.stack(input_tensors, axis)} + expected = {"output": np.stack(input_tensors, axis)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(input_rank + 1, builder._get_rank('output')) + self.assertEqual(input_rank + 1, builder._get_rank("output")) def test_stack_gpu(self): self.test_stack_cpu(cpu_only=False) @@ -1556,21 +1900,21 @@ def test_stack_gpu(self): def test_ceil_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = 
[('data', datatypes.Array(*shape))] - output_features = [('output', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_ceil(name='ceil', input_name='data', output_name='output') + builder.add_ceil(name="ceil", input_name="data", output_name="output") x = np.random.rand(*shape) - inputs = {'data': x} - expected = {'output': np.ceil(x)} + inputs = {"data": x} + expected = {"output": np.ceil(x)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) def test_ceil_gpu(self): self.test_ceil_cpu(cpu_only=False) @@ -1578,39 +1922,42 @@ def test_ceil_gpu(self): def test_floor_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_floor(name='floor', input_name='data', output_name='output') + builder.add_floor(name="floor", input_name="data", output_name="output") x = np.random.rand(*shape) - inputs = {'data': x} - expected = {'output': np.floor(x)} + inputs = {"data": x} + expected = {"output": np.floor(x)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) + @pytest.mark.xfail(reason="[GitLab CI failure: test_floor_gpu](rdar://64311149)") def test_floor_gpu(self): self.test_floor_cpu(cpu_only=False) def test_round_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_round(name='round', input_name='data', output_name='output') + builder.add_round(name="round", input_name="data", output_name="output") - x = np.float32(np.random.rand(*shape) * np.random.randint(low=-100, high=101)) - inputs = {'data': x} - expected = {'output': np.around(x)} + x = np.float32( + np.random.rand(*shape) * np.random.randint(low=-100, high=101) + ) + inputs = {"data": x} + expected = {"output": np.around(x)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -1620,19 +1967,20 @@ def test_round_gpu(self): def test_sign_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - 
disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_sign(name='sign', input_name='data', output_name='output') + builder.add_sign(name="sign", input_name="data", output_name="output") - x = np.random.choice([-np.random.rand(1), 0.0, np.random.rand(1)], - tuple(shape)).astype(np.float32) - inputs = {'data': x} - expected = {'output': np.sign(x)} + x = np.random.choice( + [-np.random.rand(1), 0.0, np.random.rand(1)], tuple(shape) + ).astype(np.float32) + inputs = {"data": x} + expected = {"output": np.sign(x)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -1642,21 +1990,26 @@ def test_sign_gpu(self): def test_clip_cpu(self, cpu_only=True): for rank in range(1, 6): shape = np.random.randint(low=2, high=6, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", datatypes.Array(*shape))] x = np.random.rand(*shape) min_value = np.percentile(x, 25) max_value = np.percentile(x, 75) - input = {'data': x} + input = {"data": x} builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) - builder.add_clip(name='clip', input_name='data', output_name='output', - min_value=min_value, max_value=max_value) + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_clip( + name="clip", + input_name="data", + output_name="output", + min_value=min_value, + max_value=max_value, + ) - expected = {'output': np.clip(x, min_value, max_value)} + expected = {"output": np.clip(x, min_value, max_value)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_clip_gpu(self): @@ -1672,37 +2025,40 @@ def test_split_nd_cpu(self, cpu_only=True): output_features = [] output_names = [] almost_equal = random.choice([True, False]) - remainder = np.random.choice( - range(1, n_outputs)) if almost_equal else 0 + remainder = np.random.choice(range(1, n_outputs)) if almost_equal else 0 value = np.random.choice(range(2, 5)) for k in range(n_outputs): output_shapes.append(np.copy(input_shape)) - output_shapes[-1][ - axis] = value + 1 if k < remainder else value + output_shapes[-1][axis] = value + 1 if k < remainder else value input_shape[axis] += output_shapes[-1][axis] for i in range(n_outputs): - output_name = 'output_%s' % str(i) + output_name = "output_%s" % str(i) output_names.append(output_name) - output_features.append( - (output_name, None)) + output_features.append((output_name, None)) - input_features = [('data', datatypes.Array(*input_shape))] + input_features = [("data", datatypes.Array(*input_shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_split_nd(name='split_nd', input_name='data', - output_names=output_names, axis=axis, - num_splits=n_outputs) + builder.add_split_nd( + name="split_nd", + input_name="data", + output_names=output_names, + axis=axis, + num_splits=n_outputs, + ) x = np.random.rand(*input_shape) - input = {'data': x} + input = {"data": x} expected = dict( zip( - output_names, np.array_split(x, n_outputs, axis=axis) - if almost_equal else np.split(x, n_outputs, axis=axis) + output_names, + np.array_split(x, n_outputs, axis=axis) + if almost_equal + else np.split(x, n_outputs, axis=axis), ) ) # 
Explicitly trying to compare against both versions of numpy split @@ -1730,25 +2086,27 @@ def test_split_nd_with_split_sizes_cpu(self, cpu_only=True): sections.pop() for i in range(n_outputs): - output_name = 'output_%s' % str(i) + output_name = "output_%s" % str(i) output_names.append(output_name) - output_features.append( - (output_name, None)) + output_features.append((output_name, None)) - input_features = [('data', datatypes.Array(*input_shape))] + input_features = [("data", datatypes.Array(*input_shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_split_nd(name='split_nd', input_name='data', - output_names=output_names, axis=axis, - split_sizes=split_sizes) + builder.add_split_nd( + name="split_nd", + input_name="data", + output_names=output_names, + axis=axis, + split_sizes=split_sizes, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = dict( - zip(output_names, np.split(x, sections, axis=axis))) + input = {"data": x} + expected = dict(zip(output_names, np.split(x, sections, axis=axis))) self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) for output_ in output_names: @@ -1761,43 +2119,64 @@ def test_slice_static_cpu(self, cpu_only=True): for rank in range(1, 6): for _ in range(200): input_shape = np.array([5 for _ in range(rank)]) - objs, strides, begin_masks, end_ids, end_masks, begin_ids = [], [], [], [], [], [] + objs, strides, begin_masks, end_ids, end_masks, begin_ids = ( + [], + [], + [], + [], + [], + [], + ) for dim in range(rank): stride = random.choice([-3, -1, 1, 2]) begin_mask = random.choice([True, False]) end_mask = random.choice([True, False]) length = 0 while length <= 0: - begin_id = np.random.randint(low=-input_shape[dim], - high=input_shape[dim]) - end_id = np.random.randint(low=-input_shape[dim], - high=input_shape[dim]) - obj = slice(None if begin_mask else begin_id, - None if end_mask else end_id, stride) + begin_id = np.random.randint( + low=-input_shape[dim], high=input_shape[dim] + ) + end_id = np.random.randint( + low=-input_shape[dim], high=input_shape[dim] + ) + obj = slice( + None if begin_mask else begin_id, + None if end_mask else end_id, + stride, + ) length = np.arange(input_shape[dim])[(obj,)].shape[0] objs.append(obj), strides.append(stride), begin_masks.append( - begin_mask) + begin_mask + ) end_masks.append(end_mask), begin_ids.append( - begin_id), end_ids.append(end_id) + begin_id + ), end_ids.append(end_id) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_slice_static('slice_static', 'data', 'output', - begin_ids=begin_ids, end_ids=end_ids, strides=strides, - begin_masks=begin_masks, end_masks=end_masks) + builder.add_slice_static( + "slice_static", + "data", + "output", + begin_ids=begin_ids, + end_ids=end_ids, + strides=strides, + begin_masks=begin_masks, + end_masks=end_masks, + ) x = np.random.rand(*input_shape) - inputs = {'data': x} - expected = {'output': x[tuple(objs)]} + inputs = {"data": x} + expected = {"output": x[tuple(objs)]} self._test_model(builder.spec, inputs, expected, 
useCPUOnly=cpu_only) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) def test_slice_static_gpu(self): self.test_slice_static_cpu(cpu_only=False) @@ -1805,109 +2184,218 @@ def test_slice_static_gpu(self): def test_slice_dynamic_cpu(self, cpu_only=True): for rank in range(1, 6): input_shape = np.array([5 for _ in range(rank)]) - objs, strides, begin_masks, end_ids, end_masks, begin_ids = [], [], [], [], [], [] + objs, strides, begin_masks, end_ids, end_masks, begin_ids = ( + [], + [], + [], + [], + [], + [], + ) + squeeze_masks = [] + squeeze_axes = [] for dim in range(rank): stride = random.choice([-3, -1, 1, 2]) begin_mask = random.choice([True, False]) end_mask = random.choice([True, False]) + if len(squeeze_axes) + 1 < rank: + squeeze_mask = random.choice([True, False]) + else: + squeeze_mask = False + if squeeze_mask: + squeeze_axes.append(dim) length = 0 while length <= 0: - begin_id = np.random.randint(low=-input_shape[dim], - high=input_shape[dim]) - end_id = np.random.randint(low=-input_shape[dim], - high=input_shape[dim]) - obj = slice(None if begin_mask else begin_id, - None if end_mask else end_id, stride) + begin_id = np.random.randint( + low=-input_shape[dim], high=input_shape[dim] + ) + end_id = np.random.randint( + low=-input_shape[dim], high=input_shape[dim] + ) + obj = slice( + None if begin_mask else begin_id, + None if end_mask else end_id, + stride, + ) length = np.arange(input_shape[dim])[(obj,)].shape[0] - objs.append(obj), strides.append(stride), begin_masks.append( - begin_mask) - end_masks.append(end_mask), begin_ids.append( - begin_id), end_ids.append(end_id) + objs.append(obj), strides.append(stride), begin_masks.append(begin_mask) + end_masks.append(end_mask), begin_ids.append(begin_id), end_ids.append( + end_id + ) + squeeze_masks.append(squeeze_mask) - # test different number of inputs, from 2 inputs up to 6 inputs + # test different number of inputs, from 2 inputs up to 7 inputs # when num_inputs == 2, begin_ids are inputs, rest are read from parameters - # when num_inputs == 6, all read from inputs, none are read from parameters - for num_inputs in [2, 3, 4, 5, 6]: + # when num_inputs == 7, all read from inputs, none are read from parameters + for num_inputs in [2, 3, 4, 5, 6, 7]: x = np.random.rand(*input_shape) - input_features = [('data', datatypes.Array(*input_shape))] - input_names = ['data'] + input_features = [("data", datatypes.Array(*input_shape))] + input_names = ["data"] inputs = dict() - inputs['data'] = x + inputs["data"] = x if num_inputs == 2: - input_features = [('data', datatypes.Array(*input_shape)), - ('begin_ids', datatypes.Array(len(begin_ids)))] - input_names = ['data', 'begin_ids'] - inputs['begin_ids'] = np.array(begin_ids, dtype=np.int32) + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("begin_ids", datatypes.Array(len(begin_ids))), + ] + input_names = ["data", "begin_ids"] + inputs["begin_ids"] = np.array(begin_ids, dtype=np.int32) elif num_inputs == 3: - input_features = [('data', datatypes.Array(*input_shape)), - ('begin_ids', datatypes.Array(len(begin_ids))), - ('end_ids', datatypes.Array(len(end_ids)))] - input_names = ['data', 'begin_ids', 'end_ids'] - inputs['begin_ids'] = np.array(begin_ids, dtype=np.int32) - inputs['end_ids'] = np.array(end_ids, dtype=np.int32) + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("begin_ids", datatypes.Array(len(begin_ids))), + ("end_ids", datatypes.Array(len(end_ids))), + ] + input_names = ["data", "begin_ids", "end_ids"] +
inputs["begin_ids"] = np.array(begin_ids, dtype=np.int32) + inputs["end_ids"] = np.array(end_ids, dtype=np.int32) elif num_inputs == 4: - input_features = [('data', datatypes.Array(*input_shape)), - ('begin_ids', datatypes.Array(len(begin_ids))), - ('end_ids', datatypes.Array(len(end_ids))), - ('strides', datatypes.Array(len(strides)))] - input_names = ['data', 'begin_ids', 'end_ids', 'strides'] - inputs['begin_ids'] = np.array(begin_ids, dtype=np.int32) - inputs['end_ids'] = np.array(end_ids, dtype=np.int32) - inputs['strides'] = np.array(strides, dtype=np.int32) + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("begin_ids", datatypes.Array(len(begin_ids))), + ("end_ids", datatypes.Array(len(end_ids))), + ("strides", datatypes.Array(len(strides))), + ] + input_names = ["data", "begin_ids", "end_ids", "strides"] + inputs["begin_ids"] = np.array(begin_ids, dtype=np.int32) + inputs["end_ids"] = np.array(end_ids, dtype=np.int32) + inputs["strides"] = np.array(strides, dtype=np.int32) elif num_inputs == 5: - input_features = [('data', datatypes.Array(*input_shape)), - ('begin_ids', datatypes.Array(len(begin_ids))), - ('end_ids', datatypes.Array(len(end_ids))), - ('strides', datatypes.Array(len(strides))), - ('begin_masks', datatypes.Array(len(begin_masks)))] - input_names = ['data', 'begin_ids', 'end_ids', 'strides', 'begin_masks'] - inputs['begin_ids'] = np.array(begin_ids, dtype=np.int32) - inputs['end_ids'] = np.array(end_ids, dtype=np.int32) - inputs['strides'] = np.array(strides, dtype=np.int32) - inputs['begin_masks'] = np.array(begin_masks, dtype=np.int32) + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("begin_ids", datatypes.Array(len(begin_ids))), + ("end_ids", datatypes.Array(len(end_ids))), + ("strides", datatypes.Array(len(strides))), + ("begin_masks", datatypes.Array(len(begin_masks))), + ] + input_names = [ + "data", + "begin_ids", + "end_ids", + "strides", + "begin_masks", + ] + inputs["begin_ids"] = np.array(begin_ids, dtype=np.int32) + inputs["end_ids"] = np.array(end_ids, dtype=np.int32) + inputs["strides"] = np.array(strides, dtype=np.int32) + inputs["begin_masks"] = np.array(begin_masks, dtype=np.int32) elif num_inputs == 6: - input_features = [('data', datatypes.Array(*input_shape)), - ('begin_ids', datatypes.Array(len(begin_ids))), - ('end_ids', datatypes.Array(len(end_ids))), - ('strides', datatypes.Array(len(strides))), - ('begin_masks', datatypes.Array(len(begin_masks))), - ('end_masks', datatypes.Array(len(end_masks)))] - input_names = ['data', 'begin_ids', 'end_ids', - 'strides', 'begin_masks', 'end_masks'] - inputs['begin_ids'] = np.array(begin_ids, dtype=np.int32) - inputs['end_ids'] = np.array(end_ids, dtype=np.int32) - inputs['strides'] = np.array(strides, dtype=np.int32) - inputs['begin_masks'] = np.array(begin_masks, dtype=np.int32) - inputs['end_masks'] = np.array(end_masks, dtype=np.int32) + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("begin_ids", datatypes.Array(len(begin_ids))), + ("end_ids", datatypes.Array(len(end_ids))), + ("strides", datatypes.Array(len(strides))), + ("begin_masks", datatypes.Array(len(begin_masks))), + ("end_masks", datatypes.Array(len(end_masks))), + ] + input_names = [ + "data", + "begin_ids", + "end_ids", + "strides", + "begin_masks", + "end_masks", + ] + inputs["begin_ids"] = np.array(begin_ids, dtype=np.int32) + inputs["end_ids"] = np.array(end_ids, dtype=np.int32) + inputs["strides"] = np.array(strides, dtype=np.int32) + inputs["begin_masks"] = np.array(begin_masks, 
dtype=np.int32) + inputs["end_masks"] = np.array(end_masks, dtype=np.int32) + elif num_inputs == 7: + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("begin_ids", datatypes.Array(len(begin_ids))), + ("end_ids", datatypes.Array(len(end_ids))), + ("strides", datatypes.Array(len(strides))), + ("begin_masks", datatypes.Array(len(begin_masks))), + ("end_masks", datatypes.Array(len(end_masks))), + ("squeeze_masks", datatypes.Array(len(squeeze_masks))), + ] + input_names = [ + "data", + "begin_ids", + "end_ids", + "strides", + "begin_masks", + "end_masks", + "squeeze_masks", + ] + inputs["begin_ids"] = np.array(begin_ids, dtype=np.int32) + inputs["end_ids"] = np.array(end_ids, dtype=np.int32) + inputs["strides"] = np.array(strides, dtype=np.int32) + inputs["begin_masks"] = np.array(begin_masks, dtype=np.int32) + inputs["end_masks"] = np.array(end_masks, dtype=np.int32) + inputs["squeeze_masks"] = np.array(squeeze_masks, dtype=np.int32) builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) if num_inputs == 2: - builder.add_slice_dynamic('slice_dynamic', input_names, 'output', - end_ids=end_ids, strides=strides, - begin_masks=begin_masks, end_masks=end_masks) + builder.add_slice_dynamic( + "slice_dynamic", + input_names, + "output", + end_ids=end_ids, + strides=strides, + begin_masks=begin_masks, + end_masks=end_masks, + squeeze_masks=squeeze_masks, + ) elif num_inputs == 3: - builder.add_slice_dynamic('slice_dynamic', input_names, 'output', - strides=strides, begin_masks=begin_masks, - end_masks=end_masks) + builder.add_slice_dynamic( + "slice_dynamic", + input_names, + "output", + strides=strides, + begin_masks=begin_masks, + end_masks=end_masks, + squeeze_masks=squeeze_masks, + ) elif num_inputs == 4: - builder.add_slice_dynamic('slice_dynamic', input_names, 'output', - begin_masks=begin_masks, end_masks=end_masks) + builder.add_slice_dynamic( + "slice_dynamic", + input_names, + "output", + begin_masks=begin_masks, + end_masks=end_masks, + squeeze_masks=squeeze_masks, + ) elif num_inputs == 5: - builder.add_slice_dynamic('slice_dynamic', input_names, 'output', - end_masks=end_masks) + builder.add_slice_dynamic( + "slice_dynamic", + input_names, + "output", + end_masks=end_masks, + squeeze_masks=squeeze_masks, + ) elif num_inputs == 6: - builder.add_slice_dynamic('slice_dynamic', input_names, 'output') - - expected = {'output': x[tuple(objs)]} + builder.add_slice_dynamic( + "slice_dynamic", + input_names, + "output", + squeeze_masks=squeeze_masks, + ) + elif num_inputs == 7: + builder.add_slice_dynamic("slice_dynamic", input_names, "output") + + expected_x = x[tuple(objs)] + squeeze_slices = [] + for squeeze in squeeze_masks: + if squeeze: + squeeze_slices.append(slice(None, 1, None)) + else: + squeeze_slices.append(slice(None, None, None)) + expected_x = np.squeeze( + expected_x[tuple(squeeze_slices)], axis=tuple(squeeze_axes) + ) + expected = {"output": expected_x} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) def test_slice_dynamic_gpu(self): self.test_slice_dynamic_cpu(cpu_only=False) @@ -1915,27 +2403,49 @@ def test_slice_dynamic_gpu(self): def test_tile_cpu(self, cpu_only=True): for rank in range(1, 6): input_shape = np.random.randint(low=2, high=5, size=rank) - for rep_rank in range(1,rank+1): + for 
rep_rank in range(1, rank + 1): reps = list(np.random.randint(low=1, high=9, size=rep_rank)) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True ) - builder.add_tile('Tile', 'data', 'output', reps) + builder.add_tile("Tile", "data", "output", reps) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.tile(x, reps)} + input = {"data": x} + expected = {"output": np.tile(x, reps)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_tile_gpu(self): self.test_tile_cpu(cpu_only=False) + def test_dynamic_tile_cpu(self, cpu_only=True): + for rank in range(1, 6): + input_shape = np.random.randint(low=2, high=5, size=rank) + for rep_rank in range(1, rank + 1): + reps = np.random.randint(low=1, high=9, size=rep_rank) + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("reps", datatypes.Array(*reps.shape)), + ] + output_features = [("output", None)] + + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + + builder.add_tile("Tile", ["data", "reps"], "output") + + x = np.random.rand(*input_shape) + input = {"data": x, "reps": reps.astype(np.float32)} + expected = {"output": np.tile(x, list(reps))} + + self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + def test_sliding_windows_cpu(self, cpu_only=True): def numpy_sliding_windows(a, np_axis, np_size, np_step): n = (a.shape[np_axis] - np_size) // np_step + 1 @@ -1965,68 +2475,88 @@ def numpy_sliding_windows(a, np_axis, np_size, np_step): pos_axis = axis if axis >= 0 else axis + rank output_shape.insert(pos_axis + 1, window_size) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_sliding_windows('sliding_windows', - input_name='data', - output_name='output', - axis=axis, - window_size=window_size, - step=step) + builder.add_sliding_windows( + "sliding_windows", + input_name="data", + output_name="output", + axis=axis, + window_size=window_size, + step=step, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': numpy_sliding_windows(x, axis, window_size, step)} + input = {"data": x} + expected = {"output": numpy_sliding_windows(x, axis, window_size, step)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(rank+1, builder._get_rank('output')) + self.assertEqual(rank + 1, builder._get_rank("output")) def test_sliding_windows_gpu(self): self.test_sliding_windows_cpu(cpu_only=False) def test_range_static_cpu(self, cpu_only=True): - params = [(-10.4, 23, 12.2), (0, 1000, 1), (50.5, 90.5, 1.5), (5, 8, 2), - (5, 8, 98), (5, 8, 1.5), (10, 5, -0.6), (24, -65, -2)] + params = [ + (-10.4, 23, 12.2), + (0, 1000, 1), + (50.5, 90.5, 1.5), + (5, 8, 2), + (5, 8, 98), + (5, 8, 1.5), + (10, 5, -0.6), + (24, -65, -2), + ] for param in params: start, end, step = param - input_features 
= [('multiplicative_input', datatypes.Array(1))] + input_features = [("multiplicative_input", datatypes.Array(1))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_range_static('range_static', 'output_range', - end=end, start=start, step=step) + builder.add_range_static( + "range_static", "output_range", end=end, start=start, step=step + ) builder.add_multiply_broadcastable( - name='multiply_broadcastable', - input_names=['multiplicative_input', 'output_range'], - output_name='output') + name="multiply_broadcastable", + input_names=["multiplicative_input", "output_range"], + output_name="output", + ) # save the model model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'test_layer.mlmodel') + model_path = os.path.join(model_dir, "test_layer.mlmodel") coremltools.utils.save_spec(builder.spec, model_path) inputs = dict() - inputs['multiplicative_input'] = np.ones((1,), dtype=np.float64) - expected = {'output': np.arange(start, end, step)} + inputs["multiplicative_input"] = np.ones((1,), dtype=np.float64) + expected = {"output": np.arange(start, end, step)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(1, builder._get_rank('output')) + self.assertEqual(1, builder._get_rank("output")) def test_range_static_gpu(self): self.test_range_static_cpu(cpu_only=False) def test_range_dynamic_cpu(self, cpu_only=True): - params = [(-10.4, 23, 12.2), (0, 1000, 1), (50.5, 90.5, 1.5), (5, 8, 2), - (5, 8, 98), (5, 8, 1.5), (10, 5, -0.6), (24, -65, -2)] + params = [ + (-10.4, 23, 12.2), + (0, 1000, 1), + (50.5, 90.5, 1.5), + (5, 8, 2), + (5, 8, 98), + (5, 8, 1.5), + (10, 5, -0.6), + (24, -65, -2), + ] # input size == 1: end is input, start and step are read from parameters # input size == 2: end, start are inputs, step is read from parameters @@ -2037,66 +2567,79 @@ def test_range_dynamic_cpu(self, cpu_only=True): start, end, step = param if num_inputs == 1: - input_features = [('end', datatypes.Array(1))] + input_features = [("end", datatypes.Array(1))] elif num_inputs == 2: - input_features = [('end', datatypes.Array(1)), - ('start', datatypes.Array(1))] + input_features = [ + ("end", datatypes.Array(1)), + ("start", datatypes.Array(1)), + ] elif num_inputs == 3: - input_features = [('end', datatypes.Array(1)), - ('start', datatypes.Array(1)), - ('step', datatypes.Array(1))] + input_features = [ + ("end", datatypes.Array(1)), + ("start", datatypes.Array(1)), + ("step", datatypes.Array(1)), + ] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) if num_inputs == 1: - inputs['end'] = end * np.ones((1,), dtype=np.float64) - builder.add_range_dynamic('range_dynamic', - output_name='output', - input_names=['end'], - start=start, step=step) + inputs["end"] = end * np.ones((1,), dtype=np.float64) + builder.add_range_dynamic( + "range_dynamic", + output_name="output", + input_names=["end"], + start=start, + step=step, + ) elif num_inputs == 2: - inputs['end'] = end * np.ones((1,), dtype=np.float64) - inputs['start'] = start * np.ones((1,), dtype=np.float64) - builder.add_range_dynamic('range_dynamic', - output_name='output', - input_names=['end', 'start'], - step=step) + inputs["end"] = end * np.ones((1,), dtype=np.float64) + inputs["start"] = start * 
np.ones((1,), dtype=np.float64) + builder.add_range_dynamic( + "range_dynamic", + output_name="output", + input_names=["end", "start"], + step=step, + ) elif num_inputs == 3: - inputs['end'] = end * np.ones((1,), dtype=np.float64) - inputs['start'] = start * np.ones((1,), dtype=np.float64) - inputs['step'] = step * np.ones((1,), dtype=np.float64) - builder.add_range_dynamic('range_dynamic', - output_name='output', - input_names=['end', 'start', 'step']) + inputs["end"] = end * np.ones((1,), dtype=np.float64) + inputs["start"] = start * np.ones((1,), dtype=np.float64) + inputs["step"] = step * np.ones((1,), dtype=np.float64) + builder.add_range_dynamic( + "range_dynamic", + output_name="output", + input_names=["end", "start", "step"], + ) - expected = {'output': np.arange(start, end, step)} + expected = {"output": np.arange(start, end, step)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(1, builder._get_rank('output')) + self.assertEqual(1, builder._get_rank("output")) def test_range_dynamic_gpu(self): self.test_range_dynamic_cpu(cpu_only=False) def test_linear_activation_different_ranks_cpu(self, cpu_only=True): - for input_dim in [(10, 15), (10, 15, 2, 3), - (10, 2, 4, 15, 1, 4), (6,)]: - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*input_dim))] + for input_dim in [(10, 15), (10, 15, 2, 3), (10, 2, 4, 15, 1), (6,)]: + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*input_dim))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_activation(name='activation', - non_linearity='LINEAR', - input_name='data', - output_name='output', params=[34.0, 67.0]) + builder.add_activation( + name="activation", + non_linearity="LINEAR", + input_name="data", + output_name="output", + params=[34.0, 67.0], + ) x = np.random.rand(*input_dim) - input = {'data': x} - expected = {'output': 34.0 * x + 67.0} + input = {"data": x} + expected = {"output": 34.0 * x + 67.0} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -2118,33 +2661,52 @@ def test_topk_cpu(self, cpu_only=True): if negative_axis: axis = axis - len(input_shape) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('values', None), ('indices', None)] + input_features = [ + ("data", datatypes.Array(*input_shape)) + ] + output_features = [("values", None), ("indices", None)] - input_names = ['data'] - output_names = ['values', 'indices'] + input_names = ["data"] + output_names = ["values", "indices"] if n_inputs == 2: - input_names.append('k_in') - input_features.append(('k_in', datatypes.Array(1))) + input_names.append("k_in") + input_features.append(("k_in", datatypes.Array(1))) builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, + output_features, + disable_rank5_shape_mapping=True, + ) if n_inputs == 2: - builder.add_topk('topk', input_names, output_names, - axis=axis, use_bottom_k=bottom_k_flag) + builder.add_topk( + "topk", + input_names, + output_names, + axis=axis, + use_bottom_k=bottom_k_flag, + ) else: - builder.add_topk('topk', input_names, output_names, - k=k, axis=axis, use_bottom_k=bottom_k_flag) - - data = np.random.randint(low=0, high=int(np.prod(input_shape)), size=input_shape) + 
builder.add_topk( + "topk", + input_names, + output_names, + k=k, + axis=axis, + use_bottom_k=bottom_k_flag, + ) + + data = np.random.randint( + low=0, + high=int(np.prod(input_shape)), + size=input_shape, + ) data = data.astype(np.float32) - input = {'data': data} + input = {"data": data} if n_inputs == 2: - input['k_in'] = k * np.ones([1], dtype=np.float32) + input["k_in"] = k * np.ones([1], dtype=np.float32) # numpy reference values if bottom_k_flag: @@ -2155,22 +2717,28 @@ def test_topk_cpu(self, cpu_only=True): slc = [slice(None)] * len(input_shape) slc[axis] = slice(0, k) ref_indices = ref_indices[tuple(slc)] - ref_values = np.take_along_axis(data, ref_indices, axis=axis) - expected = {'values': ref_values, 'indices': ref_indices} - - self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + ref_values = np.take_along_axis( + data, ref_indices, axis=axis + ) + expected = { + "values": ref_values, + "indices": ref_indices, + } + + self._test_model( + builder.spec, input, expected, useCPUOnly=cpu_only + ) def test_topk_gpu(self): self.test_topk_cpu(cpu_only=False) - + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) def test_const_pad_cpu(self, cpu_only=True): - def get_reference(data, pads, value): with tf.Graph().as_default(), tf.Session() as sess: x = tf.placeholder(tf.float32, shape=data.shape) p = tf.placeholder(tf.int32, shape=pads.shape) - y = tf.pad(x, p, mode='CONSTANT', constant_values=value) + y = tf.pad(x, p, mode="CONSTANT", constant_values=value) return sess.run(y, feed_dict={x: data, p: pads}) value = 34.0 @@ -2182,7 +2750,9 @@ def get_reference(data, pads, value): for force_zeros_in_end in [0, 2, 6]: for max_pad_value in range(1, 6): for n_inputs in [1, 2]: - pads = np.random.randint(low=0, high=max_pad_value, size=(rank, 2)) + pads = np.random.randint( + low=0, high=max_pad_value, size=(rank, 2) + ) if force_zeros_in_end > 2 * rank: continue @@ -2196,39 +2766,51 @@ def get_reference(data, pads, value): ctr += 1 - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] - input_names = ['data'] + input_names = ["data"] if n_inputs == 2: - input_names.append('pads') - input_features.append(('pads', datatypes.Array(2*rank,))) - - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) + input_names.append("pads") + input_features.append(("pads", datatypes.Array(2 * rank,))) + + builder = neural_network.NeuralNetworkBuilder( + input_features, + output_features, + disable_rank5_shape_mapping=True, + ) if n_inputs == 2: - builder.add_constant_pad('pad', input_names, 'output', value=value) + builder.add_constant_pad( + "pad", input_names, "output", value=value + ) else: - builder.add_constant_pad('pad', input_names, 'output', value=value, pad_amounts=pads.flatten()) + builder.add_constant_pad( + "pad", + input_names, + "output", + value=value, + pad_amounts=pads.flatten(), + ) - input = {'data': data} + input = {"data": data} if n_inputs == 2: - input['pads'] = pads.flatten().astype(np.float) - - expected = {'output': reference} - self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + input["pads"] = pads.flatten().astype(np.float) + expected = {"output": reference} + self._test_model( + builder.spec, input, expected, useCPUOnly=cpu_only + ) def test_const_pad_gpu(self): self.test_const_pad_cpu(cpu_only=False) - + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) 
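# An illustrative aside on pad_to_given_output_size_mode, which the test
# below exercises: rather than explicit per-edge pad amounts, the layer is
# given the desired output shape and derives how much constant padding one
# side needs. A minimal sketch with invented numbers (not taken from the
# test itself):
#
#     data.shape   = (2, 3); output_shape = (4, 5); value = 34.0
#     # right padding (left_pad=False): pads[:, 1] = output_shape - data.shape
#     pads = [[0, 2], [0, 2]]  ->  result shape (4, 5), 34.0 appended at the end
#     # left padding (left_pad=True):  pads[:, 0] = output_shape - data.shape
#     pads = [[2, 0], [2, 0]]  ->  same shape, 34.0 prepended in front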
def test_const_pad_mode2_cpu(self, cpu_only=True): - def get_reference(data, output_shape, value, left_pad=False): with tf.Graph().as_default(), tf.Session() as sess: x = tf.placeholder(tf.float32, shape=data.shape) p = tf.placeholder(tf.int32, shape=(len(output_shape), 2)) - y = tf.pad(x, p, mode='CONSTANT', constant_values=value) + y = tf.pad(x, p, mode="CONSTANT", constant_values=value) pads = np.zeros((len(output_shape), 2)) if left_pad: pads[:, 0] = np.array(output_shape) - np.array(data.shape) @@ -2266,39 +2848,57 @@ def get_reference(data, output_shape, value, left_pad=False): ctr += 1 - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] - input_names = ['data'] + input_names = ["data"] if n_inputs == 2: - input_names.append('pads') - input_features.append(('pads', datatypes.Array(2*rank,))) + input_names.append("pads") + input_features.append(("pads", datatypes.Array(2 * rank,))) - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - if n_inputs == 2: - builder.add_constant_pad('pad', input_names, 'output', value=value, pad_to_given_output_size_mode=True) + builder = neural_network.NeuralNetworkBuilder( + input_features, + output_features, + disable_rank5_shape_mapping=True, + ) + if n_inputs == 2: + builder.add_constant_pad( + "pad", + input_names, + "output", + value=value, + pad_to_given_output_size_mode=True, + ) else: - builder.add_constant_pad('pad', input_names, 'output', value=value, pad_amounts=pads.flatten(), pad_to_given_output_size_mode=True) - - input = {'data': data} + builder.add_constant_pad( + "pad", + input_names, + "output", + value=value, + pad_amounts=pads.flatten(), + pad_to_given_output_size_mode=True, + ) + + input = {"data": data} if n_inputs == 2: - input['pads'] = pads.flatten().astype(np.float) + input["pads"] = pads.flatten().astype(np.float) - expected = {'output': reference} + expected = {"output": reference} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - def test_const_pad_mode2_gpu(self): self.test_const_pad_mode2_cpu(cpu_only=False) - + @pytest.mark.xfail(reason="rdar://problem/59486372") def test_nms_cpu(self, cpu_only=True): def _compute_iou_matrix(boxes): # input is (N,4), in order [center_w, center_h, width, height] - assert len(boxes.shape) == 2 - assert boxes.shape[1] == 4 + self.assertEqual(len(boxes.shape), 2) + self.assertEqual(boxes.shape[1], 4) boxes = boxes.astype(np.float) - center_w, center_h, width, height = np.split(boxes, 4, axis=1) # outs are all (N,1) + center_w, center_h, width, height = np.split( + boxes, 4, axis=1 + ) # outs are all (N,1) top = center_h + 0.5 * height bottom = center_h - 0.5 * height left = center_w - 0.5 * width @@ -2315,20 +2915,25 @@ def _compute_iou_matrix(boxes): iou = intersection_area / union_area return iou - def _nms_TF(boxes, scores, iou_threshold, score_threshold, per_class_suppression, M): + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) + def _nms_TF( + boxes, scores, iou_threshold, score_threshold, per_class_suppression, M + ): # boxes is (B,N,4), in order [center_w, center_h, width, height] # scores is (B,N,C) # output shapes: (B,M,4), (B,M,C), (B,M), (B,) - ''' + """ - this is implementation of CoreML's NMS layer + this is an implementation of CoreML's NMS layer - ''' + """ B, N, C = scores.shape iou_threshold = iou_threshold.astype(np.float32) score_threshold = score_threshold.astype(np.float32) # convert box ids to TF style
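# (Illustrative aside: CoreML's NMS layer takes boxes as
#  [center_w, center_h, width, height], while tf.image.non_max_suppression
#  expects corner boxes [y1, x1, y2, x2]. For a made-up box
#  [2.0, 3.0, 4.0, 2.0]:
#      y1 = 3.0 - 0.5 * 2.0 = 2.0    y2 = 3.0 + 0.5 * 2.0 = 4.0
#      x1 = 2.0 - 0.5 * 4.0 = 0.0    x2 = 2.0 + 0.5 * 4.0 = 4.0
#  so the TF-style corner box is [2.0, 0.0, 4.0, 4.0].)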
- center_w, center_h, width, height = np.split(boxes, 4, axis=-1) # outs are all (B,N,1) + center_w, center_h, width, height = np.split( + boxes, 4, axis=-1 + ) # outs are all (B,N,1) y1 = center_h - 0.5 * height y2 = center_h + 0.5 * height x1 = center_w - 0.5 * width @@ -2346,19 +2951,34 @@ def _nms_TF(boxes, scores, iou_threshold, score_threshold, per_class_suppression if not per_class_suppression: # this is the simple case as TF directly supports it with tf.Graph().as_default(), tf.Session() as sess: - box_coord_matrix_pl = tf.placeholder(tf.float32, shape=box_coord_matrix.shape) - score_vector_pl = tf.placeholder(tf.float32, shape=score_vector.shape) - ids_g = tf.image.non_max_suppression(box_coord_matrix_pl, - score_vector_pl, - max_output_size=M, iou_threshold=iou_threshold, - score_threshold=score_threshold) - - ids = sess.run(ids_g, feed_dict={box_coord_matrix_pl: box_coord_matrix, score_vector_pl: score_vector}) + box_coord_matrix_pl = tf.placeholder( + tf.float32, shape=box_coord_matrix.shape + ) + score_vector_pl = tf.placeholder( + tf.float32, shape=score_vector.shape + ) + ids_g = tf.image.non_max_suppression( + box_coord_matrix_pl, + score_vector_pl, + max_output_size=M, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + ) + + ids = sess.run( + ids_g, + feed_dict={ + box_coord_matrix_pl: box_coord_matrix, + score_vector_pl: score_vector, + }, + ) else: # this is slightly complicated as TF does not directly support it class_ids = np.argmax(scores[b, :, :], axis=-1) # (N,) sorted_score_ids = np.argsort(-score_vector) - box_coord_matrix2 = np.take(box_coord_matrix, sorted_score_ids, axis=0) + box_coord_matrix2 = np.take( + box_coord_matrix, sorted_score_ids, axis=0 + ) score_vector2 = np.take(score_vector, sorted_score_ids) class_ids = np.take(class_ids, sorted_score_ids) classes_seen = dict() @@ -2370,22 +2990,37 @@ def _nms_TF(boxes, scores, iou_threshold, score_threshold, per_class_suppression classes_seen[c] = True current_class_ids = np.where(class_ids == c)[0] if len(current_class_ids) > 0: - feed_in1 = np.take(box_coord_matrix2, current_class_ids, axis=0) + feed_in1 = np.take( + box_coord_matrix2, current_class_ids, axis=0 + ) feed_in2 = np.take(score_vector2, current_class_ids) with tf.Graph().as_default(), tf.Session() as sess: - box_coord_matrix_pl = tf.placeholder(tf.float32, shape=feed_in1.shape) - score_vector_pl = tf.placeholder(tf.float32, shape=feed_in2.shape) - cur_ids_g = tf.image.non_max_suppression(box_coord_matrix_pl, - score_vector_pl, - max_output_size=M, iou_threshold=iou_threshold, - score_threshold=score_threshold) - cur_ids = sess.run(cur_ids_g, feed_dict={box_coord_matrix_pl: feed_in1, - score_vector_pl: feed_in2}) - + box_coord_matrix_pl = tf.placeholder( + tf.float32, shape=feed_in1.shape + ) + score_vector_pl = tf.placeholder( + tf.float32, shape=feed_in2.shape + ) + cur_ids_g = tf.image.non_max_suppression( + box_coord_matrix_pl, + score_vector_pl, + max_output_size=M, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + ) + cur_ids = sess.run( + cur_ids_g, + feed_dict={ + box_coord_matrix_pl: feed_in1, + score_vector_pl: feed_in2, + }, + ) from_sort_ids = np.take(current_class_ids, cur_ids) - ids_intermediate = np.append(ids_intermediate, from_sort_ids) + ids_intermediate = np.append( + ids_intermediate, from_sort_ids + ) ids_intermediate.sort() ids = np.take(sorted_score_ids, ids_intermediate) @@ -2409,7 +3044,7 @@ def _nms_TF(boxes, scores, iou_threshold, score_threshold, per_class_suppression number_of_test = 0 
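# The sweep below varies (N, M) pairs, batch size, class count, per-class
# suppression, and thresholds drawn from percentiles of the generated
# scores and IoU matrix. A small worked IoU example in the spirit of
# _compute_iou_matrix, with invented numbers: corner boxes [0, 0, 2, 2]
# and [1, 1, 3, 3] intersect in a 1x1 square, the union area is
# 2*2 + 2*2 - 1 = 7, so IoU = 1/7 (about 0.143).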
for N_M in N_M_pairs_to_test: - for B in [1, 5]: + for B in [1]: # [1, 5] TODO Re-enable when rdar://60280745 is fixed for C in [1, 7]: N, M = N_M @@ -2417,7 +3052,9 @@ def _nms_TF(boxes, scores, iou_threshold, score_threshold, per_class_suppression scores = np.random.rand(B, N, C) iou_matrix = _compute_iou_matrix(boxes[0, :, :]) # (N,N) - iou_matrix = iou_matrix[~np.eye(iou_matrix.shape[0], dtype=bool)].reshape(iou_matrix.shape[0], -1) + iou_matrix = iou_matrix[ + ~np.eye(iou_matrix.shape[0], dtype=bool) + ].reshape(iou_matrix.shape[0], -1) for per_class_suppression in [False, True]: for iou_thresh in iou_threshold_percentile: @@ -2429,61 +3066,122 @@ def _nms_TF(boxes, scores, iou_threshold, score_threshold, per_class_suppression elif score_thresh == 100: score_threshold = np.max(scores) + 1 else: - score_threshold = np.percentile(scores, score_thresh) + .01 + score_threshold = ( + np.percentile(scores, score_thresh) + 0.01 + ) if iou_thresh == 0: - iou_threshold = np.maximum(np.min(iou_matrix) - .01, 0.0) + iou_threshold = np.maximum( + np.min(iou_matrix) - 0.01, 0.0 + ) else: - iou_threshold = np.percentile(iou_matrix, iou_thresh) + .01 + iou_threshold = ( + np.percentile(iou_matrix, iou_thresh) + 0.01 + ) number_of_test += 1 - tf_boxes, tf_scores, tf_ids, tf_num_boxes = _nms_TF(boxes, scores, iou_threshold, - score_threshold, - per_class_suppression, - M) + tf_boxes, tf_scores, tf_ids, tf_num_boxes = _nms_TF( + boxes, + scores, + iou_threshold, + score_threshold, + per_class_suppression, + M, + ) expected = dict() - expected['selected_boxes'] = tf_boxes - expected['selected_scores'] = tf_scores - expected['selected_box_ids'] = tf_ids - expected['number_of_boxes'] = tf_num_boxes + expected["selected_boxes"] = tf_boxes + expected["selected_scores"] = tf_scores + expected["selected_box_ids"] = tf_ids + expected["number_of_boxes"] = tf_num_boxes # define CoreML model - input_features = [('boxes', datatypes.Array(B,N,4)), ('scores', datatypes.Array(B,N,C))] - output_features = [('selected_boxes', None), ('selected_scores', None), - ('selected_box_ids', None), ('number_of_boxes', None)] - - input_names = ['boxes', 'scores'] + input_features = [ + ("boxes", datatypes.Array(B, N, 4)), + ("scores", datatypes.Array(B, N, C)), + ] + output_features = [ + ("selected_boxes", None), + ("selected_scores", None), + ("selected_box_ids", None), + ("number_of_boxes", None), + ] + + input_names = ["boxes", "scores"] if is_dynamic: - input_names.extend(['iou_threshold', 'score_threshold', 'max_boxes']) - input_features.append(('iou_threshold', datatypes.Array(1, ))) - input_features.append(('score_threshold', datatypes.Array(1, ))) - input_features.append(('max_boxes', datatypes.Array(1, ))) - - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, - disable_rank5_shape_mapping=True) + input_names.extend( + [ + "iou_threshold", + "score_threshold", + "max_boxes", + ] + ) + input_features.append( + ("iou_threshold", datatypes.Array(1,)) + ) + input_features.append( + ("score_threshold", datatypes.Array(1,)) + ) + input_features.append( + ("max_boxes", datatypes.Array(1,)) + ) + + builder = neural_network.NeuralNetworkBuilder( + input_features, + output_features, + disable_rank5_shape_mapping=True, + ) input_dict = dict() - input_dict['boxes'] = boxes - input_dict['scores'] = scores + input_dict["boxes"] = boxes + input_dict["scores"] = scores if is_dynamic: - builder.add_nms('nms', input_names, - ['selected_boxes', 'selected_scores', 'selected_box_ids','number_of_boxes'], - 
per_class_suppression=per_class_suppression) - - input_dict['iou_threshold'] = iou_threshold * np.ones([1], dtype=np.float) - input_dict['score_threshold'] = score_threshold * np.ones([1], dtype=np.float) - input_dict['max_boxes'] = M * np.ones([1], dtype=np.float) + builder.add_nms( + "nms", + input_names, + [ + "selected_boxes", + "selected_scores", + "selected_box_ids", + "number_of_boxes", + ], + per_class_suppression=per_class_suppression, + ) + + input_dict[ + "iou_threshold" + ] = iou_threshold * np.ones([1], dtype=np.float) + input_dict["score_threshold"] = ( + score_threshold + * np.ones([1], dtype=np.float) + ) + input_dict["max_boxes"] = M * np.ones( + [1], dtype=np.float + ) else: - builder.add_nms('nms', input_names, - ['selected_boxes', 'selected_scores', 'selected_box_ids','number_of_boxes'], - iou_threshold=iou_threshold, score_threshold=score_threshold, - max_boxes=M, per_class_suppression=per_class_suppression) - - self._test_model(builder.spec, input_dict, expected, useCPUOnly=cpu_only) - + builder.add_nms( + "nms", + input_names, + [ + "selected_boxes", + "selected_scores", + "selected_box_ids", + "number_of_boxes", + ], + iou_threshold=iou_threshold, + score_threshold=score_threshold, + max_boxes=M, + per_class_suppression=per_class_suppression, + ) + + self._test_model( + builder.spec, + input_dict, + expected, + useCPUOnly=cpu_only, + ) def test_nms_gpu(self): self.test_nms_cpu(cpu_only=False) @@ -2494,96 +3192,127 @@ def test_rank_preserving_reshape(self): output_shapes = [(5, 40), (20, 2, 25), (25, 3, 2)] for i in range(len(input_shapes)): - input_features = [('data', datatypes.Array(*input_shapes[i]))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shapes[i]))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) builder.add_rank_preserving_reshape( - name='rank_preserving_reshape', input_name='data', - output_name='output', output_shape=target_shapes[i]) + name="rank_preserving_reshape", + input_name="data", + output_name="output", + output_shape=target_shapes[i], + ) x = np.random.rand(*input_shapes[i]) - input = {'data': x} - expected = {'output': np.reshape(x, output_shapes[i])} + input = {"data": x} + expected = {"output": np.reshape(x, output_shapes[i])} self._test_model(builder.spec, input, expected, useCPUOnly=True) - self.assertEqual(len(output_shapes[i]), builder._get_rank('output')) + self.assertEqual(len(output_shapes[i]), builder._get_rank("output")) def test_expand_dims(self): input_shapes = [(10, 5), (10, 5), (10, 5), (10, 5), (10,)] axes = [(0, 1), (0, 2), (2, 0), (-2, -1), (1, 0, -2)] - output_shapes = [(1, 1, 10, 5), (1, 10, 1, 5), (1, 10, 1, 5), (10, 5, 1, 1), (1, 1, 1, 10)] + output_shapes = [ + (1, 1, 10, 5), + (1, 10, 1, 5), + (1, 10, 1, 5), + (10, 5, 1, 1), + (1, 1, 1, 10), + ] for i in range(len(input_shapes)): - input_features = [('data', datatypes.Array(*input_shapes[i]))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shapes[i]))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) builder.add_expand_dims( - name='expand_dims', input_name='data', output_name='output', - axes=axes[i] + name="expand_dims", 
+ input_name="data", + output_name="output", + axes=axes[i], ) x = np.random.rand(*input_shapes[i]) - input = {'data': x} - expected = {'output': np.reshape(x, output_shapes[i])} + input = {"data": x} + expected = {"output": np.reshape(x, output_shapes[i])} self._test_model(builder.spec, input, expected, useCPUOnly=True) - self.assertEqual(len(output_shapes[i]), builder._get_rank('output')) + self.assertEqual(len(output_shapes[i]), builder._get_rank("output")) def test_squeeze(self): - input_shapes = [(1, 1, 10, 5), (1, 10, 1, 5), (10, 5, 1, 1), - (10, 5, 1, 1), (1,), (10, 5, 1, 1), (3, 1, 7)] + input_shapes = [ + (1, 1, 10, 5), + (1, 10, 1, 5), + (10, 5, 1, 1), + (10, 5, 1, 1), + (1,), + (10, 5, 1, 1), + (3, 1, 7), + ] axes = [(0, 1), (0, 2), (-2, -1), (-1, -2), (0,), (3, -2), (1,)] output_shapes = [(10, 5), (10, 5), (10, 5), (10, 5), (1,), (10, 5), (3, 7)] for i in range(len(input_shapes)): - input_features = [('data', datatypes.Array(*input_shapes[i]))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shapes[i]))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_squeeze( + name="squeeze_layer", + input_name="data", + output_name="output", + axes=list(axes[i]), ) - builder.add_squeeze(name='squeeze_layer', input_name='data', - output_name='output', axes=list(axes[i])) x = np.random.rand(*input_shapes[i]) - input = {'data': x} - expected = {'output': np.reshape(x, output_shapes[i])} + input = {"data": x} + expected = {"output": np.reshape(x, output_shapes[i])} self._test_model(builder.spec, input, expected, useCPUOnly=True) - self.assertEqual(len(output_shapes[i]), builder._get_rank('output')) + self.assertEqual(len(output_shapes[i]), builder._get_rank("output")) def test_squeeze_all(self): input_shapes = [ - (1, 1, 10, 5), (1, 10, 1, 5), (10, 5, 1, 1), (10, 5, 1, 1), (1,), - (10, 5, 1, 1), (3, 1, 7), (3,), (5, 6) + (1, 1, 10, 5), + (1, 10, 1, 5), + (10, 5, 1, 1), + (10, 5, 1, 1), + (1,), + (10, 5, 1, 1), + (3, 1, 7), + (3,), + (5, 6), ] for input_shape in input_shapes: - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_squeeze( + name="squeeze_layer", + input_name="data", + output_name="output", + squeeze_all=True, ) - builder.add_squeeze(name='squeeze_layer', input_name='data', - output_name='output', squeeze_all=True) x = np.random.rand(*input_shape) - input = {'data': x} + input = {"data": x} reference = np.squeeze(x) if not reference.shape: reference = np.reshape(reference, (1,)) - expected = {'output': reference} + expected = {"output": reference} self._test_model(builder.spec, input, expected, useCPUOnly=True) - self.assertEqual(-1, builder._get_rank('output')) + self.assertEqual(-1, builder._get_rank("output")) def test_argmax_argmin(self): test_input_shapes = [(9,), (8, 6), (9, 8, 10), (5, 9, 7, 9), (12, 8, 6, 6, 7)] @@ -2591,7 +3320,7 @@ def test_argmax_argmin(self): # (1+2+3+4+5) * 2^3 = 120 test cases for input_shape in test_input_shapes: for negative_axis in [False, True]: - for mode in ['argmax', 
'argmin']: + for mode in ["argmax", "argmin"]: for keep_dims in [True, False]: for axis in np.arange(len(input_shape)): @@ -2600,21 +3329,34 @@ def test_argmax_argmin(self): else: axis_val = axis - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) x = np.random.rand(*input_shape) - if mode == 'argmax': - builder.add_argmax('argmax', 'data', 'output', axis=axis_val, keepdims=keep_dims) + if mode == "argmax": + builder.add_argmax( + "argmax", + "data", + "output", + axis=axis_val, + keepdims=keep_dims, + ) np_out = np.argmax(x, axis=axis_val) else: - builder.add_argmin('argmin', 'data', 'output', axis=axis_val, keepdims=keep_dims) + builder.add_argmin( + "argmin", + "data", + "output", + axis=axis_val, + keepdims=keep_dims, + ) np_out = np.argmin(x, axis=axis_val) if keep_dims: @@ -2622,94 +3364,115 @@ def test_argmax_argmin(self): elif len(input_shape) == 1: np_out = np.expand_dims(np_out, axis=axis_val) - input = {'data': x} - expected = {'output': np_out} + input = {"data": x} + expected = {"output": np_out} + + test_case = "test_argmax_argmin_input_shape_{}_axis_{}_keep_dims_{}_numpy_out_shape_{}".format( + x.shape, axis_val, keep_dims, np_out.shape + ) - self._test_model(builder.spec, input, expected, useCPUOnly=True) - self.assertEqual(len(np_out.shape), builder._get_rank('output')) + self._test_model( + builder.spec, input, expected, useCPUOnly=True + ) + if len(np_out.shape) != 0: + self.assertEqual( + len(np_out.shape), builder._get_rank("output") + ) def test_get_shape(self): dims = [1, 2, 3, 4, 5] for rank in range(1, len(dims) + 1): input_shape = dims[:rank] - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_get_shape( + name="get_shape_layer", input_name="data", output_name="output" ) - builder.add_get_shape(name='get_shape_layer', input_name='data', - output_name='output') - feed = {'data': np.random.rand(*input_shape)} - expected = {'output': np.array(input_shape)} + feed = {"data": np.random.rand(*input_shape)} + expected = {"output": np.array(input_shape)} self._test_model(builder.spec, feed, expected, useCPUOnly=True) - self.assertEqual(1, builder._get_rank('output')) + self.assertEqual(1, builder._get_rank("output")) def test_load_constant_nd(self): dims = [2, 3, 4, 5, 6] for rank in range(1, len(dims) + 1): input_shape = dims[:rank] - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_load_constant_nd( + "load_const_nd_layer", + "tmp", + constant_value=np.ones(input_shape), + shape=input_shape, ) - builder.add_load_constant_nd('load_const_nd_layer', 
'tmp', - constant_value=np.ones(input_shape), - shape=input_shape) - builder.add_elementwise('add_layer', ['data', 'tmp'], 'output', - mode='ADD') - feed = {'data': np.random.rand(*input_shape)} - expected = {'output': feed['data'] + 1} + builder.add_elementwise("add_layer", ["data", "tmp"], "output", mode="ADD") + feed = {"data": np.random.rand(*input_shape)} + expected = {"output": feed["data"] + 1} self._test_model(builder.spec, feed, expected, useCPUOnly=True) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) - @unittest.skip('fix') def test_simple_array_alloc_scatter(self): alloc_shape = [2, 3, 4] value_shape = [1, 3, 4] - input_features = [('alloc_shape', datatypes.Array(len(alloc_shape))), - ('value', datatypes.Array(*value_shape)), - ('index', datatypes.Array(1))] - output_features = [('output', None)] + input_features = [ + ("alloc_shape", datatypes.Array(len(alloc_shape))), + ("value", datatypes.Array(*value_shape)), + ("index", datatypes.Array(1)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_fill_dynamic(name='fill_dynamic_layer', input_name='alloc_shape', - output_name='array', value=np.float(0.0)) + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_fill_dynamic( + name="fill_dynamic_layer", + input_name="alloc_shape", + output_name="array", + value=np.float(0.0), + ) # CoreML input order: container (array), indices, slices (value) - builder.add_scatter(name='scatter_layer', - input_names=['array', 'index', 'value'], - output_name='output') + builder.add_scatter( + name="scatter_layer", + input_names=["array", "index", "value"], + output_name="output", + ) - value = np.random.rand(*value_shape).astype('float') - feed = {'alloc_shape': np.array(alloc_shape, dtype='float'), - 'value': value, - 'index': np.array([1], dtype='float')} + value = np.random.rand(*value_shape).astype("float") + feed = { + "alloc_shape": np.array(alloc_shape, dtype="float"), + "value": value, + "index": np.array([1], dtype="float"), + } ref = np.zeros(alloc_shape) ref[1, :, :] = value - expected = {'output': ref} + expected = {"output": ref} self._test_model(builder.spec, feed, expected, useCPUOnly=True) def test_erf_activation_cpu(self, cpu_only=True): - input_features = [('data', datatypes.Array(10, 45))] - output_features = [('output', datatypes.Array(10, 45))] + input_features = [("data", datatypes.Array(10, 45))] + output_features = [("output", datatypes.Array(10, 45))] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_erf(name='erf', input_name='data', - output_name='output') + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_erf(name="erf", input_name="data", output_name="output") x = np.random.rand(10, 45) - input = {'data': x} + input = {"data": x} expected = { - 'output': np.asarray([math.erf(i) for i in - x.flatten().tolist()]).reshape(10, 45) + "output": np.asarray([math.erf(i) for i in x.flatten().tolist()]).reshape( + 10, 45 + ) } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -2719,40 +3482,47 @@ def test_erf_activation_gpu(self): def test_gelu_activation(self): - for mode in ['EXACT', 'TANH_APPROXIMATION', 'SIGMOID_APPROXIMATION']: + for mode in ["EXACT", "TANH_APPROXIMATION", "SIGMOID_APPROXIMATION"]: for rank in range(1, 6): shape = 
np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_gelu(name='gelu', input_name='data', - output_name='output', mode=mode) + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_gelu( + name="gelu", input_name="data", output_name="output", mode=mode + ) x = np.random.rand(*shape) - input = {'data': x} - exact = np.asarray([0.5 * i * (1.0 + math.erf(i / math.sqrt(2))) - for i in x.flatten().tolist()]).reshape(*shape) - - expected = {'output': exact} + input = {"data": x} + exact = np.asarray( + [ + 0.5 * i * (1.0 + math.erf(i / math.sqrt(2))) + for i in x.flatten().tolist() + ] + ).reshape(*shape) + + expected = {"output": exact} self._test_model(builder.spec, input, expected, useCPUOnly=True) def test_lower_triangular_cpu(self, cpu_only=True): for rank in range(2, 6): for k in range(-3, 4): shape = np.random.randint(low=2, high=6, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_lower_triangular('tril', 'data', 'output', k=k) + builder.add_lower_triangular("tril", "data", "output", k=k) x = np.random.rand(*shape) - input = {'data': x} - expected = {'output': np.tril(x, k=k)} + input = {"data": x} + expected = {"output": np.tril(x, k=k)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_lower_triangular_gpu(self): @@ -2762,17 +3532,18 @@ def test_upper_triangular_cpu(self, cpu_only=True): for rank in range(2, 6): for k in range(-3, 4): shape = np.random.randint(low=2, high=6, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_upper_triangular('triu', 'data', 'output', k=k) + builder.add_upper_triangular("triu", "data", "output", k=k) x = np.random.rand(*shape) - input = {'data': x} - expected = {'output': np.triu(x, k=k)} + input = {"data": x} + expected = {"output": np.triu(x, k=k)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_upper_triangular_gpu(self): @@ -2792,228 +3563,288 @@ def test_where_broadcastable_cpu(self, cpu_only=True): for i in range(-1, -rank_out - 1, -1): dims = [] - if -i <= rank_cond: dims.append(shape_cond[i]) - if -i <= rank_true: dims.append(shape_true[i]) - if -i <= rank_false: dims.append(shape_false[i]) + if -i <= rank_cond: + dims.append(shape_cond[i]) + if -i <= rank_true: + dims.append(shape_true[i]) + if -i <= rank_false: + dims.append(shape_false[i]) dim = np.random.choice(dims) - if -i <= rank_cond: shape_cond[i] = np.random.choice([1, dim]) - if -i <= rank_true: shape_true[i] = np.random.choice([1, dim]) - if -i <= rank_false: shape_false[i] = np.random.choice([1, dim]) + if 
-i <= rank_cond: + shape_cond[i] = np.random.choice([1, dim]) + if -i <= rank_true: + shape_true[i] = np.random.choice([1, dim]) + if -i <= rank_false: + shape_false[i] = np.random.choice([1, dim]) input_features = [ - ('cond', datatypes.Array(*shape_cond)), - ('true', datatypes.Array(*shape_true)), - ('false', datatypes.Array(*shape_false)) + ("cond", datatypes.Array(*shape_cond)), + ("true", datatypes.Array(*shape_true)), + ("false", datatypes.Array(*shape_false)), ] - output_features = [('output', None)] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_where_broadcastable('if_broadcastable', input_names=['cond', 'true', 'false'], - output_name='output') + builder.add_where_broadcastable( + "if_broadcastable", + input_names=["cond", "true", "false"], + output_name="output", + ) cond = np.random.choice([1.0, 0.0], size=shape_cond) true = np.random.rand(*shape_true) false = np.random.rand(*shape_false) - input = {'cond': cond, 'true': true, 'false': false} - expected = {'output': np.where(cond, true, false)} + input = {"cond": cond, "true": true, "false": false} + expected = {"output": np.where(cond, true, false)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(len(expected['output'].shape), builder._get_rank('output')) + self.assertEqual(len(expected["output"].shape), builder._get_rank("output")) def test_where_broadcastable_gpu(self): self.test_where_broadcastable_cpu(cpu_only=False) def test_random_normal_like_cpu(self, cpu_only=True): - mean, stddev, seed = 0., 1., 42 + mean, stddev, seed = 0.0, 1.0, 42 for rank in range(5, -1, -1): if rank > 0: low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. 
/ rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) else: # one extra test to test more moments shape = np.array([10, 10, 10, 10, 10000]) - input_features = [('tensor', datatypes.Array(*shape))] + input_features = [("tensor", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_normal_like(name='random_normal_like', - input_name='tensor', - output_name='output', - mean=mean, stddev=stddev, seed=seed) + builder.add_random_normal_like( + name="random_normal_like", + input_name="tensor", + output_name="output", + mean=mean, + stddev=stddev, + seed=seed, + ) - inputs = {'tensor': np.random.rand(*shape)} - expected = {'output': np.random.normal(mean, stddev, shape)} + inputs = {"tensor": np.random.rand(*shape)} + expected = {"output": np.random.normal(mean, stddev, shape)} if rank > 0: - CorrectnessTest._compare_moments(builder.spec, inputs, expected, num_moments=2) + CorrectnessTest._compare_moments( + builder.spec, inputs, expected, num_moments=2 + ) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) else: # one extra test to test more moments - CorrectnessTest._compare_moments(builder.spec, inputs, expected, num_moments=6) + CorrectnessTest._compare_moments( + builder.spec, inputs, expected, num_moments=6 + ) def test_random_normal_like_gpu(self): self.test_random_normal_like_cpu(cpu_only=False) def test_random_normal_static_cpu(self, cpu_only=True): - mean, stddev, seed = 0., 1., 42 + mean, stddev, seed = 0.0, 1.0, 42 for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. 
/ rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_normal_static(name='random_normal_static', - output_name='tmp', - output_shape=list(shape), - mean=mean, stddev=stddev, seed=seed) + builder.add_random_normal_static( + name="random_normal_static", + output_name="tmp", + output_shape=list(shape), + mean=mean, + stddev=stddev, + seed=seed, + ) - builder.add_elementwise('add_layer', ['data', 'tmp'], 'output', mode='ADD') + builder.add_elementwise("add_layer", ["data", "tmp"], "output", mode="ADD") data = np.zeros(shape) - inputs = {'data': data} - expected = {'output': data + np.random.normal(mean, stddev, shape)} + inputs = {"data": data} + expected = {"output": data + np.random.normal(mean, stddev, shape)} - CorrectnessTest._compare_moments(builder.spec, inputs, expected, num_moments=2) + CorrectnessTest._compare_moments( + builder.spec, inputs, expected, num_moments=2 + ) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) def test_random_normal_static_gpu(self): self.test_random_normal_static_cpu(cpu_only=False) def test_random_normal_dynamic_cpu(self, cpu_only=True): - mean, stddev, seed = 0., 1., 42 + mean, stddev, seed = 0.0, 1.0, 42 for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. 
/ rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('shape', datatypes.Array(len(shape)))] + input_features = [("shape", datatypes.Array(len(shape)))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_normal_dynamic(name='random_normal_dynamic', - input_names=['shape'], - output_name='output', - mean=mean, stddev=stddev, seed=seed) + builder.add_random_normal_dynamic( + name="random_normal_dynamic", + input_names=["shape"], + output_name="output", + mean=mean, + stddev=stddev, + seed=seed, + ) - inputs = {'shape': np.array(shape, np.float)} - expected = {'output': np.random.normal(mean, stddev, shape)} + inputs = {"shape": np.array(shape, np.float)} + expected = {"output": np.random.normal(mean, stddev, shape)} - CorrectnessTest._compare_moments(builder.spec, inputs, expected, num_moments=2) + CorrectnessTest._compare_moments( + builder.spec, inputs, expected, num_moments=2 + ) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(-1, builder._get_rank('output')) + self.assertEqual(-1, builder._get_rank("output")) def test_random_normal_dynamic_gpu(self): self.test_random_normal_dynamic_cpu(cpu_only=False) def test_random_uniform_like_cpu(self, cpu_only=True): - minval, maxval, seed = 0., 1., 42 + minval, maxval, seed = 0.0, 1.0, 42 for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. / rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('tensor', datatypes.Array(*shape))] + input_features = [("tensor", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_uniform_like(name='random_uniform_like', - input_name='tensor', - output_name='output', - minval=minval, maxval=maxval, seed=seed) + builder.add_random_uniform_like( + name="random_uniform_like", + input_name="tensor", + output_name="output", + minval=minval, + maxval=maxval, + seed=seed, + ) tensor = np.random.rand(*shape) - inputs = {'tensor': tensor} - expected = {'output': np.random.uniform(minval, maxval, shape)} + inputs = {"tensor": tensor} + expected = {"output": np.random.uniform(minval, maxval, shape)} CorrectnessTest._compare_moments(builder.spec, inputs, expected) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) def test_random_uniform_like_gpu(self): self.test_random_uniform_like_cpu(cpu_only=False) def test_random_uniform_static_cpu(self, cpu_only=True): - minval, maxval, seed = 0., 1., 42 + minval, maxval, seed = 0.0, 1.0, 42 for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. 
/ rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_uniform_static(name='random_uniform_static', - output_name='tmp', - output_shape=list(shape), - minval=minval, maxval=maxval, seed=seed) + builder.add_random_uniform_static( + name="random_uniform_static", + output_name="tmp", + output_shape=list(shape), + minval=minval, + maxval=maxval, + seed=seed, + ) - builder.add_elementwise('add_layer', ['data', 'tmp'], 'output', mode='ADD') + builder.add_elementwise("add_layer", ["data", "tmp"], "output", mode="ADD") data = np.zeros(shape) - inputs = {'data': data} - expected = {'output': data + np.random.uniform(minval, maxval, shape)} + inputs = {"data": data} + expected = {"output": data + np.random.uniform(minval, maxval, shape)} CorrectnessTest._compare_moments(builder.spec, inputs, expected) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(rank, builder._get_rank('output')) + self.assertEqual(rank, builder._get_rank("output")) def test_random_uniform_static_gpu(self): self.test_random_uniform_static_cpu(cpu_only=False) def test_random_uniform_dynamic_cpu(self, cpu_only=True): - minval, maxval, seed = 0., 1., 42 + minval, maxval, seed = 0.0, 1.0, 42 for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. / rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('shape', datatypes.Array(len(shape)))] + input_features = [("shape", datatypes.Array(len(shape)))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_uniform_dynamic(name='random_uniform_dynamic', - input_names=['shape'], - output_name='output', - minval=minval, maxval=maxval, seed=seed) + builder.add_random_uniform_dynamic( + name="random_uniform_dynamic", + input_names=["shape"], + output_name="output", + minval=minval, + maxval=maxval, + seed=seed, + ) - inputs = {'shape': np.array(shape, np.float)} - expected = {'output': np.random.uniform(minval, maxval, shape)} + inputs = {"shape": np.array(shape, np.float)} + expected = {"output": np.random.uniform(minval, maxval, shape)} CorrectnessTest._compare_moments(builder.spec, inputs, expected) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(-1, builder._get_rank('output')) + self.assertEqual(-1, builder._get_rank("output")) def test_random_uniform_dynamic_gpu(self): self.test_random_uniform_dynamic_cpu(cpu_only=False) @@ -3024,24 +3855,30 @@ def test_random_bernoulli_like_cpu(self, cpu_only=True): for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. 
/ rank)) * low_factor - high = int(np.power(2000, 1. / rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('tensor', datatypes.Array(*shape))] + input_features = [("tensor", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_bernoulli_like(name='random_bernoulli_like', - input_name='tensor', - output_name='output', - prob=prob, seed=seed) + builder.add_random_bernoulli_like( + name="random_bernoulli_like", + input_name="tensor", + output_name="output", + prob=prob, + seed=seed, + ) tensor = np.random.rand(*shape) - inputs = {'tensor': tensor} - expected = {'output': np.random.binomial(1, prob, shape)} + inputs = {"tensor": tensor} + expected = {"output": np.random.binomial(1, prob, shape)} CorrectnessTest._compare_moments(builder.spec, inputs, expected) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -3054,24 +3891,32 @@ def test_random_bernoulli_static_cpu(self, cpu_only=True): for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. / rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_bernoulli_static(name='random_bernoulli_static', output_name='tmp', - output_shape=list(shape), prob=prob, seed=seed) + builder.add_random_bernoulli_static( + name="random_bernoulli_static", + output_name="tmp", + output_shape=list(shape), + prob=prob, + seed=seed, + ) - builder.add_elementwise('add_layer', ['data', 'tmp'], 'output', mode='ADD') + builder.add_elementwise("add_layer", ["data", "tmp"], "output", mode="ADD") data = np.zeros(shape) - inputs = {'data': data} - expected = {'output': data + np.random.binomial(1, prob, shape)} + inputs = {"data": data} + expected = {"output": data + np.random.binomial(1, prob, shape)} CorrectnessTest._compare_moments(builder.spec, inputs, expected) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -3084,23 +3929,29 @@ def test_random_bernoulli_dynamic_cpu(self, cpu_only=True): for rank in range(1, 6): low_factor = np.random.randint(low=2, high=4) - low = int(np.power(1000, 1. / rank)) * low_factor - high = int(np.power(2000, 1. 
/ rank)) * np.random.randint(low=low_factor, high=4) + low = int(np.power(1000, 1.0 / rank)) * low_factor + high = int(np.power(2000, 1.0 / rank)) * np.random.randint( + low=low_factor, high=4 + ) shape = np.random.randint(low=low, high=high, size=rank) - input_features = [('shape', datatypes.Array(len(shape)))] + input_features = [("shape", datatypes.Array(len(shape)))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_random_bernoulli_dynamic(name='random_bernoulli_dynamic', - input_names=['shape'], - output_name='output', - prob=prob, seed=seed) + builder.add_random_bernoulli_dynamic( + name="random_bernoulli_dynamic", + input_names=["shape"], + output_name="output", + prob=prob, + seed=seed, + ) - inputs = {'shape': np.array(shape, np.float)} - expected = {'output': np.random.binomial(1, prob, shape)} + inputs = {"shape": np.array(shape, np.float)} + expected = {"output": np.random.binomial(1, prob, shape)} CorrectnessTest._compare_moments(builder.spec, inputs, expected) self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -3114,55 +3965,77 @@ def test_categorical_distribution_cpu_shapes(self): shape = np.random.randint(low=2, high=8, size=rank) num_samples = np.random.randint(low=10, high=1000) - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_categorical_distribution(name='categorical_distribution', - input_name='data', - output_name='output', - num_samples=num_samples) + builder.add_categorical_distribution( + name="categorical_distribution", + input_name="data", + output_name="output", + num_samples=num_samples, + ) x = np.random.randint(low=0, high=20, size=shape).astype(np.float32) - inputs = {'data': x} + inputs = {"data": x} shape[-1] = num_samples - expected = {'output': np.random.rand(*shape)} + expected = {"output": np.random.rand(*shape)} - self._test_model(builder.spec, inputs, expected, useCPUOnly=True, validate_shapes_only=True) + self._test_model( + builder.spec, + inputs, + expected, + useCPUOnly=True, + validate_shapes_only=True, + ) + @pytest.mark.xfail( + reason="rdar://64153463 ([GitLab CI] test_categorical_distribution_cpu_probs failing)" + ) def test_categorical_distribution_cpu_logits(self): - def softmax(data): e_data = np.exp(data - np.max(data)) return e_data / e_data.sum() num_samples, num_class = 50000, 10 - input_name, output_name = 'data', 'output' - - shapes = [(2, num_class), (2, 1, num_class), (1, 2, num_class), - (2, 1, 1, num_class), (1, 2, 1, num_class), (1, 1, 2, num_class), - (2, 1, 1, 1, num_class), (1, 2, 1, 1, num_class), - (1, 1, 2, 1, num_class), (1, 1, 1, 2, num_class)] + input_name, output_name = "data", "output" + + shapes = [ + (2, num_class), + (2, 1, num_class), + (1, 2, num_class), + (2, 1, 1, num_class), + (1, 2, 1, num_class), + (1, 1, 2, num_class), + (2, 1, 1, 1, num_class), + (1, 2, 1, 1, num_class), + (1, 1, 2, 1, num_class), + (1, 1, 1, 2, num_class), + ] for shape in shapes: - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], 
disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_categorical_distribution(name='categorical_distribution', - input_name=input_name, - output_name=output_name, - num_samples=num_samples, - is_logits=True, - seed=42) + builder.add_categorical_distribution( + name="categorical_distribution", + input_name=input_name, + output_name=output_name, + num_samples=num_samples, + is_logits=True, + seed=42, + ) x = np.random.rand(*shape) inputs = {input_name: x} model = builder.spec - if isinstance(model, str): + if isinstance(model, _string_types): model = coremltools.models.MLModel(model) model = coremltools.models.MLModel(model, useCPUOnly=True) @@ -3184,43 +4057,71 @@ def softmax(data): pre0 = np.bincount(np.array(pre0).astype(np.int), minlength=num_class) pre1 = np.bincount(np.array(pre1).astype(np.int), minlength=num_class) - assert np.allclose(np.true_divide(pre0, num_samples), probs[0], atol=1e-2) - assert np.allclose(np.true_divide(pre0, num_samples), - np.true_divide(ref0, num_samples), atol=1e-2) + np.testing.assert_allclose( + np.true_divide(pre0, num_samples), probs[0], atol=1e-2 + ) + np.testing.assert_allclose( + np.true_divide(pre0, num_samples), + np.true_divide(ref0, num_samples), + atol=1e-2, + ) - assert np.allclose(np.true_divide(pre1, num_samples), probs[1], atol=1e-2) - assert np.allclose(np.true_divide(pre1, num_samples), - np.true_divide(ref1, num_samples), atol=1e-2) + np.testing.assert_allclose( + np.true_divide(pre1, num_samples), probs[1], atol=1e-2 + ) + np.testing.assert_allclose( + np.true_divide(pre1, num_samples), + np.true_divide(ref1, num_samples), + atol=1e-2, + ) - self._test_model(model, inputs, expected, useCPUOnly=True, - output_name_shape_dict={'output': prediction['output'].shape}) + self._test_model( + model, + inputs, + expected, + useCPUOnly=True, + output_name_shape_dict={"output": prediction["output"].shape}, + ) + @pytest.mark.xfail( + reason="rdar://64153463 ([GitLab CI] test_categorical_distribution_cpu_probs failing)" + ) def test_categorical_distribution_cpu_probs(self): - def softmax(data): e_data = np.exp(data - np.max(data)) return e_data / e_data.sum() num_samples, num_class = 50000, 10 - input_name, output_name = 'data', 'output' - - shapes = [(2, num_class), (2, 1, num_class), (1, 2, num_class), - (2, 1, 1, num_class), (1, 2, 1, num_class), (1, 1, 2, num_class), - (2, 1, 1, 1, num_class), (1, 2, 1, 1, num_class), - (1, 1, 2, 1, num_class), (1, 1, 1, 2, num_class)] + input_name, output_name = "data", "output" + + shapes = [ + (2, num_class), + (2, 1, num_class), + (1, 2, num_class), + (2, 1, 1, num_class), + (1, 2, 1, num_class), + (1, 1, 2, num_class), + (2, 1, 1, 1, num_class), + (1, 2, 1, 1, num_class), + (1, 1, 2, 1, num_class), + (1, 1, 1, 2, num_class), + ] for shape in shapes: - input_features = [('data', datatypes.Array(*shape))] + input_features = [("data", datatypes.Array(*shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_categorical_distribution(name='categorical_distribution', - input_name=input_name, - output_name=output_name, - num_samples=num_samples, - is_logits=False, - seed=42) + builder.add_categorical_distribution( + name="categorical_distribution", + input_name=input_name, + output_name=output_name, + num_samples=num_samples, + is_logits=False, + seed=42, + ) x = np.random.rand(*shape) 
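Both categorical-distribution tests in this hunk validate the sampler statistically rather than exactly: draw 50000 samples per row, bin them with `np.bincount`, and require the empirical class frequencies to match the target distribution within `atol=1e-2`. A self-contained sketch of that check, using NumPy's own sampler purely as a stand-in for the CoreML model prediction:

```python
import numpy as np

num_samples, num_class = 50000, 10
rng = np.random.default_rng(42)

probs = rng.random(num_class)
probs /= probs.sum()  # normalize into a valid categorical distribution

samples = rng.choice(num_class, size=num_samples, p=probs)
freqs = np.bincount(samples, minlength=num_class) / num_samples

# Per-class standard error is at most sqrt(0.25 / 50000) ~= 2.2e-3, so the
# tests' atol of 1e-2 is roughly a 4.5-sigma bound and essentially never
# rejects a correct sampler.
np.testing.assert_allclose(freqs, probs, atol=1e-2)
```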
probs = x.reshape(2, num_class) @@ -3228,7 +4129,7 @@ def softmax(data): inputs = {input_name: np.reshape(probs, shape)} model = builder.spec - if isinstance(model, str): + if isinstance(model, _string_types): model = coremltools.models.MLModel(model) model = coremltools.models.MLModel(model, useCPUOnly=True) @@ -3249,16 +4150,31 @@ def softmax(data): pre0 = np.bincount(np.array(pre0).astype(np.int), minlength=num_class) pre1 = np.bincount(np.array(pre1).astype(np.int), minlength=num_class) - assert np.allclose(np.true_divide(pre0, num_samples), probs[0], atol=1e-2) - assert np.allclose(np.true_divide(pre0, num_samples), - np.true_divide(ref0, num_samples), atol=1e-2) + np.testing.assert_allclose( + np.true_divide(pre0, num_samples), probs[0], atol=1e-2 + ) + np.testing.assert_allclose( + np.true_divide(pre0, num_samples), + np.true_divide(ref0, num_samples), + atol=1e-2, + ) - assert np.allclose(np.true_divide(pre1, num_samples), probs[1], atol=1e-2) - assert np.allclose(np.true_divide(pre1, num_samples), - np.true_divide(ref1, num_samples), atol=1e-2) + np.testing.assert_allclose( + np.true_divide(pre1, num_samples), probs[1], atol=1e-2 + ) + np.testing.assert_allclose( + np.true_divide(pre1, num_samples), + np.true_divide(ref1, num_samples), + atol=1e-2, + ) - self._test_model(model, inputs, expected, useCPUOnly=True, - output_name_shape_dict={'output': prediction['output'].shape}) + self._test_model( + model, + inputs, + expected, + useCPUOnly=True, + output_name_shape_dict={"output": prediction["output"].shape}, + ) def test_reverse_cpu(self, cpu_only=True): @@ -3268,18 +4184,18 @@ def test_reverse_cpu(self, cpu_only=True): reverse_dim = [np.random.choice([True, False]) for _ in range(rank)] axes = [i for i in range(rank) if reverse_dim[i] == True] - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_reverse('reverse', 'data', 'output', reverse_dim) + builder.add_reverse("reverse", "data", "output", reverse_dim) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.flip(x, axis=axes)} + input = {"data": x} + expected = {"output": np.flip(x, axis=axes)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3293,25 +4209,33 @@ def test_matrix_band_part_cpu(self, cpu_only=True): num_lower = np.random.randint(low=-7, high=8) num_upper = np.random.randint(low=-7, high=8) shape = np.random.randint(low=2, high=6, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_matrix_band_part('matrix_band_part', 'data', 'output', - num_lower=num_lower, num_upper=num_upper) + builder.add_matrix_band_part( + "matrix_band_part", + "data", + "output", + num_lower=num_lower, + num_upper=num_upper, + ) x = np.random.rand(*shape) - input = {'data': x} + input = {"data": x} rows, cols = shape[-2:] band = np.ones((rows, cols)) for m in range(rows): for n in range(cols): - band[m, 
n] = (num_lower < 0 or (m - n) <= num_lower) and (num_upper < 0 or (n - m) <= num_upper) + band[m, n] = (num_lower < 0 or (m - n) <= num_lower) and ( + num_upper < 0 or (n - m) <= num_upper + ) - expected = {'output': np.multiply(band, x)} + expected = {"output": np.multiply(band, x)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_matrix_band_part_gpu(self): @@ -3322,13 +4246,14 @@ def test_flatten_to_2d_cpu(self, cpu_only=True): for rank in range(1, 6): for axis in range(-rank, rank + 1): shape = np.random.randint(low=2, high=6, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_flatten_to_2d('flatten_to_2d', 'data', 'output', axis=axis) + builder.add_flatten_to_2d("flatten_to_2d", "data", "output", axis=axis) x = np.random.rand(*shape) np_axis = axis + rank if axis < 0 else axis @@ -3341,10 +4266,10 @@ def test_flatten_to_2d_cpu(self, cpu_only=True): new_shape = [pl, pr] ref = x.reshape(new_shape) - input = {'data': x} - expected = {'output': ref} + input = {"data": x} + expected = {"output": ref} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(2, builder._get_rank('output')) + self.assertEqual(2, builder._get_rank("output")) def test_flatten_to_2d_gpu(self): self.test_flatten_to_2d_cpu(cpu_only=False) @@ -3353,10 +4278,10 @@ def test_reshape_like_cpu(self, cpu_only=True): for rank in range(1, 6): for _ in range(20): - input_shape = np.random.randint(low=2, high=4, size=rank) + input_shape = np.random.randint(low=2, high=8, size=rank) n = int(np.prod(input_shape)) divisors = [d for d in range(1, n) if n % d == 0] - target_rank = np.random.randint(low=2, high=4) + target_rank = np.random.randint(low=2, high=6) target_shape = [1] for i in range(target_rank - 1): dim_size = np.random.choice(divisors) @@ -3366,25 +4291,30 @@ def test_reshape_like_cpu(self, cpu_only=True): target_shape[0] = n // np.prod(target_shape) np.random.shuffle(target_shape) - input_features = [('data', datatypes.Array(*input_shape)), - ('tensor', datatypes.Array(*target_shape))] + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("tensor", datatypes.Array(*target_shape)), + ] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_reshape_like(name='reshape_like', - input_names=['data', 'tensor'], - output_name='output') + builder.add_reshape_like( + name="reshape_like", + input_names=["data", "tensor"], + output_name="output", + ) data = np.random.rand(*input_shape) tensor = np.random.rand(*target_shape) - inputs = {'data': data, 'tensor': tensor} - expected = {'output': np.reshape(data, target_shape)} + inputs = {"data": data, "tensor": tensor} + expected = {"output": np.reshape(data, target_shape)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(target_rank, builder._get_rank('output')) + self.assertEqual(target_rank, builder._get_rank("output")) + @pytest.mark.xfail(reason="Fixed in https://github.com/apple/coremltools/pull/634") def test_reshape_like_gpu(self): 
self.test_reshape_like_cpu(cpu_only=False) @@ -3406,23 +4336,25 @@ def test_reshape_static_cpu(self, cpu_only=True): target_shape[0] = -1 np.random.shuffle(target_shape) - input_features = [('data', datatypes.Array(*input_shape))] + input_features = [("data", datatypes.Array(*input_shape))] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_reshape_static(name='reshape_static', - input_name='data', - output_name='output', - output_shape=target_shape) + builder.add_reshape_static( + name="reshape_static", + input_name="data", + output_name="output", + output_shape=target_shape, + ) data = np.random.rand(*input_shape) - inputs = {'data': data} - expected = {'output': np.reshape(data, target_shape)} + inputs = {"data": data} + expected = {"output": np.reshape(data, target_shape)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(len(target_shape), builder._get_rank('output')) + self.assertEqual(len(target_shape), builder._get_rank("output")) def test_reshape_static_gpu(self): self.test_reshape_static_cpu(cpu_only=False) @@ -3445,23 +4377,27 @@ def test_reshape_dynamic_cpu(self, cpu_only=True): target_shape[0] = -1 np.random.shuffle(target_shape) - input_features = [('data', datatypes.Array(*input_shape)), - ('shape', datatypes.Array(len(target_shape)))] + input_features = [ + ("data", datatypes.Array(*input_shape)), + ("shape", datatypes.Array(len(target_shape))), + ] builder = neural_network.NeuralNetworkBuilder( - input_features, [('output', None)], - disable_rank5_shape_mapping=True) + input_features, [("output", None)], disable_rank5_shape_mapping=True + ) - builder.add_reshape_dynamic(name='reshape_dynamic', - input_names=['data', 'shape'], - output_name='output') + builder.add_reshape_dynamic( + name="reshape_dynamic", + input_names=["data", "shape"], + output_name="output", + ) data = np.random.rand(*input_shape) - inputs = {'data': data, 'shape': np.array(target_shape, dtype='float')} - expected = {'output': np.reshape(data, target_shape)} + inputs = {"data": data, "shape": np.array(target_shape, dtype="float")} + expected = {"output": np.reshape(data, target_shape)} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) - self.assertEqual(-1, builder._get_rank('output')) + self.assertEqual(-1, builder._get_rank("output")) def test_reshape_dynamic_gpu(self): self.test_reshape_dynamic_cpu(cpu_only=False) @@ -3469,12 +4405,21 @@ def test_reshape_dynamic_gpu(self): def test_reduce_sum_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3482,25 +4427,33 @@ def test_reduce_sum_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + 
output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_sum('reduce', 'data', 'output', axes, keepdims=keep_dims, reduce_all=reduce_all) + builder.add_reduce_sum( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.add.reduce(x, axes, keepdims=keep_dims)} + input = {"data": x} + expected = {"output": np.add.reduce(x, axes, keepdims=keep_dims)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - expected_rank = len(expected['output'].shape) + expected_rank = len(expected["output"].shape) if expected_rank == 0: expected_rank = 1 - self.assertEqual(expected_rank, builder._get_rank('output')) + self.assertEqual(expected_rank, builder._get_rank("output")) def test_reduce_sum_gpu(self): self.test_reduce_sum_cpu(cpu_only=False) @@ -3508,12 +4461,21 @@ def test_reduce_sum_gpu(self): def test_reduce_prod_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3521,38 +4483,56 @@ def test_reduce_prod_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_prod('reduce', 'data', 'output', axes, keepdims=keep_dims, - reduce_all=reduce_all) + builder.add_reduce_prod( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.multiply.reduce(x, axes, keepdims=keep_dims)} + input = {"data": x} + expected = { + "output": np.multiply.reduce(x, axes, keepdims=keep_dims) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - expected_rank = len(expected['output'].shape) + expected_rank = len(expected["output"].shape) if expected_rank == 0: expected_rank = 1 - self.assertEqual(expected_rank, builder._get_rank('output')) + self.assertEqual(expected_rank, builder._get_rank("output")) def test_reduce_prod_gpu(self): self.test_reduce_prod_cpu(cpu_only=False) def test_reduce_mean_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else 
axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3560,20 +4540,27 @@ def test_reduce_mean_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_mean('reduce', 'data', 'output', axes, keepdims=keep_dims, - reduce_all=reduce_all) + builder.add_reduce_mean( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.mean(x, axes, keepdims=keep_dims)} + input = {"data": x} + expected = {"output": np.mean(x, axes, keepdims=keep_dims)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3582,12 +4569,21 @@ def test_reduce_mean_gpu(self): def test_reduce_max_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3595,19 +4591,29 @@ def test_reduce_max_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_max('reduce', 'data', 'output', axes, keepdims=keep_dims, reduce_all=reduce_all) + builder.add_reduce_max( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.maximum.reduce(x, axes, keepdims=keep_dims)} + input = {"data": x} + expected = { + "output": np.maximum.reduce(x, axes, keepdims=keep_dims) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3616,12 +4622,21 @@ def test_reduce_max_gpu(self): def test_reduce_min_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] 
+ ) reduce_all = False else: reduce_all = True @@ -3629,19 +4644,29 @@ def test_reduce_min_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_min('reduce', 'data', 'output', axes, keepdims=keep_dims, reduce_all=reduce_all) + builder.add_reduce_min( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.minimum.reduce(x, axes, keepdims=keep_dims)} + input = {"data": x} + expected = { + "output": np.minimum.reduce(x, axes, keepdims=keep_dims) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3650,12 +4675,21 @@ def test_reduce_min_gpu(self): def test_reduce_l2_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3663,19 +4697,31 @@ def test_reduce_l2_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_l2('reduce', 'data', 'output', axes, keepdims=keep_dims, reduce_all=reduce_all) + builder.add_reduce_l2( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.sqrt(np.sum(np.square(x), axis=axes, keepdims=keep_dims))} + input = {"data": x} + expected = { + "output": np.sqrt( + np.sum(np.square(x), axis=axes, keepdims=keep_dims) + ) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3684,12 +4730,21 @@ def test_reduce_l2_gpu(self): def test_reduce_l1_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3697,19 +4752,29 @@ def 
test_reduce_l1_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_l1('reduce', 'data', 'output', axes, keepdims=keep_dims, reduce_all=reduce_all) + builder.add_reduce_l1( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.sum(np.abs(x), axis=axes, keepdims=keep_dims)} + input = {"data": x} + expected = { + "output": np.sum(np.abs(x), axis=axes, keepdims=keep_dims) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3718,12 +4783,21 @@ def test_reduce_l1_gpu(self): def test_reduce_sumsquare_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3731,20 +4805,29 @@ def test_reduce_sumsquare_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_sumsquare('reduce', 'data', 'output', axes, keepdims=keep_dims, - reduce_all=reduce_all) + builder.add_reduce_sumsquare( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.sum(np.square(x), axis=axes, keepdims=keep_dims)} + input = {"data": x} + expected = { + "output": np.sum(np.square(x), axis=axes, keepdims=keep_dims) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3753,12 +4836,21 @@ def test_reduce_sumsquare_gpu(self): def test_reduce_logsum_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3766,20 +4858,29 @@ def test_reduce_logsum_cpu(self, cpu_only=True): for keep_dims in 
[True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_logsum('reduce', 'data', 'output', axes, keepdims=keep_dims, - reduce_all=reduce_all) + builder.add_reduce_logsum( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.log(np.sum(x, axis=axes, keepdims=keep_dims))} + input = {"data": x} + expected = { + "output": np.log(np.sum(x, axis=axes, keepdims=keep_dims)) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3788,12 +4889,21 @@ def test_reduce_logsum_gpu(self): def test_reduce_logsumexp_cpu(self, cpu_only=True): for rank in range(1, 6): - axes_list = [axes for length in range(1, rank + 1) for axes in itertools.combinations(range(rank), length)] + axes_list = [ + axes + for length in range(1, rank + 1) + for axes in itertools.combinations(range(rank), length) + ] axes_list.append(None) for axes in axes_list: if axes: - axes = tuple([axis if np.random.choice([True, False]) else axis - rank for axis in axes]) + axes = tuple( + [ + axis if np.random.choice([True, False]) else axis - rank + for axis in axes + ] + ) reduce_all = False else: reduce_all = True @@ -3801,26 +4911,38 @@ def test_reduce_logsumexp_cpu(self, cpu_only=True): for keep_dims in [True, False]: input_shape = np.random.randint(low=2, high=5, size=rank) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_reduce_logsumexp('reduce', 'data', 'output', axes, keepdims=keep_dims, - reduce_all=reduce_all) + builder.add_reduce_logsumexp( + "reduce", + "data", + "output", + axes, + keepdims=keep_dims, + reduce_all=reduce_all, + ) x = np.random.rand(*input_shape) - input = {'data': x} - expected = {'output': np.log(np.sum(np.exp(x), axis=axes, keepdims=keep_dims))} + input = {"data": x} + expected = { + "output": np.log( + np.sum(np.exp(x), axis=axes, keepdims=keep_dims) + ) + } self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_reduce_logsumexp_gpu(self): self.test_reduce_logsumexp_cpu(cpu_only=False) + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) def test_reverse_sequence_cpu(self, cpu_only=True): for rank in range(2, 6): for i in range(20): @@ -3833,32 +4955,45 @@ def test_reverse_sequence_cpu(self, cpu_only=True): while pos_batch_axis >= pos_seq_axis: seq_axis = np.random.randint(low=-rank, high=rank) batch_axis = np.random.randint(low=-rank, high=rank) - pos_batch_axis = batch_axis if batch_axis >= 0 else rank + batch_axis + pos_batch_axis = ( + batch_axis if batch_axis >= 0 else rank + batch_axis + ) pos_seq_axis = seq_axis if seq_axis >= 0 else rank + seq_axis - input_features = [('data', datatypes.Array(*input_shape)), - ('lengths', datatypes.Array(input_shape[batch_axis]))] + 
input_features = [ + ("data", datatypes.Array(*input_shape)), + ("lengths", datatypes.Array(input_shape[batch_axis])), + ] - output_features = [('output', None)] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True ) - builder.add_reverse_sequence('reverse_sequence', ['data', 'lengths'], - 'output', batch_axis=batch_axis, - seq_axis=seq_axis) + builder.add_reverse_sequence( + "reverse_sequence", + ["data", "lengths"], + "output", + batch_axis=batch_axis, + seq_axis=seq_axis, + ) data = np.random.rand(*input_shape) - lengths = np.random.randint(low=0, high=input_shape[seq_axis], size=input_shape[batch_axis]) + lengths = np.random.randint( + low=0, high=input_shape[seq_axis], size=input_shape[batch_axis] + ) - input = {'data': data, 'lengths': lengths.astype(np.float32)} + input = {"data": data, "lengths": lengths.astype(np.float32)} with tf.Graph().as_default(), tf.Session() as sess: - tf_op = tf.reverse_sequence(input=data, seq_lengths=lengths, - seq_axis=pos_seq_axis, batch_axis=pos_batch_axis) - expected = {'output': sess.run(tf_op)} + tf_op = tf.reverse_sequence( + input=data, + seq_lengths=lengths, + seq_axis=pos_seq_axis, + batch_axis=pos_batch_axis, + ) + expected = {"output": sess.run(tf_op)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -3871,61 +5006,71 @@ def test_where_nonzero_cpu(self, cpu_only=True): for i in range(10): shape = np.random.randint(low=2, high=8, size=rank) - input_features = [('data', datatypes.Array(*shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True ) - builder.add_where_nonzero('multi_indices', 'data', 'output') + builder.add_where_nonzero("multi_indices", "data", "output") x = np.random.randint(low=0, high=3, size=shape) - input = {'data': x.astype(np.float)} - expected = {'output': np.transpose(np.nonzero(x)).astype(np.float)} + input = {"data": x.astype(np.float)} + expected = {"output": np.transpose(np.nonzero(x)).astype(np.float)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_where_nonzero_gpu(self): self.test_where_nonzero_cpu(cpu_only=False) def test_gather_cpu(self, cpu_only=True): - for rankParams, rankIndices in [(i, j) for i in range(1, 6) for j in range(1, 6)]: + for rankParams, rankIndices in [ + (i, j) for i in range(1, 6) for j in range(1, 6) + ]: for axis in range(-rankParams, rankParams): shapeParams = np.random.randint(low=2, high=5, size=rankParams) - shapeIndices = np.random.randint(low=2, high=5, - size=rankIndices) + shapeIndices = np.random.randint(low=2, high=5, size=rankIndices) input_shapes = [shapeParams, shapeIndices] posAxis = axis if axis >= 0 else axis + rankParams - output_shape = list(shapeParams[:posAxis]) + list( - shapeIndices) + list(shapeParams[posAxis + 1:]) + output_shape = ( + list(shapeParams[:posAxis]) + + list(shapeIndices) + + list(shapeParams[posAxis + 1 :]) + ) if len(output_shape) > 5: continue - input_names = ['params', 'indices'] + input_names = ["params", "indices"] input_features = [ - ('params', datatypes.Array(*input_shapes[0])), - ('indices', datatypes.Array(*input_shapes[1])) + ("params", 
datatypes.Array(*input_shapes[0])), + ("indices", datatypes.Array(*input_shapes[1])), ] - output_features = [('output', None)] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_gather(name='gather', input_names=input_names, - output_name='output', axis=axis) + builder.add_gather( + name="gather", + input_names=input_names, + output_name="output", + axis=axis, + ) a = np.random.rand(*input_shapes[0]) - b = np.random.randint(-shapeParams[axis], shapeParams[axis], - size=shapeIndices) - input = {'params': a, 'indices': b.astype(np.float)} - expected = {'output': np.take(a, b, axis=axis)} + b = np.random.randint( + -shapeParams[axis], shapeParams[axis], size=shapeIndices + ) + input = {"params": a, "indices": b.astype(np.float)} + expected = {"output": np.take(a, b, axis=axis)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(len(expected['output'].shape), builder._get_rank('output')) + self.assertEqual( + len(expected["output"].shape), builder._get_rank("output") + ) def test_gather_gpu(self): self.test_gather_cpu(cpu_only=False) @@ -3938,95 +5083,129 @@ def test_gather_along_axis_cpu(self, cpu_only=True): indices_shape = np.copy(params_shape) indices_shape[axis] = np.random.randint(low=1, high=8) - input_features = [('params', datatypes.Array(*params_shape)), - ('indices', datatypes.Array(*indices_shape))] - output_features = [('output', None)] + input_features = [ + ("params", datatypes.Array(*params_shape)), + ("indices", datatypes.Array(*indices_shape)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, + ) + builder.add_gather_along_axis( + "gather_along_axis", ["params", "indices"], "output", axis=axis ) - builder.add_gather_along_axis('gather_along_axis', ['params', 'indices'], 'output', axis=axis) a = np.random.rand(*params_shape) - b = np.random.randint(-params_shape[axis], params_shape[axis], size=indices_shape) + b = np.random.randint( + -params_shape[axis], params_shape[axis], size=indices_shape + ) - input = {'params': a, 'indices': b.astype(np.float)} - expected = {'output': np.take_along_axis(a, b, axis=axis)} + input = {"params": a, "indices": b.astype(np.float)} + expected = {"output": np.take_along_axis(a, b, axis=axis)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(len(expected['output'].shape), builder._get_rank('output')) + self.assertEqual( + len(expected["output"].shape), builder._get_rank("output") + ) def test_gather_along_axis_gpu(self): self.test_gather_along_axis_cpu(cpu_only=False) + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) def test_gather_nd_cpu(self, cpu_only=True): - for params_rank, indices_rank in [(i, j) for i in range(1, 6) for j in range(1, 6)]: + for params_rank, indices_rank in [ + (i, j) for i in range(1, 6) for j in range(1, 6) + ]: params_shape = np.random.randint(low=2, high=8, size=params_rank) indices_shape = np.random.randint(low=2, high=8, size=indices_rank) indices_shape[-1] = np.random.randint(low=1, high=params_rank + 1) for _ in range(5): - input_features = [('params', datatypes.Array(*params_shape)), - ('indices', datatypes.Array(*indices_shape))] - output_features = [('output', None)] 
+ input_features = [ + ("params", datatypes.Array(*params_shape)), + ("indices", datatypes.Array(*indices_shape)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True ) - output_shape = list(indices_shape[:-1]) + list(params_shape[indices_shape[-1]:]) + output_shape = list(indices_shape[:-1]) + list( + params_shape[indices_shape[-1] :] + ) if len(output_shape) > 5: continue - builder.add_gather_nd('gather_nd', ['params', 'indices'], 'output') + builder.add_gather_nd("gather_nd", ["params", "indices"], "output") a = np.random.rand(*params_shape) indices_list = [] for i in range(indices_shape[-1]): - indices_list.append(np.random.randint(0, params_shape[i], size=indices_shape[:-1])) + indices_list.append( + np.random.randint(0, params_shape[i], size=indices_shape[:-1]) + ) indices = np.stack(indices_list, axis=-1) - input = {'params': a, 'indices': indices.astype(np.float)} + input = {"params": a, "indices": indices.astype(np.float)} with tf.Graph().as_default(), tf.Session() as sess: tf_op = tf.gather_nd(a, indices) - expected = {'output': sess.run(tf_op)} + expected = {"output": sess.run(tf_op)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) - self.assertEqual(-1, builder._get_rank('output')) + self.assertEqual(-1, builder._get_rank("output")) def test_gather_nd_gpu(self): self.test_gather_nd_cpu(cpu_only=False) + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) def test_scatter_cpu(self, cpu_only=True): - for ref_rank, indices_rank in [(i, j) for i in range(1, 6) for j in range(1, 6)]: + for ref_rank, indices_rank in [ + (i, j) for i in range(1, 6) for j in range(1, 6) + ]: for accumulate_mode in ["UPDATE", "ADD", "SUB", "MUL", "DIV", "MAX", "MIN"]: for _ in range(5): ref_shape = np.random.randint(low=2, high=8, size=ref_rank) indices_shape = np.random.randint(low=2, high=8, size=indices_rank) updates_shape = list(indices_shape) + list(ref_shape[1:]) - input_features = [('ref', datatypes.Array(*ref_shape)), - ('indices', datatypes.Array(*indices_shape)), - ('updates', datatypes.Array(*updates_shape))] - output_features = [('output', None)] + input_features = [ + ("ref", datatypes.Array(*ref_shape)), + ("indices", datatypes.Array(*indices_shape)), + ("updates", datatypes.Array(*updates_shape)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) if len(updates_shape) > 5: continue - builder.add_scatter('scatter', ['ref', 'indices', 'updates'], 'output', axis=0, - mode=accumulate_mode) + builder.add_scatter( + "scatter", + ["ref", "indices", "updates"], + "output", + axis=0, + mode=accumulate_mode, + ) ref = np.random.rand(*ref_shape) updates = np.random.rand(*updates_shape) + if accumulate_mode == "DIV": + updates += 10.0 indices = np.random.randint(0, ref_shape[0], size=indices_shape) - input = {'ref': ref, 'indices': indices.astype(np.float), 'updates': updates} + input = { + "ref": ref, + "indices": indices.astype(np.float), + "updates": updates, + } with tf.Graph().as_default(), tf.Session() as sess: tf_output = tf.Variable(ref) @@ -4045,7 +5224,7 @@ def test_scatter_cpu(self, cpu_only=True): sess.run(tf.scatter_max(tf_output, indices, updates)) if accumulate_mode == "MIN": sess.run(tf.scatter_min(tf_output, 
indices, updates)) - expected = {'output': sess.run(tf_output)} + expected = {"output": sess.run(tf_output)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -4054,36 +5233,54 @@ def test_scatter_gpu(self): def test_gather_scatter_multiple_axis_cpu(self, cpu_only=True): - for params_rank, indices_rank in [(i, j) for i in range(1, 6) for j in range(1, 6)]: + for params_rank, indices_rank in [ + (i, j) for i in range(1, 6) for j in range(1, 6) + ]: for axis in range(-params_rank, params_rank): for _ in range(5): params_shape = np.random.randint(low=2, high=8, size=params_rank) indices_shape = np.random.randint(low=2, high=8, size=indices_rank) pos_axis = axis if axis >= 0 else axis + params_rank - output_shape = list(params_shape[:pos_axis]) + list(indices_shape) + list( - params_shape[pos_axis + 1:]) + output_shape = ( + list(params_shape[:pos_axis]) + + list(indices_shape) + + list(params_shape[pos_axis + 1 :]) + ) if len(output_shape) > 5: continue - input_features = [('params', datatypes.Array(*params_shape)), - ('indices', datatypes.Array(*indices_shape))] - output_features = [('output', None)] + input_features = [ + ("params", datatypes.Array(*params_shape)), + ("indices", datatypes.Array(*indices_shape)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_gather('gather', ['params', 'indices'], 'updates', axis=axis) - builder.add_scatter('scatter', ['params', 'indices', 'updates'], 'output', axis=axis, mode='UPDATE') + builder.add_gather( + "gather", ["params", "indices"], "updates", axis=axis + ) + builder.add_scatter( + "scatter", + ["params", "indices", "updates"], + "output", + axis=axis, + mode="UPDATE", + ) a = np.random.rand(*params_shape) - b = np.random.randint(-params_shape[axis], params_shape[axis], size=indices_shape) + b = np.random.randint( + -params_shape[axis], params_shape[axis], size=indices_shape + ) - input = {'params': a, 'indices': b.astype(np.float)} - expected = {'output': a} + input = {"params": a, "indices": b.astype(np.float)} + expected = {"output": a} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_gather_scatter_multiple_axis_gpu(self): @@ -4098,63 +5295,97 @@ def test_scatter_along_axis_cpu(self, cpu_only=True): indices_shape[axis] = np.random.randint(low=1, high=8) updates_shape = indices_shape - input_features = [('ref', datatypes.Array(*ref_shape)), - ('indices', datatypes.Array(*indices_shape)), - ('updates', datatypes.Array(*updates_shape))] - output_features = [('output', None)] + input_features = [ + ("ref", datatypes.Array(*ref_shape)), + ("indices", datatypes.Array(*indices_shape)), + ("updates", datatypes.Array(*updates_shape)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_scatter_along_axis('scatter_along_axis', ['ref', 'indices', 'updates'], 'output', - axis=axis, mode="UPDATE") + builder.add_scatter_along_axis( + "scatter_along_axis", + ["ref", "indices", "updates"], + "output", + axis=axis, + mode="UPDATE", + ) ref = np.random.rand(*ref_shape) updates = np.random.rand(*updates_shape) - indices = np.random.randint(-ref_shape[axis], ref_shape[axis], size=indices_shape) - input = {'ref': ref, 
'indices': indices.astype(np.float), 'updates': updates} + indices = np.random.randint( + -ref_shape[axis], ref_shape[axis], size=indices_shape + ) + input = { + "ref": ref, + "indices": indices.astype(np.float), + "updates": updates, + } np_output = np.copy(ref) np.put_along_axis(np_output, indices, updates, axis=axis) - expected = {'output': np_output} + expected = {"output": np_output} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) def test_scatter_along_axis_gpu(self): self.test_scatter_along_axis_cpu(cpu_only=False) + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) def test_scatter_nd_cpu(self, cpu_only=True): - for ref_rank, indices_rank in [(i, j) for i in range(1, 6) for j in range(2, 6)]: + for ref_rank, indices_rank in [ + (i, j) for i in range(1, 6) for j in range(2, 6) + ]: ref_shape = np.random.randint(low=2, high=8, size=ref_rank) indices_shape = np.random.randint(low=2, high=8, size=indices_rank) indices_shape[-1] = np.random.randint(low=1, high=ref_rank + 1) for accumulate_mode in ["UPDATE", "ADD", "SUB"]: for id in range(20): - updates_shape = list(indices_shape[:-1]) + list(ref_shape[indices_shape[-1]:]) - if len(updates_shape) > 5: continue + updates_shape = list(indices_shape[:-1]) + list( + ref_shape[indices_shape[-1] :] + ) + if len(updates_shape) > 5: + continue - input_features = [('ref', datatypes.Array(*ref_shape)), - ('indices', datatypes.Array(*indices_shape)), - ('updates', datatypes.Array(*updates_shape))] - output_features = [('output', None)] + input_features = [ + ("ref", datatypes.Array(*ref_shape)), + ("indices", datatypes.Array(*indices_shape)), + ("updates", datatypes.Array(*updates_shape)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, + output_features, + disable_rank5_shape_mapping=True, ) - builder.add_scatter_nd('scatter_nd', ['ref', 'indices', 'updates'], 'output', mode=accumulate_mode) + builder.add_scatter_nd( + "scatter_nd", + ["ref", "indices", "updates"], + "output", + mode=accumulate_mode, + ) ref = np.random.rand(*ref_shape) updates = np.random.rand(*updates_shape) indices_list = [] for i in range(indices_shape[-1]): - indices_list.append(np.random.randint(0, ref_shape[i], size=indices_shape[:-1])) + indices_list.append( + np.random.randint(0, ref_shape[i], size=indices_shape[:-1]) + ) indices = np.stack(indices_list, axis=-1) - input = {'ref': ref, 'indices': indices.astype(np.float), 'updates': updates} + input = { + "ref": ref, + "indices": indices.astype(np.float), + "updates": updates, + } with tf.Graph().as_default(), tf.Session() as sess: tf_output = tf.Variable(ref) @@ -4165,7 +5396,7 @@ def test_scatter_nd_cpu(self, cpu_only=True): sess.run(tf.scatter_nd_add(tf_output, indices, updates)) if accumulate_mode == "SUB": sess.run(tf.scatter_nd_sub(tf_output, indices, updates)) - expected = {'output': sess.run(tf_output)} + expected = {"output": sess.run(tf_output)} self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) @@ -4176,8 +5407,11 @@ def test_layer_normalization_cpu(self, cpu_only=True): def layer_norm_numpy(x, shapes, gamma_, beta_, eps=1e-5): axes = [-i - 1 for i, _ in enumerate(shapes)] num = x - np.mean(x, axis=tuple(axes), keepdims=True) - dem = np.sqrt(np.sum(np.square(num), axis=tuple(axes), - keepdims=True) / np.prod(shapes) + eps) + dem = np.sqrt( + np.sum(np.square(num), axis=tuple(axes), keepdims=True) + / np.prod(shapes) + + eps + ) return num / dem * 
gamma_ + beta_ for rank in range(1, 6): @@ -4190,23 +5424,25 @@ def layer_norm_numpy(x, shapes, gamma_, beta_, eps=1e-5): gamma = np.random.rand(*norm_shapes) beta = np.random.rand(*norm_shapes) - input_features = [('data', datatypes.Array(*input_shape))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True + input_features, output_features, disable_rank5_shape_mapping=True ) - builder.add_layer_normalization(name='layer_normalization', - input_name='data', - output_name='output', - normalized_shape=norm_shapes, - gamma=gamma, beta=beta) + builder.add_layer_normalization( + name="layer_normalization", + input_name="data", + output_name="output", + normalized_shape=norm_shapes, + gamma=gamma, + beta=beta, + ) - inputs = {'data': data} + inputs = {"data": data} ref = layer_norm_numpy(data, norm_shapes, gamma, beta) - expected = {'output': ref} + expected = {"output": ref} self._test_model(builder.spec, inputs, expected, useCPUOnly=cpu_only) @@ -4218,9 +5454,12 @@ def get_size_after_stride(X, params): start = params["start"] end = params["end"] stride = params["stride"] - if params["axis"] == 'width': axis = 2 - if params["axis"] == 'height': axis = 1 - if params["axis"] == 'channel': axis = 0 + if params["axis"] == "width": + axis = 2 + if params["axis"] == "height": + axis = 1 + if params["axis"] == "channel": + axis = 0 N = X.shape[axis] if end < 0: end = end + N @@ -4238,11 +5477,11 @@ def get_numpy_predictions_slice(X, params): start = params["start"] end = params["end"] stride = params["stride"] - if params["axis"] == 'width': + if params["axis"] == "width": return X[:, :, start:end:stride] - if params["axis"] == 'height': + if params["axis"] == "height": return X[:, start:end:stride, :] - if params["axis"] == 'channel': + if params["axis"] == "channel": return X[start:end:stride, :, :] @@ -4251,31 +5490,39 @@ def get_coreml_predictions_slice(X, params): eval = True try: input_dim = X.shape - output_dim = (1, 1, - 1) # some random dimensions here: we are going to remove this information later - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*output_dim))] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_slice('slice', 'data', 'output', - start_index=params["start"], - end_index=params["end"], stride=params["stride"], - axis=params["axis"]) + output_dim = ( + 1, + 1, + 1, + ) # some random dimensions here: we are going to remove this information later + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*output_dim))] + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_slice( + "slice", + "data", + "output", + start_index=params["start"], + end_index=params["end"], + stride=params["stride"], + axis=params["axis"], + ) # Remove output shape by deleting and adding an output del builder.spec.description.output[-1] output = builder.spec.description.output.add() - output.name = 'output' + output.name = "output" output.type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( - 'DOUBLE') + "DOUBLE" + ) # save the model model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'test_layer.mlmodel') + model_path = os.path.join(model_dir, 
"test_layer.mlmodel") coremltools.utils.save_spec(builder.spec, model_path) # prepare input and get predictions coreml_model = coremltools.models.MLModel(model_path) - coreml_input = {'data': X} - if is_macos() and macos_version() >= (10, 13): - coreml_preds = coreml_model.predict(coreml_input)['output'] + coreml_input = {"data": X} + if _is_macos() and _macos_version() >= (10, 13): + coreml_preds = coreml_model.predict(coreml_input)["output"] else: coreml_preds = None if os.path.exists(model_dir): @@ -4288,22 +5535,37 @@ def get_coreml_predictions_slice(X, params): def get_numpy_predictions_reduce(X, params): - if params["axis"] == 'CHW': axis = (0, 1, 2) - if params["axis"] == 'HW': axis = (1, 2) - if params["axis"] == 'C': axis = 0 - if params["axis"] == 'H': axis = 1 - if params["axis"] == 'W': axis = 2 - - if params["mode"] == 'sum': return np.sum(X, axis) - if params["mode"] == 'avg': return np.mean(X, axis) - if params["mode"] == 'prod': return np.prod(X, axis) - if params["mode"] == 'logsum': return np.sum(np.log(X + 1e-6), axis) - if params["mode"] == 'sumsquare': return np.sum(X ** 2, axis) - if params["mode"] == 'L2': return np.sqrt(np.sum(X ** 2, axis)) - if params["mode"] == 'L1': return np.sum(np.abs(X), axis) - if params["mode"] == 'max': return np.amax(X, axis) - if params["mode"] == 'min': return np.amin(X, axis) - if params["mode"] == 'argmax': return np.argmax(X, axis) + if params["axis"] == "CHW": + axis = (0, 1, 2) + if params["axis"] == "HW": + axis = (1, 2) + if params["axis"] == "C": + axis = 0 + if params["axis"] == "H": + axis = 1 + if params["axis"] == "W": + axis = 2 + + if params["mode"] == "sum": + return np.sum(X, axis) + if params["mode"] == "avg": + return np.mean(X, axis) + if params["mode"] == "prod": + return np.prod(X, axis) + if params["mode"] == "logsum": + return np.sum(np.log(X + 1e-6), axis) + if params["mode"] == "sumsquare": + return np.sum(X ** 2, axis) + if params["mode"] == "L2": + return np.sqrt(np.sum(X ** 2, axis)) + if params["mode"] == "L1": + return np.sum(np.abs(X), axis) + if params["mode"] == "max": + return np.amax(X, axis) + if params["mode"] == "min": + return np.amin(X, axis) + if params["mode"] == "argmax": + return np.argmax(X, axis) def get_coreml_predictions_reduce(X, params): @@ -4313,27 +5575,28 @@ def get_coreml_predictions_reduce(X, params): input_dim = X.shape # some random dimensions here: we are going to remove this information later output_dim = (1, 1, 1) - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', datatypes.Array(*output_dim))] - builder = neural_network.NeuralNetworkBuilder(input_features, - output_features) - builder.add_reduce('reduce', 'data', 'output', axis=params["axis"], - mode=params["mode"]) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", datatypes.Array(*output_dim))] + builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + builder.add_reduce( + "reduce", "data", "output", axis=params["axis"], mode=params["mode"] + ) # Remove output shape by deleting and adding an output del builder.spec.description.output[-1] output = builder.spec.description.output.add() - output.name = 'output' + output.name = "output" output.type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( - 'DOUBLE') + "DOUBLE" + ) # save the model model_dir = tempfile.mkdtemp() - model_path = os.path.join(model_dir, 'test_layer.mlmodel') + model_path = os.path.join(model_dir, 
"test_layer.mlmodel") coremltools.utils.save_spec(builder.spec, model_path) # prepare input and get predictions coreml_model = coremltools.models.MLModel(model_path) - coreml_input = {'data': X} - if is_macos() and macos_version() >= (10, 13): - coreml_preds = coreml_model.predict(coreml_input)['output'] + coreml_input = {"data": X} + if _is_macos() and _macos_version() >= (10, 13): + coreml_preds = coreml_model.predict(coreml_input)["output"] else: coreml_preds = None if os.path.exists(model_dir): @@ -4346,14 +5609,13 @@ def get_coreml_predictions_reduce(X, params): class StressTest(CorrectnessTest): - def test_slice_layer(self): params_dict = dict( input_shape=[[30, 100, 8], [80, 50, 5], [4, 12, 5], [56, 8, 14]], - axis=['channel', 'height', 'width'], + axis=["channel", "height", "width"], start=[0, 1, 2, 5], end=[5, 100, 56, -1, -2, -4], - stride=[1, 2, 3] + stride=[1, 2, 3], ) params = list(itertools.product(*params_dict.values())) all_candidates = [dict(zip(params_dict.keys(), x)) for x in params] @@ -4362,8 +5624,12 @@ def test_slice_layer(self): X = np.random.rand(*pr["input_shape"]) if get_size_after_stride(X, pr): valid_params.append(pr) - print("Total params to be tested: ", len(valid_params), - "out of candidates: ", len(all_candidates)) + print( + "Total params to be tested: ", + len(valid_params), + "out of candidates: ", + len(all_candidates), + ) failed_tests_compile = [] failed_tests_shape = [] @@ -4388,20 +5654,33 @@ def test_slice_layer(self): def test_reduce_layer(self): params_dict = dict( input_shape=[[3, 10, 8], [8, 5, 5], [4, 12, 10], [7, 1, 14]], - mode=['sum', 'avg', 'prod', 'sumsquare', 'L1', 'L2', 'max', - 'min', 'argmax'], - axis=['CHW', 'HW', 'C', 'H', 'W'], + mode=[ + "sum", + "avg", + "prod", + "sumsquare", + "L1", + "L2", + "max", + "min", + "argmax", + ], + axis=["CHW", "HW", "C", "H", "W"], ) params = list(itertools.product(*params_dict.values())) all_candidates = [dict(zip(params_dict.keys(), x)) for x in params] valid_params = [] for pr in all_candidates: - if pr["mode"] == 'argmax': - if pr["axis"] == 'CHW' or pr["axis"] == 'HW': + if pr["mode"] == "argmax": + if pr["axis"] == "CHW" or pr["axis"] == "HW": continue valid_params.append(pr) - print("Total params to be tested: ", len(valid_params), - "out of candidates: ", len(all_candidates)) + print( + "Total params to be tested: ", + len(valid_params), + "out of candidates: ", + len(all_candidates), + ) failed_tests_compile = [] failed_tests_shape = [] @@ -4424,148 +5703,298 @@ def test_reduce_layer(self): self.assertEqual(failed_tests_numerical, []) -@unittest.skipIf(not is_macos() or macos_version() < LAYERS_10_15_MACOS_VERSION, - 'macOS 10.15+ required. Skipping tests.') +@unittest.skipIf( + not _is_macos() or _macos_version() < LAYERS_10_15_MACOS_VERSION, + "macOS 10.15+ required. 
Skipping tests.", +) class CoreML3NetworkStressTest(CorrectnessTest): def test_dyn_weight_conv2d_stress(self): options = dict( - padding = ['valid'], - filters = [1,2,4], - kernel_size = [1,3,5], # square kernels - strides = [1,2], - dilation_rate = [1], + padding=["valid"], + filters=[1, 2, 4], + kernel_size=[1, 3, 5], # square kernels + strides=[1, 2], + dilation_rate=[1], + batch_size=[1, 64, 512], ) - input_size = 16 - input_channels = 3 - input_dim = (1, input_channels, input_size, input_size) + input_size = 64 + input_channels = 64 + input_dim = [1, input_channels, input_size, input_size] - def conv_spatial_size(image_size, kernel_size, stride, dilation, - padding): - if padding == 'valid': + def conv_spatial_size(image_size, kernel_size, stride, dilation, padding): + if padding == "valid": kernel_size_dilated = (kernel_size - 1) * dilation + 1 return (image_size - kernel_size_dilated) // stride + 1 - elif padding == 'same': + elif padding == "same": return int(math.ceil(image_size * 1.0 / stride)) else: return 0 for x in itertools.product(*options.values()): kwargs = dict(zip(options.keys(), x)) - if kwargs['strides'] > 1 and kwargs['dilation_rate'] > 1: + if kwargs["strides"] > 1 and kwargs["dilation_rate"] > 1: continue # weight layout: (output_channels, kernel_channels, height, width) - weight_dim = (kwargs['filters'], input_channels, - kwargs['kernel_size'], kwargs['kernel_size']) + weight_dim = ( + kwargs["filters"], + input_channels, + kwargs["kernel_size"], + kwargs["kernel_size"], + ) + input_dim[0] = kwargs["batch_size"] input_features = [ - ('input', datatypes.Array(*input_dim)), - ('weight', datatypes.Array(*weight_dim))] - output_features = [('output', None)] + ("input", datatypes.Array(*input_dim)), + ("weight", datatypes.Array(*weight_dim)), + ] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, - output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) builder.add_convolution( - name='two_input_conv_layer', + name="two_input_conv_layer", kernel_channels=input_channels, - output_channels=kwargs['filters'], - height=kwargs['kernel_size'], - width=kwargs['kernel_size'], - stride_height=kwargs['strides'], - stride_width=kwargs['strides'], - border_mode=kwargs['padding'], + output_channels=kwargs["filters"], + height=kwargs["kernel_size"], + width=kwargs["kernel_size"], + stride_height=kwargs["strides"], + stride_width=kwargs["strides"], + border_mode=kwargs["padding"], groups=1, W=None, b=None, has_bias=False, - dilation_rate=kwargs['dilation_rate'], - input_name=['input', 'weight'], - output_name='output') + dilation_rate=kwargs["dilation_rate"], + input_name=["input", "weight"], + output_name="output", + ) # Assigning everything to ones should cover the execution path # and engine failures, but is not a complete check on numerics. 
out_spatial_size = conv_spatial_size( input_size, - kwargs['kernel_size'], - kwargs['strides'], - kwargs['dilation_rate'], - kwargs['padding']) + kwargs["kernel_size"], + kwargs["strides"], + kwargs["dilation_rate"], + kwargs["padding"], + ) input_val = np.ones(input_dim) weight_val = np.ones(weight_dim) - output_dim = (1, kwargs['filters'], out_spatial_size, out_spatial_size) - expected = np.ones(output_dim) * (kwargs['kernel_size'] * kwargs['kernel_size'] * input_channels) + output_dim = ( + kwargs["batch_size"], + kwargs["filters"], + out_spatial_size, + out_spatial_size, + ) + expected = np.ones(output_dim) * ( + kwargs["kernel_size"] * kwargs["kernel_size"] * input_channels + ) - feed_dict = {'input': input_val, 'weight': weight_val} - expected = {'output': expected} + feed_dict = {"input": input_val, "weight": weight_val} + expected = {"output": expected} self._test_model(builder.spec, feed_dict, expected) + def test_static_weight_conv2d_stress(self): + options = dict( + padding=["valid"], + filters=[1, 2, 5], + kernel_size=[1, 3, 4], # square kernels + strides=[1, 2], + dilation_rate=[1, 2], + batch_size=[1, 32, 512], + ) + + input_size = 64 + input_channels = 64 + input_dim = [1, input_channels, input_size, input_size] + + def conv_spatial_size(image_size, kernel_size, stride, dilation, padding): + if padding == "valid": + kernel_size_dilated = (kernel_size - 1) * dilation + 1 + return (image_size - kernel_size_dilated) // stride + 1 + elif padding == "same": + return int(math.ceil(image_size * 1.0 / stride)) + else: + return 0 + + for x in itertools.product(*options.values()): + kwargs = dict(zip(options.keys(), x)) + if kwargs["strides"] > 1 and kwargs["dilation_rate"] > 1: + continue + # weight layout: (output_channels, kernel_channels, height, width) + weight_dim = ( + kwargs["filters"], + input_channels, + kwargs["kernel_size"], + kwargs["kernel_size"], + ) + + input_dim[0] = kwargs["batch_size"] + input_features = [("input", datatypes.Array(*input_dim))] + # ('weight', datatypes.Array(*weight_dim))] + output_features = [("output", None)] + + input_weight = np.ones(weight_dim) + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + + builder.add_convolution( + name="two_input_conv_layer", + kernel_channels=input_channels, + output_channels=kwargs["filters"], + height=kwargs["kernel_size"], + width=kwargs["kernel_size"], + stride_height=kwargs["strides"], + stride_width=kwargs["strides"], + border_mode=kwargs["padding"], + groups=1, + W=input_weight, + b=None, + has_bias=False, + dilation_factors=[kwargs["dilation_rate"]] * 2, + input_name=["input"], + output_name="output", + ) + + # Assigning everything to ones should cover the execution path + # and engine failures, but is not a complete check on numerics. 
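+            # Illustrative note (not in the original test): unlike the
+            # dynamic-weight sweep above, this one exercises dilation_rate=2,
+            # where a kernel_size=3 filter spans (3 - 1) * 2 + 1 = 5 input
+            # pixels, so the "valid" output size at stride 1 is
+            # (64 - 5) // 1 + 1 = 60 per spatial axis.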
+ out_spatial_size = conv_spatial_size( + input_size, + kwargs["kernel_size"], + kwargs["strides"], + kwargs["dilation_rate"], + kwargs["padding"], + ) + + input_val = np.ones(input_dim) + weight_val = np.ones(weight_dim) + output_dim = ( + kwargs["batch_size"], + kwargs["filters"], + out_spatial_size, + out_spatial_size, + ) + expected = np.ones(output_dim) * ( + kwargs["kernel_size"] * kwargs["kernel_size"] * input_channels + ) + + feed_dict = {"input": input_val} # , 'weight': weight_val} + expected = {"output": expected} + + self._test_model(builder.spec, feed_dict, expected) def test_power_iteration_cpu(self): convergence_tolerance = 1e-8 number_of_iterations = 200 - input_features = [('matrix', datatypes.Array(*(2, 2))), - ('starting_vector', datatypes.Array(*(2,)))] + input_features = [ + ("matrix", datatypes.Array(*(2, 2))), + ("starting_vector", datatypes.Array(*(2,))), + ] - output_features = [('maximum_eigen_value', datatypes.Array(*(1,))), - ('eigen_vector', None), - ('iteration_count', datatypes.Array(*(1,)))] + output_features = [ + ("maximum_eigen_value", datatypes.Array(*(1, 1))), + ("eigen_vector", None), + ("iteration_count", datatypes.Array(*(1,))), + ] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True) - builder.add_expand_dims('expand_dims', 'starting_vector', 'x', axes=[-1]) - builder.add_load_constant_nd('iteration_count', 'iteration_count', - constant_value=np.zeros((1,)), - shape=(1,)) + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_expand_dims("expand_dims", "starting_vector", "x", axes=[-1]) + builder.add_load_constant_nd( + "iteration_count", + "iteration_count", + constant_value=np.zeros((1,)), + shape=(1,), + ) - loop_layer = builder.add_loop('loop', max_iterations=number_of_iterations) - loop_body_builder = neural_network.NeuralNetworkBuilder(nn_spec=loop_layer.loop.bodyNetwork) + loop_layer = builder.add_loop("loop", max_iterations=number_of_iterations) + loop_body_builder = neural_network.NeuralNetworkBuilder( + nn_spec=loop_layer.loop.bodyNetwork + ) # output shape: (n,1) - loop_body_builder.add_batched_mat_mul('bmm.1', input_names=['matrix', 'x'], output_name='y') - loop_body_builder.add_reduce_l2('reduce', input_name='y', output_name='norm', axes=0) - loop_body_builder.add_divide_broadcastable('divide', ['y', 'norm'], 'y_normalized') + loop_body_builder.add_batched_mat_mul( + "bmm.1", input_names=["matrix", "x"], output_name="y" + ) + loop_body_builder.add_reduce_l2( + "reduce", input_name="y", output_name="norm", axes=[0] + ) + loop_body_builder.add_divide_broadcastable( + "divide", ["y", "norm"], "y_normalized" + ) # find diff: 1- abs(cosine) - loop_body_builder.add_batched_mat_mul('cosine', ['y_normalized', 'x'], 'cosine_diff', transpose_a=True) - loop_body_builder.add_squeeze('squeeze_all', 'cosine_diff', 'cosine_diff_squeeze', squeeze_all=True) - loop_body_builder.add_unary('abs_cosine', 'cosine_diff_squeeze', 'abs_cosine_diff', mode='abs') - loop_body_builder.add_activation('diff', non_linearity='LINEAR', - input_name='abs_cosine_diff', - output_name='diff', params=[-1,1]) + loop_body_builder.add_batched_mat_mul( + "cosine", ["y_normalized", "x"], "cosine_diff", transpose_a=True + ) + loop_body_builder.add_squeeze( + "squeeze_all", "cosine_diff", "cosine_diff_squeeze", squeeze_all=True + ) + loop_body_builder.add_unary( + "abs_cosine", "cosine_diff_squeeze", "abs_cosine_diff", mode="abs" + ) + 
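+        # The layers above implement one power-iteration step; in NumPy terms
+        # (a sketch of the math, not part of the model): y = A @ x;
+        # y /= np.linalg.norm(y); diff = 1 - abs(y.T @ x). The LINEAR
+        # activation below, with params=[-1, 1] (alpha=-1, beta=1), turns
+        # 'abs_cosine_diff' into that 1 - |cosine| convergence measure.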
loop_body_builder.add_activation( + "diff", + non_linearity="LINEAR", + input_name="abs_cosine_diff", + output_name="diff", + params=[-1, 1], + ) # update iteration count - loop_body_builder.add_activation('iteration_count_add', non_linearity='LINEAR', - input_name='iteration_count', - output_name='iteration_count_plus_1', params=[1, 1]) - loop_body_builder.add_copy('iteration_count_copy', 'iteration_count_plus_1', 'iteration_count') + loop_body_builder.add_activation( + "iteration_count_add", + non_linearity="LINEAR", + input_name="iteration_count", + output_name="iteration_count_plus_1", + params=[1, 1], + ) + loop_body_builder.add_copy( + "iteration_count_copy", "iteration_count_plus_1", "iteration_count" + ) # update 'x' - loop_body_builder.add_copy('update_x', 'y_normalized', 'x') + loop_body_builder.add_copy("update_x", "y_normalized", "x") # add condition to break from the loop, if convergence criterion is met - loop_body_builder.add_less_than('cond', ['diff'], 'cond', alpha=convergence_tolerance) - branch_layer = loop_body_builder.add_branch('branch_layer', 'cond') - builder_ifbranch = neural_network.NeuralNetworkBuilder(nn_spec=branch_layer.branch.ifBranch) - builder_ifbranch.add_loop_break('break') + loop_body_builder.add_less_than( + "cond", ["diff"], "cond", alpha=convergence_tolerance + ) + branch_layer = loop_body_builder.add_branch("branch_layer", "cond") + builder_ifbranch = neural_network.NeuralNetworkBuilder( + nn_spec=branch_layer.branch.ifBranch + ) + builder_ifbranch.add_loop_break("break") # now we are out of the loop, compute the eigen value - builder.add_batched_mat_mul('bmm.2', input_names=['matrix', 'x'], output_name='x_right') - builder.add_batched_mat_mul('bmm.3', input_names=['x', 'x_right'], output_name='maximum_eigen_value', - transpose_a=True) - builder.add_squeeze('squeeze', 'x', 'eigen_vector', squeeze_all=True) + builder.add_batched_mat_mul( + "bmm.2", input_names=["matrix", "x"], output_name="x_right" + ) + builder.add_batched_mat_mul( + "bmm.3", + input_names=["x", "x_right"], + output_name="maximum_eigen_value", + transpose_a=True, + ) + builder.add_squeeze("squeeze", "x", "eigen_vector", squeeze_all=True) # make input sizes flexible spec = builder.spec - flexible_shape_utils.add_multiarray_ndshape_enumeration(spec, feature_name='matrix', - enumerated_shapes=[(3,3), (4,4)]) + flexible_shape_utils.add_multiarray_ndshape_enumeration( + spec, feature_name="matrix", enumerated_shapes=[(3, 3), (4, 4)] + ) - flexible_shape_utils.add_multiarray_ndshape_enumeration(spec, feature_name='starting_vector', - enumerated_shapes=[(3,), (4,)]) + flexible_shape_utils.add_multiarray_ndshape_enumeration( + spec, feature_name="starting_vector", enumerated_shapes=[(3,), (4,)] + ) from numpy import linalg as LA @@ -4576,8 +6005,8 @@ def test_power_iteration_cpu(self): e, v = LA.eig(A) idx = np.argmax(abs(e)) - input = {'starting_vector': starting_vector, 'matrix': A.astype(np.float)} - expected = {'maximum_eigen_value': e[idx]} + input = {"starting_vector": starting_vector, "matrix": A.astype(np.float)} + expected = {"maximum_eigen_value": np.array([[e[idx]]])} self._test_model(spec, input, expected, useCPUOnly=True) # try on 2x2 matrix @@ -4588,14 +6017,1147 @@ def test_power_iteration_cpu(self): e, v = LA.eig(A) idx = np.argmax(abs(e)) - input = {'starting_vector': starting_vector, 'matrix': A.astype(np.float)} - expected = {'maximum_eigen_value': e[idx]} + input = {"starting_vector": starting_vector, "matrix": A.astype(np.float)} + expected = {"maximum_eigen_value": 
np.array([[e[idx]]])} self._test_model(spec, input, expected, useCPUOnly=True) -if __name__ == '__main__': - unittest.main() - # suite = unittest.TestSuite() - # suite.addTest(NewLayersSimpleTest("test_softmax_nan_bug_cpu")) - # #suite.addTest(SimpleNetworkTest("test_power_iteration_cpu")) - # unittest.TextTestRunner().run(suite) +@unittest.skipIf( + _macos_version() < LAYERS_10_16_MACOS_VERSION, + "macOS 10.16+ required. Skipping tests.", +) +class IOS14SingleLayerTests(CorrectnessTest): + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) + def test_onehot_layer_cpu(self, cpu_only=True): + ctr = 0 + params_dict = dict( + input_rank=[1, 2, 3, 4], + negative_axis=[True, False], + depth=[30], + on_value=[30.0], + off_value=[-4.0], + ) + params = list(itertools.product(*params_dict.values())) + for param in params: + param = dict(zip(params_dict.keys(), param)) + input_rank = param["input_rank"] + vectorSize = param["depth"] + on_value = param["on_value"] + off_value = param["off_value"] + + for axis in range(input_rank + 1): + ctr += 1 + if param["negative_axis"]: + axis_param = axis - (input_rank + 1) + else: + axis_param = axis + + input_shape = np.random.randint(1, 10, size=(input_rank,)) + + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + + builder.add_one_hot( + "one_hot", + ["data"], + "output", + one_hot_vector_size=vectorSize, + axis=axis_param, + on_value=on_value, + off_value=off_value, + ) + + x = np.random.randint(0, vectorSize, size=input_shape) + # x[::4] -= vectorSize # [To do] Need to Handle this case. + + with tf.Graph().as_default(), tf.Session() as sess: + # TF seems to have a bug with axis < -1 + if axis_param < -1: + axis_param += input_rank + 1 + tf_op = tf.one_hot( + x, + axis=axis_param, + depth=vectorSize, + on_value=on_value, + off_value=off_value, + ) + expected = {"output": sess.run(tf_op)} + + input = {"data": x.astype(np.float)} + self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + + def test_batched_mat_mul_dynamic_quantization_cpu(self, cpu_only=True): + X1 = 11 + X2 = 23 + W = np.random.rand(X1, X2) * 20 - 10 # uniform between [-10, 10] + b = np.random.rand(X2) * 20 - 10 + input_shapes = [ + (X1,), + (5, X1), + (2, 3, X1), + (4, 1, X1), + ] # , (12, 5, 8, X1), (2, 3, 1, 5, X1)] + + W_max = max(np.abs(np.min(W)), np.abs(np.max(W))) + W_normalized = W / W_max # [-1,1] + W_quantized_int8 = 127.0 * W_normalized # [-127, 127] + W_quantized_int8 = W_quantized_int8.astype(np.int8) + quant_scale = W_max / 127.0 + + for input_shape in input_shapes: + x = np.random.rand(*input_shape) * 10 + + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] + + for has_bias in [True, False]: + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + + builder.add_batched_mat_mul( + name="batched_mat_mul", + input_names=["data"], + output_name="output", + weight_matrix_rows=X1, + weight_matrix_columns=X2, + int_8_dynamic_quantize=True, + is_quantized_weight=True, + quantization_type="linear", + nbits=8, + W=W_quantized_int8.tobytes(), + bias=b if has_bias else None, + quant_scale=np.array([quant_scale]), + ) + inputs = {"data": x} + expected = { + "output": np.matmul( + x, W_quantized_int8.astype(np.float) * quant_scale + ) + + (b if has_bias else np.zeros(X2)) + } + self._test_model( + 
+                    builder.spec,
+                    inputs,
+                    expected,
+                    useCPUOnly=cpu_only,
+                    test_metric="SNR",
+                    SNR=40,
+                )
+
+    def test_batched_mat_mul_dynamic_quantization_gpu(self):
+        self.test_batched_mat_mul_dynamic_quantization_cpu(cpu_only=False)
+
+    def test_inner_product_dynamic_quantization_cpu(self, cpu_only=True):
+        Xin = 24
+        Xout = 23
+        W = np.random.rand(Xout, Xin)
+        b = np.random.rand(Xout)
+        # For rank 4 and 5, the product of the last 3 dimensions must equal Xin
+        input_shapes = [
+            (Xin,),
+            (5, Xin),
+            (2, 3, Xin),
+            (4, 1, Xin),
+            (5, 2, 3, 4),
+            (5, 6, 2, 3, 4),
+        ]
+
+        W_max = max(np.abs(np.min(W)), np.abs(np.max(W)))
+        W_normalized = W / W_max  # [-1,1]
+        W_quantized_int8 = 127.0 * W_normalized  # [-127, 127]
+        W_quantized_int8 = W_quantized_int8.astype(np.int8)
+        quant_scale = W_max / 127.0
+
+        for input_shape in input_shapes:
+            rank = len(input_shape)
+            x = np.random.rand(*input_shape) * 5
+
+            W_for_numpy = W_quantized_int8.astype(np.float) * quant_scale
+            for has_bias in [True, False]:
+                # Use a local copy so the random bias is not clobbered for
+                # later iterations once has_bias has been False.
+                b_iter = b if has_bias else np.zeros(Xout)
+                if rank == 1 or rank == 2 or rank == 3:
+                    np_out = np.matmul(x, np.transpose(W_for_numpy)) + b_iter
+                    expected = {"output": np_out}
+                elif rank == 4:
+                    x_shaped = np.reshape(x, (x.shape[0], np.product(x.shape[1:])))
+                    np_out = np.matmul(x_shaped, np.transpose(W_for_numpy)) + b_iter
+                    expected = {"output": np.reshape(np_out, np_out.shape + (1, 1))}
+                elif rank == 5:
+                    x_shaped = np.reshape(x, x.shape[0:2] + (np.product(x.shape[2:]),))
+                    np_out = np.matmul(x_shaped, np.transpose(W_for_numpy)) + b_iter
+                    expected = {
+                        "output": np.reshape(
+                            np_out, x.shape[0:2] + (np_out.shape[-1],) + (1, 1)
+                        )
+                    }
+
+                input_features = [("data", datatypes.Array(*input_shape))]
+                output_features = [("output", None)]
+                builder = neural_network.NeuralNetworkBuilder(
+                    input_features, output_features, disable_rank5_shape_mapping=True
+                )
+
+                builder.add_inner_product(
+                    name="ip",
+                    W=W_quantized_int8.tobytes(),
+                    b=b_iter if has_bias else None,
+                    input_channels=Xin,
+                    output_channels=Xout,
+                    has_bias=has_bias,
+                    input_name="data",
+                    output_name="output",
+                    int_8_dynamic_quantize=True,
+                    is_quantized_weight=True,
+                    quantization_type="linear",
+                    nbits=8,
+                    quant_scale=np.array([quant_scale]),
+                )
+                inputs = {"data": x}
+                self._test_model(
+                    builder.spec,
+                    inputs,
+                    expected,
+                    useCPUOnly=cpu_only,
+                    test_metric="SNR",
+                    SNR=40,
+                )
+
+    def test_inner_product_dynamic_quantization_gpu(self):
+        self.test_inner_product_dynamic_quantization_cpu(cpu_only=False)
+
+    def test_onehot_layer_gpu(self):
+        self.test_onehot_layer_cpu(cpu_only=False)
+
+    @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND)
+    def test_cumsum_layer_cpu(self, cpu_only=True):
+        ctr = 0
+        params_dict = dict(
+            rank=[1, 2, 3, 4, 5],
+            exclusive=[False, True],
+            reverse=[False, True],
+            n_inputs=[1, 2],
+        )
+        params = list(itertools.product(*params_dict.values()))
+        for param in params:
+            param = dict(zip(params_dict.keys(), param))
+            rank = param["rank"]
+            exclusive = param["exclusive"]
+            reverse = param["reverse"]
+            n_inputs = param["n_inputs"]
+
+            for axis in range(rank):
+                ctr += 1
+                if np.random.rand(1) > 0.5:
+                    axis_param = axis
+                else:
+                    axis_param = axis - rank
+
+                input_shape = np.random.randint(1, 10, size=(rank,))
+
+                input_features = [("data", datatypes.Array(*input_shape))]
+                if n_inputs == 2:
+                    input_features.append(("axis", datatypes.Array(1,)))
+
+                output_features = [("output", None)]
+                builder = neural_network.NeuralNetworkBuilder(
+                    input_features, output_features, disable_rank5_shape_mapping=True
+                )
+
+                if n_inputs == 1:
builder.add_cumsum( + "cumsum", + ["data"], + "output", + axis=axis_param, + reverse=reverse, + exclusive=exclusive, + ) + else: + builder.add_cumsum( + "cumsum", + ["data", "axis"], + "output", + reverse=reverse, + exclusive=exclusive, + ) + + x = np.random.rand(*input_shape) + + with tf.Graph().as_default(), tf.Session() as sess: + tf_op = tf.cumsum( + x, axis=axis_param, exclusive=exclusive, reverse=reverse + ) + expected = {"output": sess.run(tf_op)} + + input = {"data": x} + if n_inputs == 2: + input["axis"] = axis_param * np.ones((1,), dtype=np.float) + + self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + + def test_cumsum_layer_gpu(self): + self.test_cumsum_layer_cpu(cpu_only=False) + + def test_clamped_relu_cpu(self, cpu_only=True): + + params_dict = dict(alpha=[0.0, 2.0, -3.0], beta=[7.0, -8.0]) + params = list(itertools.product(*params_dict.values())) + for param in params: + param = dict(zip(params_dict.keys(), param)) + alpha = param["alpha"] + beta = param["beta"] + input_shape = [40] + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_clamped_relu( + "clamped_relu", "data", "output", alpha=alpha, beta=beta + ) + + x = np.arange(-20, 20, dtype=np.float) + input = {"data": x} + expected = {"output": np.minimum(beta, np.where(x >= 0, x, x * alpha))} + self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + + def test_clamped_relu_gpu(self): + self.test_clamped_relu_cpu(cpu_only=False) + + def _test_pool3d(self, cpu_only): + pool_types = ("MAX", "AVERAGE") + # Defining shapes as (batch, channel, depth, height, width) + shapes = ((1, 1, 1, 2, 2), (1, 1, 3, 3, 3), (3, 4, 10, 17, 90)) + # Defining kernels and strides as (depth, height, width) + kernels = ((2, 2, 2), (1, 3, 4), (2, 3, 4), (5, 1, 6), (8, 9, 1), (7, 11, 13)) + strides = ((1, 1, 1), (1, 2, 3), (2, 3, 2), (4, 1, 2), (3, 4, 1), (7, 11, 13)) + # Defining paddings as (left, right, top, bottom, front, back) + # This is backwards from how we define shapes, kernels, and strides, + # but it better matches pytorch, making the creation of pytorch layers + # much easier. + paddings = ( + ("CUSTOM", (0, 0, 0, 0, 0, 0)), + ("CUSTOM", (2, 2, 2, 2, 2, 2)), + ("CUSTOM", (5, 6, 3, 4, 2, 2)), + # VALID and SAME padding must have custom paddings unset or set to zero. + ("VALID", (0, 0, 0, 0, 0, 0)), + ("SAME", (0, 0, 0, 0, 0, 0)), + ) + + # Structure to collect failures so + # we can run all tests, even if one fails. 
+        # This should be able to go away when we can parameterize
+        # our tests: Enable parameterized tests in test_numpy_nn_layers.py
+        failures = []
+        num_successes = 0
+        num_skipped = 0
+
+        for pool_type in pool_types:
+            for shape in shapes:
+                for kernel in kernels:
+                    for stride in strides:
+                        for padding in paddings:
+                            for average_pooling_count_excludes_padding in (False, True):
+                                result = self._test_pool3d_single_case(
+                                    cpu_only,
+                                    pool_type,
+                                    shape,
+                                    kernel,
+                                    stride,
+                                    padding,
+                                    average_pooling_count_excludes_padding,
+                                )
+                                if type(result) is str:
+                                    failures.append(result)
+                                elif result:
+                                    num_successes += 1
+                                else:
+                                    num_skipped += 1
+        self.assertEqual(
+            len(failures),
+            0,
+            "Got %s successes, %s skipped, %s failures: %s"
+            % (num_successes, num_skipped, len(failures), failures),
+        )
+
+    def _test_pool3d_single_case(
+        self,
+        cpu_only,
+        pool_type,
+        shape,
+        kernel,
+        stride,
+        padding,
+        average_pooling_count_excludes_padding,
+    ):
+        """
+        Run a single pool3d case and compare CoreML against PyTorch.
+
+        Args:
+            cpu_only: Restrict CoreML prediction to the CPU.
+            pool_type: "MAX" or "AVERAGE".
+            shape: Input shape, (batch, channel, depth, height, width).
+            kernel: Kernel sizes, (depth, height, width).
+            stride: Strides, (depth, height, width).
+            padding: Pair of (mode, (left, right, top, bottom, front, back)).
+            average_pooling_count_excludes_padding: Whether average pooling
+                excludes padded zeros from the divisor.
+
+        Returns: True on success, False if the case was skipped, or an error
+            string if the comparison failed.
+        """
+        test_case = (
+            "Test case:: pool_type: %s, shape: %s, kernel: %s, stride: %s, padding: %s, average_pooling_count_excludes_padding: %s"
+            % (
+                pool_type,
+                shape,
+                kernel,
+                stride,
+                padding,
+                average_pooling_count_excludes_padding,
+            )
+        )
+        input_features = [("data", datatypes.Array(*shape))]
+        output_features = [("output", None)]
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        padding_mode = padding[0]
+        padding_values = padding[1]
+        builder.add_pooling3d(
+            name="pooling3d",
+            input_name="data",
+            output_name="output",
+            pooling_type=pool_type,
+            kernel_depth=kernel[0],
+            kernel_height=kernel[1],
+            kernel_width=kernel[2],
+            stride_depth=stride[0],
+            stride_height=stride[1],
+            stride_width=stride[2],
+            padding_mode=padding_mode,
+            custom_padding_front=padding_values[4],
+            custom_padding_back=padding_values[5],
+            custom_padding_top=padding_values[2],
+            custom_padding_bottom=padding_values[3],
+            custom_padding_left=padding_values[0],
+            custom_padding_right=padding_values[1],
+            average_pooling_count_excludes_padding=average_pooling_count_excludes_padding,
+        )
+
+        # Expected output
+        input = np.random.rand(*shape)
+        torch_input = torch.from_numpy(np.reshape(input, shape))
+
+        # Padding
+        if padding_mode == "CUSTOM":
+            torch_padding = torch.nn.ConstantPad3d(padding_values, 0)
+        elif padding_mode == "VALID":
+            torch_padding = torch.nn.ConstantPad3d(0, 0)
+        elif padding_mode == "SAME":
+            padding_list = []
+            # torch.nn.ConstantPad3d wants (left, right, top, bottom, front, back)
+            # but our shape, kernel, and stride are (depth, height, width).
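As a hedged aside, not part of the patch: the arithmetic that `aggregated_pad` is expected to perform here for SAME padding follows the usual TensorFlow convention, with the floor/ceil split below placing any odd pixel on the trailing edge. A one-dimensional sketch:

```python
import math

def same_pad_1d(input_size, kernel, stride):
    # Total SAME padding so that output size == ceil(input_size / stride),
    # split so any odd pixel lands on the trailing edge.
    out = math.ceil(input_size / stride)
    total = max(0, (out - 1) * stride + kernel - input_size)
    return math.floor(total / 2.0), math.ceil(total / 2.0)

print(same_pad_1d(10, 3, 2))  # (0, 1): output size (10 + 1 - 3) // 2 + 1 == 5
```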
+ total_paddings = aggregated_pad( + pad_type=padding_mode.lower(), + kernel_shape=kernel, + input_shape=shape[2:], + strides=stride, + ) + total_paddings.reverse() + for p in total_paddings: + before = int(math.floor(float(p) / 2.0)) + after = int(math.ceil(float(p) / 2.0)) + padding_list.append(before) + padding_list.append(after) + + torch_padding = torch.nn.ConstantPad3d(tuple(padding_list), 0) + padding_values = padding_list[:] + else: + assert False + + # Validate output shape + for i in range(3): + try: + IOS14SingleLayerTests._validate_pooling_dimension( + shape[i + 2], + kernel[i], + stride[i], + padding_values[6 - i - 2], + padding_values[6 - i - 1], + ) + except ValueError: + return False + + # Pooling type + # Average pooling + if pool_type == "AVERAGE": + # torch.nn.AvgPool3d only accepts a single integer for padding, so we normally + # create a pooling layer first which allows us to fully specify the + # before and after padding in all three dimensions. + # + # However, when we use a padding layer, torch.nn.AvgPool3d doesn't + # know what is padding and what isn't, which means that its + # `count_include_pad` parameter has no effect. + # + # Therefore, we can only test average_pooling_count_excludes_padding=True + # when padding is homogeneous. + is_padding_homogeneous = all(p == padding_values[0] for p in padding_values) + if average_pooling_count_excludes_padding: + if not is_padding_homogeneous: + return False + else: + # padding is homogeneous + torch_model = torch.nn.AvgPool3d( + kernel, + stride=stride, + padding=padding_values[0], + count_include_pad=not average_pooling_count_excludes_padding, + ) + else: + # average_pooling_count_excludes_padding == False + torch_pool = torch.nn.AvgPool3d( + kernel, + stride=stride, + count_include_pad=not average_pooling_count_excludes_padding, + ) + torch_model = torch.nn.Sequential(torch_padding, torch_pool) + # Max pooling + else: + torch_pool = torch.nn.MaxPool3d(kernel, stride=stride) + torch_model = torch.nn.Sequential(torch_padding, torch_pool) + + try: + expected = torch_model(torch_input).numpy() + self._test_model( + builder.spec, {"data": input}, {"output": expected}, useCPUOnly=cpu_only + ) + return True + except AssertionError as e: + print(e) + return "test_case: %s, error: %s" % (test_case, e) + + @staticmethod + def _validate_pooling_dimension( + input_size, kernel_size, stride, start_padding, end_padding + ): + # https://adeshpande3.github.io/A-Beginner%27s-Guide-To-Understanding-Convolutional-Neural-Networks-Part-2/ + output_size = ( + input_size + start_padding + end_padding - kernel_size + ) / stride + 1 + if output_size < 1: + raise ValueError( + "Dimension with input_size: %s, kernel_size: %s, stride: %s, start_padding: %s, end_padding: %s " + "has output size of %s, but must be >= 1" + % ( + input_size, + kernel_size, + stride, + start_padding, + end_padding, + output_size, + ) + ) + if input_size < kernel_size: + raise ValueError( + "Dimension has input_size (%s) less than kernel_size (%s)" + % (input_size, kernel_size) + ) + if (start_padding + end_padding) / 2 >= kernel_size / 2: + raise ValueError( + "The average of the start (%s) and end (%s) padding must be less than half the kernel size (%s / 2 = %s)" + % (start_padding, end_padding, kernel_size, kernel_size / 2) + ) + + def test_pool3d_cpu(self): + self._test_pool3d(cpu_only=True) + + def test_pool3d_gpu(self): + self._test_pool3d(cpu_only=False) + + def _test_global_pool3d(self, cpu_only): + shapes = ((1, 1, 1, 2, 2), (1, 1, 3, 3, 3), (3, 4, 10, 17, 
90))
+        pool_types = ("MAX", "AVERAGE")
+
+        for shape in shapes:
+            for pool_type in pool_types:
+                test_case = "test_case:: shape: %s, pool_type: %s" % (shape, pool_type)
+                print(test_case)
+                input_features = [("data", datatypes.Array(*shape))]
+                output_features = [("output", None)]
+                builder = neural_network.NeuralNetworkBuilder(
+                    input_features, output_features, disable_rank5_shape_mapping=True
+                )
+                builder.add_global_pooling3d(
+                    name="pooling3d",
+                    input_name="data",
+                    output_name="output",
+                    pooling_type=pool_type,
+                )
+                input = np.random.rand(*shape)
+
+                # Expected output from Torch
+                torch_input = torch.from_numpy(np.reshape(input, shape))
+                if pool_type == "AVERAGE":
+                    torch_pool = torch.nn.AvgPool3d(shape[-3:])
+                else:
+                    torch_pool = torch.nn.MaxPool3d(shape[-3:])
+                expected = torch_pool(torch_input).numpy()
+
+                self._test_model(
+                    builder.spec,
+                    {"data": input},
+                    {"output": expected},
+                    useCPUOnly=cpu_only,
+                )
+
+    def test_global_pool3d_cpu(self):
+        self._test_global_pool3d(cpu_only=True)
+
+    def test_global_pool3d_gpu(self):
+        self._test_global_pool3d(cpu_only=False)
+
+    def test_argsort_cpu(self, cpu_only=True):
+
+        shapes = [(4,), (3, 4), (2, 5, 6), (3, 5, 2, 4), (4, 5, 3, 6, 7)]
+
+        for shape in shapes:
+            for descending in [False, True]:
+                for axis in range(len(shape)):
+
+                    input_features = [("data", datatypes.Array(*shape))]
+                    output_features = [("output", None)]
+                    builder = neural_network.NeuralNetworkBuilder(
+                        input_features,
+                        output_features,
+                        disable_rank5_shape_mapping=True,
+                    )
+                    builder.add_argsort(
+                        "argsort", "data", "output", axis=axis, descending=descending
+                    )
+
+                    x = np.random.rand(*shape)
+                    if descending:
+                        expected = {"output": np.argsort(-x, axis)}
+                    else:
+                        expected = {"output": np.argsort(x, axis)}
+
+                    input = {"data": x}
+                    self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only)
+
+    def test_argsort_gpu(self):
+        self.test_argsort_cpu(cpu_only=False)
+
+    def test_upsample_pytorch_cpu(self):
+        self.upsample_pytorch_test_iter(np.arange(1, 4), True)
+        self.upsample_pytorch_test_iter(np.arange(1.0, 3.0, 0.66), True)
+
+    def test_upsample_pytorch_gpu(self):
+        self.upsample_pytorch_test_iter(np.arange(1, 4), False)
+        self.upsample_pytorch_test_iter(np.arange(1.0, 3.0, 0.66), False)
+
+    def upsample_pytorch_test_iter(self, scale_range, cpu_only):
+        for align_corners in [False, True]:
+            for scale_h in scale_range:
+                for scale_w in scale_range:
+                    for input_h in range(2, 6):
+                        for input_w in range(2, 6):
+                            self.upsample_pytorch_test(
+                                input_h,
+                                input_w,
+                                scale_h,
+                                scale_w,
+                                align_corners,
+                                cpu_only,
+                            )
+
+    def upsample_pytorch_test(self, h, w, scale_h, scale_w, align_corners, cpu_only):
+        input_dim = (1, 1, h, w)
+        if align_corners:
+            linear_upsample_mode = "ALIGN_CORNERS_TRUE"
+        else:
+            linear_upsample_mode = "ALIGN_CORNERS_FALSE"
+
+        input_features = [("data", datatypes.Array(*input_dim))]
+        output_features = [("output", None)]
+
+        builder = neural_network.NeuralNetworkBuilder(
+            input_features, output_features, disable_rank5_shape_mapping=True
+        )
+        builder.add_upsample(
+            name="upsample",
+            scaling_factor_h=scale_h,
+            scaling_factor_w=scale_w,
+            linear_upsample_mode=linear_upsample_mode,
+            input_name="data",
+            output_name="output",
+            mode="BILINEAR",
+        )
+
+        input_tensor = np.reshape(np.arange(1.0, 1.0 + (h * w), 1.0), input_dim)
+        input = {"data": input_tensor}
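Before the PyTorch reference below, a hedged sketch of the two source-coordinate mappings that ALIGN_CORNERS_TRUE and ALIGN_CORNERS_FALSE conventionally denote in bilinear resizing (the standard convention, not taken from the CoreML runtime):

```python
def src_coord(dst, in_size, out_size, align_corners):
    # Map an output pixel index back to a fractional input coordinate.
    if align_corners:
        # Corner pixels of input and output line up exactly.
        return dst * (in_size - 1) / (out_size - 1) if out_size > 1 else 0.0
    # Half-pixel convention: pixels are unit cells centred at i + 0.5.
    return (dst + 0.5) * (in_size / out_size) - 0.5

print(src_coord(3, 4, 8, True))   # 1.2857...
print(src_coord(3, 4, 8, False))  # 1.25
```

+        # Get result from PyTorch
+        x = torch.from_numpy(np.reshape(input_tensor, (1, 1, h, w)))
+        m = torch.nn.Upsample(
+            scale_factor=(scale_h, scale_w),
+            mode="bilinear",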
+            align_corners=align_corners,
+        )
+        pytorch_output = m(x)
+
+        # Expect PyTorch output matches CoreML output
+        expected = {"output": pytorch_output.numpy()}
+
+        self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only)
+        self.assertEqual(len(input_dim), builder._get_rank("output"))
+
+    def test_slice_by_size_cpu(self, cpu_only=True):
+
+        shapes = [(4,), (3, 4), (2, 5, 6), (3, 5, 2, 4), (4, 5, 3, 6, 7)]
+
+        for shape in shapes:
+            for axis in range(len(shape)):
+                begin = np.random.randint(shape[axis])
+                begin_input = np.array([begin]).astype(np.float32)
+                size = np.random.randint(shape[axis] - begin) + 1
+
+                x = np.random.rand(*shape)
+                slices = []
+                for i in range(len(shape)):
+                    if i != axis:
+                        slices.append(slice(None, None, None))
+                    else:
+                        slices.append(slice(begin, begin + size, 1))
+                slices = tuple(slices)  # index with a tuple, not a list
+                expected = {"output": x[slices]}
+
+                input_features = [
+                    ("data", datatypes.Array(*shape)),
+                    ("begin", datatypes.Array(1)),
+                ]
+                output_features = [("output", datatypes.Array(*x[slices].shape))]
+                builder = neural_network.NeuralNetworkBuilder(
+                    input_features, output_features, disable_rank5_shape_mapping=True
+                )
+                builder.add_slice_by_size(
+                    "slice_by_size", ["data", "begin"], "output", axis=axis, size=size
+                )
+
+                input = {"data": x, "begin": begin_input}
+                self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only)
+
+    def _test_conv3d(self, cpu_only, full_test):
+        # Input shape defined by us and PyTorch as [batch, channels, depth, height, width]
+        input_shapes = [
+            [1, 3, 3, 8, 8],
+            [1, 1, 3, 8, 8],
+            [1, 7, 8, 15, 63],
+            [4, 32, 8, 16, 16],
+        ]
+        # Large enough kernels and/or inputs cause int overflow and seg fault: see rdar://60309763
+        kernels = [[3, 3, 3], [2, 2, 2]]
+        strides = [[1, 1, 1], [2, 2, 2]]
+        dilations = [[1, 1, 1], [2, 2, 2]]
+        has_biases = [True, False]
+        # Note: PyTorch's `torch.nn.Conv3d` doesn't support these padding modes, just a single
+        # padding value (for all dimensions) or 3 values (for each dimension)
+        padding_modes = ["custom", "valid", "same"]
+        # Padding shape is front, back, top, bottom, left, right
+        paddings = [[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1]]
+
+        # Add some additional test cases if `full_test` is True
+        if full_test:
+            input_shapes.extend([[1, 4, 3, 128, 128]])
+            kernels.extend([[1, 2, 3], [5, 5, 5]])
+            strides.extend([[1, 2, 3]])
+            dilations.extend([[1, 2, 3]])
+            paddings.extend([[2, 0, 2, 0, 2, 0], [0, 1, 2, 3, 4, 5]])
+
+        test_case_format_str = (
+            "Conv3d test case | Input shape: {}, Output channels: {}, Groups: {}, Kernel shape: {},"
+            " Stride: {}, Padding: {}, Padding mode: {}, Dilation: {}, Has bias: {}"
+        )
+
+        for in_shape in input_shapes:
+            # Test "normal" and depthwise convolution with corresponding groups and output channels
+            groups_outchannels = [(1, 2), (in_shape[1], 2 * in_shape[1])]
+            for kernel in kernels:
+                for has_bias in has_biases:
+                    for stride in strides:
+                        for dilation in dilations:
+                            for padding_mode in padding_modes:
+                                # For all modes besides 'custom', the padding values are ignored
+                                if padding_mode == "custom":
+                                    loop_paddings = paddings
+                                else:
+                                    loop_paddings = [[0, 0, 0, 0, 0, 0]]
+                                for padding in loop_paddings:
+                                    for groups, output_channels in groups_outchannels:
+                                        # Dilated kernel shape = (K - 1) * D + 1
+                                        dilated_kernel = list(
+                                            map(
+                                                lambda k, d: (k - 1) * d + 1,
+                                                kernel,
+                                                dilation,
+                                            )
+                                        )
+
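The dilated-kernel computation just above follows the standard effective-size formula; a small illustrative sketch (not part of the patch):

```python
def effective_kernel(kernel, dilation):
    # A kernel of size K with dilation D covers (K - 1) * D + 1 input positions.
    return [(k - 1) * d + 1 for k, d in zip(kernel, dilation)]

print(effective_kernel([3, 3, 3], [2, 2, 2]))  # [5, 5, 5]
print(effective_kernel([1, 2, 3], [1, 2, 3]))  # [1, 3, 7]
```

+                                        # Use paddings if padding_mode is "custom", else compute
+                                        # them according to
+                                        # https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-convolutional-neural-networks#filter
+                                        if padding_mode == 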
"same": + pad_d = max( + 0, + ( + stride[0] + * math.ceil( + in_shape[2] / float(stride[0]) + ) + - in_shape[2] + + dilated_kernel[0] + - stride[0] + ) + / 2.0, + ) + pad_h = max( + 0, + ( + stride[1] + * math.ceil( + in_shape[3] / float(stride[1]) + ) + - in_shape[3] + + dilated_kernel[1] + - stride[1] + ) + / 2.0, + ) + pad_w = max( + 0, + ( + stride[2] + * math.ceil( + in_shape[4] / float(stride[2]) + ) + - in_shape[4] + + dilated_kernel[2] + - stride[2] + ) + / 2.0, + ) + + # Depth + padding[0] = int(math.floor(pad_d)) + padding[1] = int(math.ceil(pad_d)) + # Height + padding[2] = int(math.floor(pad_h)) + padding[3] = int(math.ceil(pad_h)) + # Width + padding[4] = int(math.floor(pad_w)) + padding[5] = int(math.ceil(pad_w)) + elif padding_mode == "valid": + # Set to zero for PyTorch padding + padding = [0] * 6 + elif padding_mode == "custom": + # No-op: valid ignores padding and custom uses the + # specified padding + pass + + input_features = [ + ("data", datatypes.Array(*in_shape)) + ] + output_features = [("output", None)] + input_channels = in_shape[1] + # [output_channels, kernel_channels, depth, height, width] + weights_shape = [ + output_channels, + int(input_channels / groups), + kernel[0], + kernel[1], + kernel[2], + ] + + # Init random input + input_tensor = np.random.normal(size=in_shape) + input_torch = torch.tensor(input_tensor) + # Init random weights + weights_tensor = np.random.normal( + size=weights_shape + ) + weights_torch = torch.DoubleTensor( + weights_tensor + ) + # Init random bias if applicable + if has_bias: + bias_tensor = np.random.normal( + size=output_channels + ) + bias_torch = torch.DoubleTensor(bias_tensor) + else: + bias_tensor = None + bias_torch = None + + builder = neural_network.NeuralNetworkBuilder( + input_features, + output_features, + disable_rank5_shape_mapping=True, + ) + builder.add_convolution3d( + name="conv3d", + input_channels=input_channels, + output_channels=output_channels, + depth=kernel[0], + height=kernel[1], + width=kernel[2], + W=weights_tensor, + b=bias_tensor, + has_bias=has_bias, + groups=groups, + stride_depth=stride[0], + stride_height=stride[1], + stride_width=stride[2], + dilation_depth=dilation[0], + dilation_height=dilation[1], + dilation_width=dilation[2], + padding_mode=padding_mode, + padding_front=padding[0], + padding_back=padding[1], + padding_top=padding[2], + padding_bottom=padding[3], + padding_left=padding[4], + padding_right=padding[5], + input_name="data", + output_name="output", + ) + + # Get PyTorch output to compare ours to + # First pad, since PyTorch Conv3d only supports custom and + # same symmetric padding. Padding shape is + # (left, right, top, bottom, front, back) + padded_input = input_torch + if any(p > 0 for p in padding): + torch_padding = ( + padding[4], + padding[5], + padding[2], + padding[3], + padding[0], + padding[1], + ) + pad_layer = torch.nn.ConstantPad3d( + torch_padding, 0 + ) + padded_input = pad_layer(input_torch) + # Check if dilated kernel size exceeds padded input size in + # any dimension. If it does, it's not a valid convolution + if ( + dilated_kernel[0] > padded_input.shape[2] + or dilated_kernel[1] > padded_input.shape[3] + or dilated_kernel[2] > padded_input.shape[4] + ): + print( + "SKIPPING: Dilated kernel exceeds padded input." 
+ ) + continue + # Using Sequential with a padding layer first produces + # incorrect convolution output + model = torch.nn.Sequential( + torch.nn.Conv3d( + input_channels, + output_channels, + kernel, + stride=stride, + padding=0, + dilation=dilation, + groups=groups, + bias=False, + ) + ) + with torch.no_grad(): + model[0].weight = torch.nn.Parameter( + weights_torch + ) + if has_bias: + model[0].bias = torch.nn.Parameter( + bias_torch + ) + torch_expected = model(padded_input) + + test_case = test_case_format_str.format( + in_shape, + output_channels, + groups, + weights_shape, + stride, + padding, + padding_mode, + dilation, + has_bias, + ) + try: + self._test_model( + builder.spec, + {"data": input_tensor}, + { + "output": torch_expected.detach().numpy() + }, + useCPUOnly=cpu_only, + test_metric="SNR", + SNR=40, + validate_shapes_only=False, + ) + except AssertionError as e: + print(test_case) + raise + + def test_conv3d_cpu_basic(self): + self._test_conv3d(cpu_only=True, full_test=False) + + @pytest.mark.slow + def test_conv3d_cpu_slow(self): + self._test_conv3d(cpu_only=True, full_test=True) + + def test_conv3d_gpu_basic(self): + self._test_conv3d(cpu_only=False, full_test=False) + + @pytest.mark.slow + def test_conv3d_gpu_slow(self): + self._test_conv3d(cpu_only=False, full_test=True) + + +@unittest.skipUnless( + _is_macos() and _macos_version() >= LAYERS_10_16_MACOS_VERSION, + "Only supported on macOS 10.16+", +) +class ReorganizeDataTests(CorrectnessTest): + def _to_rank_4(self, x): + from_rank = len(x.shape) + if from_rank == 3: + return np.reshape(x, [1] + list(x.shape)) + elif from_rank == 4: + return x + elif from_rank == 5: + return np.squeeze(x, axis=0) + + def _from_rank_4(self, x, to_rank): + if to_rank == 3: + return np.squeeze(x, axis=0) + elif to_rank == 4: + return x + elif to_rank == 5: + return np.reshape(x, [1] + list(x.shape)) + + @unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) + def test_depth_to_space_cpu(self, cpu_only=True): + + params_dict = { + "block_size": [2, 3, 4], + "channels_div_bsq": [1, 2, 3, 7], + "spatial": [[2, 3], [4, 4], [1, 1]], + "batch_size": [None, 1, 2], + "seq_length": [None, 1], + } + params_product = list(itertools.product(*params_dict.values())) + for param in params_product: + param = dict(zip(params_dict.keys(), param)) + # Create input based on params + block_size = param["block_size"] + bsq = block_size * block_size + input_shape = [bsq * param["channels_div_bsq"]] + param["spatial"] + if param["batch_size"] is not None: + input_shape = [param["batch_size"]] + input_shape + if param["seq_length"] is not None: + input_shape = [param["seq_length"]] + input_shape + rank = len(input_shape) + x = np.random.random(input_shape) + input = {"data": x} + + # Set up network + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_reorganize_data( + "reorganize_data", + "data", + "output", + mode="DEPTH_TO_SPACE", + block_size=block_size, + ) + + # Run tensorflow to calculate expected values + with tf.Session() as sess: + # TensorFlow requires rank 4, NHWC order on CPU + x_tf = self._to_rank_4(x).transpose(0, 2, 3, 1) + out_tf = sess.run( + tf.nn.depth_to_space(x_tf, block_size, data_format="NHWC") + ) + out = self._from_rank_4(out_tf.transpose(0, 3, 1, 2), to_rank=rank) + expected = {"output": out} + + # Run model to calculate CoreML values and compare with expected + 
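Since the TF reference above had to be computed in NHWC and transposed back, here is a hedged NumPy sketch of the DEPTH_TO_SPACE rearrangement itself in CoreML's channel-first layout (assuming TF's "DCR" channel ordering; illustrative only):

```python
import numpy as np

def depth_to_space_nchw(x, b):
    # (N, C*b*b, H, W) -> (N, C, H*b, W*b): split the channel axis into
    # (b, b, C) blocks, then interleave the block axes into height and width.
    n, cbb, h, w = x.shape
    c = cbb // (b * b)
    y = x.reshape(n, b, b, c, h, w)
    y = y.transpose(0, 3, 4, 1, 5, 2)  # -> (N, C, H, b, W, b)
    return y.reshape(n, c, h * b, w * b)

x = np.arange(16.0).reshape(1, 4, 2, 2)
print(depth_to_space_nchw(x, 2).shape)  # (1, 1, 4, 4)
```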
self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + + def test_depth_to_space_gpu(self): + self.test_depth_to_space_cpu(cpu_only=False) + + @unittest.skipIf( + _macos_version() < LAYERS_10_16_MACOS_VERSION, + "macOS 10.16+ required. Skipping tests.", + ) + def test_pixel_shuffle_cpu(self, cpu_only=True): + + params_dict = { + "block_size": [2, 3, 4], + "channels_div_bsq": [1, 2, 3, 7], + "spatial": [[2, 3], [4, 4], [1, 1]], + "batch_size": [None, 1, 2], + "seq_length": [None, 1], + } + params_product = list(itertools.product(*params_dict.values())) + for param in params_product: + param = dict(zip(params_dict.keys(), param)) + # Create input based on params + block_size = param["block_size"] + bsq = block_size * block_size + input_shape = [bsq * param["channels_div_bsq"]] + param["spatial"] + if param["batch_size"] is not None: + input_shape = [param["batch_size"]] + input_shape + if param["seq_length"] is not None: + input_shape = [param["seq_length"]] + input_shape + rank = len(input_shape) + x = np.random.random(input_shape) + input = {"data": x} + + # Set up network + input_features = [("data", datatypes.Array(*input_shape))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + builder.add_reorganize_data( + "reorganize_data", + "data", + "output", + mode="PIXEL_SHUFFLE", + block_size=block_size, + ) + + # Run pytorch to calculate expected values + x_torch = torch.from_numpy(self._to_rank_4(x)) + out_torch = torch.pixel_shuffle(x_torch, upscale_factor=block_size) + out = self._from_rank_4(out_torch.numpy(), to_rank=rank) + expected = {"output": out} + + # Run model to calculate CoreML values and compare with expected + self._test_model(builder.spec, input, expected, useCPUOnly=cpu_only) + + @unittest.skipIf( + _macos_version() < LAYERS_10_16_MACOS_VERSION, + "macOS 10.16+ required. Skipping tests.", + ) + def test_pixel_shuffle_gpu(self): + self.test_pixel_shuffle_cpu(cpu_only=False) diff --git a/coremltools/test/neural_network/test_quantization.py b/coremltools/test/neural_network/test_quantization.py index aaa713b24..3931e5223 100644 --- a/coremltools/test/neural_network/test_quantization.py +++ b/coremltools/test/neural_network/test_quantization.py @@ -7,27 +7,31 @@ import pytest import coremltools -import \ - coremltools.models.neural_network.quantization_utils as quantization_utils -from coremltools._deps import HAS_KERAS2_TF +import coremltools.models.datatypes as datatypes +from coremltools.models import neural_network +import coremltools.models.neural_network.quantization_utils as quantization_utils +from coremltools.models.neural_network.quantization_utils import ( + activate_int8_int8_matrix_multiplications, + MatrixMultiplyLayerSelector, + _quantize_spec_weights, +) + +from coremltools._deps import _HAS_KERAS2_TF from coremltools.models import ( _MLMODEL_FULL_PRECISION, _QUANTIZATION_MODE_LINEAR_QUANTIZATION, _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS, - _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE + _QUANTIZATION_MODE_CUSTOM_LOOKUP_TABLE, ) -from coremltools.models import neural_network as neural_network -import coremltools.models.datatypes as datatypes - -@unittest.skipIf(not coremltools.utils.is_macos() or - coremltools.utils.macos_version() < (10, 14), - 'Missing macOS 10.14+. Skipping tests.') -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. 
Skipping tests.') +@unittest.skipIf( + not coremltools.utils._is_macos() or coremltools.utils._macos_version() < (10, 14), + "Missing macOS 10.14+. Skipping tests.", +) +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 class QuantizationNumericalCorrectnessTests(unittest.TestCase): - def runTest(self): pass @@ -35,12 +39,12 @@ def setUp(self): self.qbits = 8 # n-bit quantization for tests self.qmode = _QUANTIZATION_MODE_LINEAR_QUANTIZATION self.custom_lut = None - from test_keras2_numeric import KerasBasicNumericCorrectnessTest + from .test_keras2_numeric import KerasBasicNumericCorrectnessTest + self.keras_tester = KerasBasicNumericCorrectnessTest() self.keras_tester._test_model = self._test_model - def _run_quantized_test( - self, input_, full_precision_model, quantized_model, delta): + def _run_quantized_test(self, input_, full_precision_model, quantized_model, delta): # Output from both models should be the same full_output = full_precision_model.predict(input_) quantized_output = quantized_model.predict(input_) @@ -50,43 +54,49 @@ def _run_quantized_test( full_output_flatten = full_output[key].flatten() quantized_output_flatten = quantized_output[key].flatten() - self.assertTrue( - len(full_output_flatten) == len(quantized_output_flatten) - ) + self.assertTrue(len(full_output_flatten) == len(quantized_output_flatten)) - norm_factor = np.maximum( - full_output_flatten, quantized_output_flatten) + norm_factor = np.maximum(full_output_flatten, quantized_output_flatten) norm_factor = np.maximum(norm_factor, 1.0) f_out = full_output_flatten / norm_factor q_out = quantized_output_flatten / norm_factor for idx, full_value in enumerate(f_out): quantized_value = q_out[idx] - self.assertAlmostEqual( - full_value, quantized_value, delta=delta) - - def _test_model(self, model, num_samples=1, mode='random', delta=1e-2, - model_dir=None, transpose_keras_result=True, - one_dim_seq_flags=None, - model_precision=_MLMODEL_FULL_PRECISION): + self.assertAlmostEqual(full_value, quantized_value, delta=delta) + + def _test_model( + self, + model, + num_samples=1, + mode="random", + delta=1e-2, + model_dir=None, + transpose_keras_result=True, + one_dim_seq_flags=None, + model_precision=_MLMODEL_FULL_PRECISION, + ): # Get the model path use_tmp_folder = False if model_dir is None: use_tmp_folder = True model_dir = tempfile.mkdtemp() - _ = os.path.join(model_dir, 'keras.mlmodel') + _ = os.path.join(model_dir, "keras.mlmodel") # Get converted coreml model and sample input ( input_names, output_names, _, - coreml_input + coreml_input, ) = self.keras_tester._get_coreml_model_params_and_test_input( - model, mode, one_dim_seq_flags) - from test_keras2_numeric import _get_coreml_model - coreml_model = _get_coreml_model(model, input_names, output_names, - model_precision=model_precision) + model, mode, one_dim_seq_flags + ) + from .test_keras2_numeric import _get_coreml_model + + coreml_model = _get_coreml_model( + model, input_names, output_names, model_precision=model_precision + ) # Now we quantize the model and dequantize it. 
We then use this model # as our full precision model since quantizing this model again will @@ -97,7 +107,7 @@ def _test_model(self, model, num_samples=1, mode='random', delta=1e-2, spec=coreml_spec, nbits=self.qbits, quantization_mode=self.qmode, - lut_function=self.custom_lut + lut_function=self.custom_lut, ) # De-quantize model @@ -109,14 +119,14 @@ def _test_model(self, model, num_samples=1, mode='random', delta=1e-2, spec=coreml_model.get_spec(), nbits=self.qbits, quantization_mode=self.qmode, - lut_function=self.custom_lut + lut_function=self.custom_lut, ) - full_precision_model = coremltools.models.MLModel( - full_precision_model_spec) + full_precision_model = coremltools.models.MLModel(full_precision_model_spec) quantized_model = coremltools.models.MLModel(quantized_model_spec) - self._run_quantized_test(coreml_input, full_precision_model, - quantized_model, delta) + self._run_quantized_test( + coreml_input, full_precision_model, quantized_model, delta + ) # Clean up after ourselves if use_tmp_folder and os.path.exists(model_dir): @@ -244,78 +254,84 @@ def test_quantized_lstm_seq_backwards(self): self.keras_tester.test_lstm_seq_backwards() -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class SevenBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(SevenBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 7 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class SixBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(SixBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 6 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class FiveBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(FiveBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 5 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 class FourBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(FourBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 4 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class ThreeBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(ThreeBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 3 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class TwoBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(TwoBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 2 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 class OneBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(OneBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 1 -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 -class LUTQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): +class LUTQuantizationNumericalCorrectnessTests(QuantizationNumericalCorrectnessTests): def setUp(self): super(LUTQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 8 @@ -325,86 +341,93 @@ def test_quantized_custom_lut(self): pass -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class LUTSevenBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTSevenBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 7 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class LUTSixBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTSixBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 6 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class LUTFiveBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTFiveBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 5 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 class LUTFourBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTFourBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 4 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class LUTThreeBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): - super(LUTThreeBitQuantizationNumericalCorrectnessTests, - self).setUp() + super(LUTThreeBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 3 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 @pytest.mark.slow class LUTTwoBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTTwoBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 2 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 class LUTOneBitQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTOneBitQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 1 self.qmode = _QUANTIZATION_MODE_LOOKUP_TABLE_KMEANS -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. Skipping tests.") @pytest.mark.keras2 class LUTCustomQuantizationNumericalCorrectnessTests( - QuantizationNumericalCorrectnessTests): + QuantizationNumericalCorrectnessTests +): def setUp(self): super(LUTCustomQuantizationNumericalCorrectnessTests, self).setUp() self.qbits = 8 @@ -414,14 +437,17 @@ def setUp(self): from coremltools.converters import keras as keras_converter -@unittest.skipIf(not coremltools.utils.is_macos() or - coremltools.utils.macos_version() < (10, 14), - 'Missing macOS 10.14+. Skipping tests.') -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras. Skipping tests.') + +@unittest.skipIf( + not coremltools.utils._is_macos() or coremltools.utils._macos_version() < (10, 14), + "Missing macOS 10.14+. Skipping tests.", +) +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras. 
Skipping tests.") @pytest.mark.keras2 class AdvancedQuantizationNumericalCorrectnessTests(unittest.TestCase): """ Quantization tests for advanced settings """ + def test_8bit_symmetric_and_skips(self): from keras.models import Sequential from keras.layers import Conv2D @@ -437,36 +463,43 @@ def stable_rel_error(x, ref): # Define a model model = Sequential() - model.add(Conv2D(input_shape=(input_dim, input_dim, input_channels), - filters=num_kernels, kernel_size=(kernel_height, kernel_width))) + model.add( + Conv2D( + input_shape=(input_dim, input_dim, input_channels), + filters=num_kernels, + kernel_size=(kernel_height, kernel_width), + ) + ) # Set some random weights weight, bias = model.layers[0].get_weights() num_filters = weight.shape[-1] filter_shape = weight.shape[:-1] - new_weight = np.stack([4.0 * np.random.rand(*filter_shape) - 2 for - i in range(num_filters)], axis=-1) + new_weight = np.stack( + [4.0 * np.random.rand(*filter_shape) - 2 for i in range(num_filters)], + axis=-1, + ) model.layers[0].set_weights([new_weight, bias]) - mlmodel = keras_converter.convert(model, ['data'], ['output_0']) + mlmodel = keras_converter.convert(model, ["data"], ["output_0"]) selector = quantization_utils.AdvancedQuantizedLayerSelector( - skip_layer_types=['batchnorm', 'bias', 'depthwiseConv'], - minimum_conv_kernel_channels=4, - minimum_conv_weight_count=4096) + skip_layer_types=["batchnorm", "bias", "depthwiseConv"], + minimum_conv_kernel_channels=4, + minimum_conv_weight_count=4096, + ) - q_mlmodel = quantization_utils.quantize_weights(mlmodel, 8, - selector=selector) + q_mlmodel = quantization_utils.quantize_weights(mlmodel, 8, selector=selector) - input_shape = (1,1,input_channels,input_dim,input_dim) + input_shape = (1, 1, input_channels, input_dim, input_dim) input_val = 2 * np.random.rand(*input_shape) - 1 - coreml_input = {'data' : input_val} + coreml_input = {"data": input_val} coreml_output = mlmodel.predict(coreml_input) q_coreml_output = q_mlmodel.predict(coreml_input) - val = coreml_output['output_0'] - q_val = q_coreml_output['output_0'] + val = coreml_output["output_0"] + q_val = q_coreml_output["output_0"] rel_err = stable_rel_error(q_val, val) max_rel_err, mean_rel_err = np.max(rel_err), np.mean(rel_err) self.assertTrue(max_rel_err < 0.25) @@ -474,48 +507,550 @@ def stable_rel_error(x, ref): self.assertTrue(mean_rel_err < 0.02) -@unittest.skipIf(not coremltools.utils.is_macos() or - coremltools.utils.macos_version() < (10, 15), - 'Missing macOS 10.15+. Skipping tests.') -class QuantizeWeightsAPI(unittest.TestCase): +@unittest.skipIf( + not coremltools.utils._is_macos() or coremltools.utils._macos_version() < (10, 16), + "Missing macOS 10.16+. 
Skipping tests.", +) +class DynamicQuantizedInt8Int8MatMul(unittest.TestCase): + """ + Quantization tests for dynamic Int8 - Int8 matrix multiplications + """ + + def initialize(self): + np.random.seed(1988) + self.Cout, self.Cin = 16, 32 + self.W = np.random.rand(self.Cout, self.Cin) * 20.0 - 10.0 + self.b = np.random.rand(self.Cout) * 20.0 - 10.0 + self.input_shape = (5, self.Cin) + input_features = [("data", datatypes.Array(*self.input_shape))] + output_features = [("output", None)] + self.builder = neural_network.NeuralNetworkBuilder( + input_features, output_features, disable_rank5_shape_mapping=True + ) + self.selector = MatrixMultiplyLayerSelector() + + def _test_predictions( + self, np_preds, coreml_preds, SNR=30, PSNR=40, + ): + + np_preds = np_preds.flatten() + coreml_preds = coreml_preds.flatten() + + noise = np_preds - coreml_preds + noise_var = np.sum(noise ** 2) / len(noise) + 1e-7 + signal_energy = np.sum(np_preds ** 2) / len(np_preds) + max_signal_energy = np.amax(np_preds ** 2) + snr = 10 * np.log10(signal_energy / noise_var) + psnr = 10 * np.log10(max_signal_energy / noise_var) + self.assertGreaterEqual(snr, SNR) + self.assertGreaterEqual(psnr, PSNR) + + def compare(self, specification_modified=True): + x = np.random.rand(*self.input_shape) + + def _get_preds(spec): + mlmodel = coremltools.models.MLModel(spec) + return mlmodel.predict({"data": x}, useCPUOnly=True)["output"] + preds = _get_preds(self.builder.spec) + self.assertEqual(self.builder.spec.specificationVersion, 4) + + quantized_spec = activate_int8_int8_matrix_multiplications( + self.builder.spec, self.selector + ) + + layer = self.builder.spec.neuralNetwork.layers[0] + layer_type = layer.WhichOneof("layer") + if layer_type == "innerProduct": + matmul_layer = layer.innerProduct + + elif layer_type == "batchedMatmul": + matmul_layer = layer.batchedMatmul + wp = matmul_layer.weights + + if specification_modified: + self.assertEqual(self.builder.spec.specificationVersion, 5) + quant_preds = _get_preds(quantized_spec) + self._test_predictions(preds, quant_preds, SNR=40) + self.assertEqual(len(wp.floatValue), 0) + else: + self.assertEqual(self.builder.spec.specificationVersion, 4) + quant_preds = _get_preds(quantized_spec) + np.testing.assert_array_almost_equal(preds, quant_preds) + self.assertGreater(len(wp.floatValue), 0) + + def test_single_batched_matmul_no_bias(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.compare() + + def test_single_batched_matmul_with_bias(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + bias=self.b, + ) + self.compare() + + def test_single_inner_product_no_bias(self): + + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=None, + has_bias=False, + ) + self.compare() + + def test_single_inner_product_with_bias(self): + + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.compare() + + def test_inner_product_min_input_channels_valid(self): + 
self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.minimum_input_channels = 31 + self.compare() + + def test_batched_matmul_min_input_channels_valid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.minimum_input_channels = 32 + self.compare() + + def test_inner_product_min_input_channels_invalid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.minimum_input_channels = 33 + self.compare(specification_modified=False) + + def test_batched_matmul_min_input_channels_invalid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.minimum_input_channels = 33 + self.compare(specification_modified=False) + + def test_batched_matmul_max_input_channels_valid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.maximum_input_channels = 32 + self.compare() + + def test_inner_product_max_input_channels_valid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.maximum_input_channels = 33 + self.compare() + + def test_batched_matmul_max_input_channels_invalid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.maximum_input_channels = 31 + self.compare(specification_modified=False) + + def test_inner_product_max_input_channels_invalid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.maximum_input_channels = 30 + self.compare(specification_modified=False) + + def test_inner_product_min_output_channels_valid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.minimum_output_channels = 16 + self.compare() + + def test_batched_matmul_min_output_channels_valid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.minimum_output_channels = 16 + self.compare() + + def test_inner_product_min_output_channels_invalid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + 
output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.minimum_output_channels = 17 + self.compare(specification_modified=False) + + def test_batched_matmul_min_output_channels_invalid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.minimum_output_channels = 17 + self.compare(specification_modified=False) + + def test_batched_matmul_max_output_channels_valid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.maximum_output_channels = 17 + self.compare() + + def test_inner_product_max_output_channels_valid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.maximum_output_channels = 16 + self.compare() + + def test_batched_matmul_max_output_channels_invalid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.maximum_output_channels = 14 + self.compare(specification_modified=False) + + def test_inner_product_max_output_channels_invalid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.maximum_output_channels = 15 + self.compare(specification_modified=False) + + def test_inner_product_min_weight_count_valid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.minimum_weight_count = 512 + self.compare() + + def test_batched_matmul_min_weight_count_invalid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.minimum_weight_count = 513 + self.compare(specification_modified=False) + + def test_inner_product_layer_names_invalid(self): + self.initialize() + self.builder.add_inner_product( + name="ip", + input_name="data", + output_name="output", + input_channels=self.Cin, + output_channels=self.Cout, + W=self.W, + b=self.b, + has_bias=True, + ) + self.selector.include_layers_with_names = ["ip1", "ip2"] + self.compare(specification_modified=False) + + def test_batched_matmul_layer_names_valid(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + self.selector.include_layers_with_names = ["bm1", "batched_matmul"] + self.compare() + + def test_batched_matmul_8bit_weight_quantized(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + 
input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + _quantize_spec_weights( + self.builder.spec, 8, _QUANTIZATION_MODE_LINEAR_QUANTIZATION + ) + self.compare() + + def test_batched_matmul_4bit_weight_quantized(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + _quantize_spec_weights( + self.builder.spec, 4, _QUANTIZATION_MODE_LINEAR_QUANTIZATION + ) + self.compare() + + def test_batched_matmul_2bit_weight_quantized(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + _quantize_spec_weights( + self.builder.spec, 2, _QUANTIZATION_MODE_LINEAR_QUANTIZATION + ) + self.compare() + + def test_batched_matmul_1bit_weight_quantized(self): + + self.initialize() + self.builder.add_batched_mat_mul( + name="batched_matmul", + input_names=["data"], + output_name="output", + weight_matrix_rows=self.Cin, + weight_matrix_columns=self.Cout, + W=self.W, + ) + _quantize_spec_weights( + self.builder.spec, 1, _QUANTIZATION_MODE_LINEAR_QUANTIZATION + ) + self.compare() + + +@unittest.skipIf( + not coremltools.utils._is_macos() or coremltools.utils._macos_version() < (10, 15), + "Missing macOS 10.15+. Skipping tests.", +) +class QuantizeWeightsAPI(unittest.TestCase): def test_embeddingND_quantize(self): - input_features = [('data', datatypes.Array(10,1))] - output_features = [('output', None)] + input_features = [("data", datatypes.Array(10, 1))] + output_features = [("output", None)] builder = neural_network.NeuralNetworkBuilder( - input_features, output_features, - disable_rank5_shape_mapping=True) + input_features, output_features, disable_rank5_shape_mapping=True + ) - builder.add_embedding_nd(name='embedding_nd', - input_name='data', - output_name='output', - vocab_size=300, - embedding_size=20, - W=np.random.rand(20, 300)) + builder.add_embedding_nd( + name="embedding_nd", + input_name="data", + output_name="output", + vocab_size=300, + embedding_size=20, + W=np.random.rand(20, 300), + ) spec = builder.spec model_fp32 = coremltools.models.MLModel(spec) - self.assertEqual(len(spec.neuralNetwork.layers[0].embeddingND.weights.floatValue), 6000) + self.assertEqual( + len(spec.neuralNetwork.layers[0].embeddingND.weights.floatValue), 6000 + ) # quantize to FP16 model_fp16 = quantization_utils.quantize_weights(model_fp32, nbits=16) spec_fp16 = model_fp16.get_spec() - self.assertEqual(len(spec_fp16.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0) - self.assertEqual(len(spec_fp16.neuralNetwork.layers[0].embeddingND.weights.float16Value), 2*6000) + self.assertEqual( + len(spec_fp16.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0 + ) + self.assertEqual( + len(spec_fp16.neuralNetwork.layers[0].embeddingND.weights.float16Value), + 2 * 6000, + ) # quantize to uint8 model_uint8 = quantization_utils.quantize_weights(model_fp32, nbits=8) spec_uint8 = model_uint8.get_spec() - self.assertEqual(len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0) - self.assertEqual(len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.float16Value), 0) - self.assertEqual(len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.rawValue), 6000) + self.assertEqual( + 
len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0 + ) + self.assertEqual( + len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.float16Value), 0 + ) + self.assertEqual( + len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.rawValue), 6000 + ) # quantize to uint5 model_uint5 = quantization_utils.quantize_weights(model_fp32, nbits=5) spec_uint5 = model_uint5.get_spec() - self.assertEqual(len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0) - self.assertEqual(len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.float16Value), 0) - self.assertEqual(len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.rawValue), 3750) # 3750 = 5*6000/8 - - - + self.assertEqual( + len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0 + ) + self.assertEqual( + len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.float16Value), 0 + ) + self.assertEqual( + len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.rawValue), 3750 + ) # 3750 = 5*6000/8 diff --git a/coremltools/test/neural_network/test_recurrent_stress_tests.py b/coremltools/test/neural_network/test_recurrent_stress_tests.py index 20dc988d4..3649abb17 100644 --- a/coremltools/test/neural_network/test_recurrent_stress_tests.py +++ b/coremltools/test/neural_network/test_recurrent_stress_tests.py @@ -5,12 +5,12 @@ import numpy as np import pytest -from coremltools._deps import HAS_KERAS2_TF, HAS_KERAS_TF -from coremltools.models.utils import macos_version, is_macos +from coremltools._deps import _HAS_KERAS2_TF, _HAS_KERAS_TF +from coremltools.models.utils import _macos_version, _is_macos np.random.seed(1377) -if HAS_KERAS2_TF or HAS_KERAS_TF: +if _HAS_KERAS2_TF or _HAS_KERAS_TF: import keras from keras.models import Sequential from keras.layers import LSTM, GRU, SimpleRNN, RepeatVector @@ -18,26 +18,28 @@ import keras.backend as K from coremltools.converters import keras as keras_converter -''' +""" ============================= Utility Functions ============================= -''' +""" def get_recurrent_activation_name_from_keras(activation): if activation == keras.activations.sigmoid: - activation_str = 'SIGMOID' + activation_str = "SIGMOID" elif activation == keras.activations.hard_sigmoid: - activation_str = 'SIGMOID_HARD' + activation_str = "SIGMOID_HARD" elif activation == keras.activations.tanh: - activation_str = 'TANH' + activation_str = "TANH" elif activation == keras.activations.relu: - activation_str = 'RELU' + activation_str = "RELU" elif activation == keras.activations.linear: - activation_str = 'LINEAR' + activation_str = "LINEAR" else: - raise NotImplementedError('activation %s not supported for Recurrent layer.' % activation) + raise NotImplementedError( + "activation %s not supported for Recurrent layer." % activation + ) return activation_str @@ -50,7 +52,7 @@ def relu(x): def sigmoid(x): - return 1. 
/ (1 + np.exp(-x)) + return 1.0 / (1 + np.exp(-x)) def hard_sigmoid(x, alpha=0.2, beta=0.5): @@ -62,15 +64,15 @@ def tanh(x): def apply_act(x, option): - if option == 'TANH': + if option == "TANH": return tanh(x) - elif option == 'RELU': + elif option == "RELU": return relu(x) - elif option == 'SIGMOID': + elif option == "SIGMOID": return sigmoid(x) - elif option == 'SIGMOID_HARD': + elif option == "SIGMOID_HARD": return hard_sigmoid(x) - elif option == 'LINEAR': + elif option == "LINEAR": return linear(x) @@ -80,10 +82,7 @@ def clip(x, threshold=50.0): def valid_params(params): """Checks if this combination of parameters is allowed by Keras""" - return not ( - params['input_dims'][1] == 1 and - params['unroll'] - ) + return not (params["input_dims"][1] == 1 and params["unroll"]) def _compute_SNR(x, y): @@ -101,11 +100,11 @@ def _compute_SNR(x, y): return SNR, PSNR, signal_energy -''' +""" ============================= Numpy implementations ============================= -''' +""" def get_numpy_prediction_gru(model, X): @@ -116,26 +115,30 @@ def get_numpy_prediction_gru(model, X): if keras_layer.go_backwards: X = X[::-1, :] - if HAS_KERAS2_TF: + if _HAS_KERAS2_TF: hidden_size = keras_layer.units keras_W_h = keras_layer.get_weights()[1].T - R_z = keras_W_h[0 * hidden_size:][:hidden_size] - R_r = keras_W_h[1 * hidden_size:][:hidden_size] - R_o = keras_W_h[2 * hidden_size:][:hidden_size] + R_z = keras_W_h[0 * hidden_size :][:hidden_size] + R_r = keras_W_h[1 * hidden_size :][:hidden_size] + R_o = keras_W_h[2 * hidden_size :][:hidden_size] keras_W_x = keras_layer.get_weights()[0].T - W_z = keras_W_x[0 * hidden_size:][:hidden_size] - W_r = keras_W_x[1 * hidden_size:][:hidden_size] - W_o = keras_W_x[2 * hidden_size:][:hidden_size] + W_z = keras_W_x[0 * hidden_size :][:hidden_size] + W_r = keras_W_x[1 * hidden_size :][:hidden_size] + W_o = keras_W_x[2 * hidden_size :][:hidden_size] keras_b = keras_layer.get_weights()[2] - b_z = keras_b[0 * hidden_size:][:hidden_size] - b_r = keras_b[1 * hidden_size:][:hidden_size] - b_o = keras_b[2 * hidden_size:][:hidden_size] + b_z = keras_b[0 * hidden_size :][:hidden_size] + b_r = keras_b[1 * hidden_size :][:hidden_size] + b_o = keras_b[2 * hidden_size :][:hidden_size] - inner_activation_str = get_recurrent_activation_name_from_keras(keras_layer.recurrent_activation) - activation_str = get_recurrent_activation_name_from_keras(keras_layer.activation) + inner_activation_str = get_recurrent_activation_name_from_keras( + keras_layer.recurrent_activation + ) + activation_str = get_recurrent_activation_name_from_keras( + keras_layer.activation + ) else: hidden_size = keras_layer.output_dim @@ -152,8 +155,12 @@ def get_numpy_prediction_gru(model, X): b_r = keras_layer.get_weights()[5] b_o = keras_layer.get_weights()[8] - inner_activation_str = get_recurrent_activation_name_from_keras(keras_layer.inner_activation) - activation_str = get_recurrent_activation_name_from_keras(keras_layer.activation) + inner_activation_str = get_recurrent_activation_name_from_keras( + keras_layer.inner_activation + ) + activation_str = get_recurrent_activation_name_from_keras( + keras_layer.activation + ) h = np.zeros((hidden_size)) c = np.zeros((hidden_size)) @@ -182,29 +189,33 @@ def get_numpy_prediction_unilstm(model, X): if keras_layer.go_backwards: X = X[::-1, :] - if HAS_KERAS2_TF: + if _HAS_KERAS2_TF: hidden_size = keras_layer.units keras_W_h = keras_layer.get_weights()[1].T - R_i = keras_W_h[0 * hidden_size:][:hidden_size] - R_f = keras_W_h[1 * hidden_size:][:hidden_size] - R_o = 
keras_W_h[3 * hidden_size:][:hidden_size] - R_g = keras_W_h[2 * hidden_size:][:hidden_size] + R_i = keras_W_h[0 * hidden_size :][:hidden_size] + R_f = keras_W_h[1 * hidden_size :][:hidden_size] + R_o = keras_W_h[3 * hidden_size :][:hidden_size] + R_g = keras_W_h[2 * hidden_size :][:hidden_size] keras_W_x = keras_layer.get_weights()[0].T - W_i = keras_W_x[0 * hidden_size:][:hidden_size] - W_f = keras_W_x[1 * hidden_size:][:hidden_size] - W_o = keras_W_x[3 * hidden_size:][:hidden_size] - W_g = keras_W_x[2 * hidden_size:][:hidden_size] + W_i = keras_W_x[0 * hidden_size :][:hidden_size] + W_f = keras_W_x[1 * hidden_size :][:hidden_size] + W_o = keras_W_x[3 * hidden_size :][:hidden_size] + W_g = keras_W_x[2 * hidden_size :][:hidden_size] keras_b = keras_layer.get_weights()[2] - b_i = keras_b[0 * hidden_size:][:hidden_size] - b_f = keras_b[1 * hidden_size:][:hidden_size] - b_o = keras_b[3 * hidden_size:][:hidden_size] - b_g = keras_b[2 * hidden_size:][:hidden_size] + b_i = keras_b[0 * hidden_size :][:hidden_size] + b_f = keras_b[1 * hidden_size :][:hidden_size] + b_o = keras_b[3 * hidden_size :][:hidden_size] + b_g = keras_b[2 * hidden_size :][:hidden_size] - inner_activation_str = get_recurrent_activation_name_from_keras(keras_layer.recurrent_activation) - activation_str = get_recurrent_activation_name_from_keras(keras_layer.activation) + inner_activation_str = get_recurrent_activation_name_from_keras( + keras_layer.recurrent_activation + ) + activation_str = get_recurrent_activation_name_from_keras( + keras_layer.activation + ) else: hidden_size = keras_layer.output_dim @@ -224,8 +235,12 @@ def get_numpy_prediction_unilstm(model, X): b_o = keras_layer.get_weights()[11] b_g = keras_layer.get_weights()[5] - inner_activation_str = get_recurrent_activation_name_from_keras(keras_layer.inner_activation) - activation_str = get_recurrent_activation_name_from_keras(keras_layer.activation) + inner_activation_str = get_recurrent_activation_name_from_keras( + keras_layer.inner_activation + ) + activation_str = get_recurrent_activation_name_from_keras( + keras_layer.activation + ) h = np.zeros((hidden_size)) c = np.zeros((hidden_size)) @@ -251,7 +266,9 @@ def get_numpy_prediction_bilstm_batched(model, X): batch, _, _ = X.shape out = [] for i in range(batch): - out.append(get_numpy_prediction_bilstm(model, np.expand_dims(X[i, :, :], axis=0))) + out.append( + get_numpy_prediction_bilstm(model, np.expand_dims(X[i, :, :], axis=0)) + ) return np.stack(out, axis=0) @@ -261,47 +278,51 @@ def get_numpy_prediction_bilstm(model, X): keras_layer = model.layers[0] return_seq = keras_layer.return_sequences - if HAS_KERAS2_TF: + if _HAS_KERAS2_TF: hidden_size = keras_layer.forward_layer.units keras_W_h = keras_layer.forward_layer.get_weights()[1].T - R_i = keras_W_h[0 * hidden_size:][:hidden_size] - R_f = keras_W_h[1 * hidden_size:][:hidden_size] - R_o = keras_W_h[3 * hidden_size:][:hidden_size] - R_g = keras_W_h[2 * hidden_size:][:hidden_size] + R_i = keras_W_h[0 * hidden_size :][:hidden_size] + R_f = keras_W_h[1 * hidden_size :][:hidden_size] + R_o = keras_W_h[3 * hidden_size :][:hidden_size] + R_g = keras_W_h[2 * hidden_size :][:hidden_size] keras_W_x = keras_layer.forward_layer.get_weights()[0].T - W_i = keras_W_x[0 * hidden_size:][:hidden_size] - W_f = keras_W_x[1 * hidden_size:][:hidden_size] - W_o = keras_W_x[3 * hidden_size:][:hidden_size] - W_g = keras_W_x[2 * hidden_size:][:hidden_size] + W_i = keras_W_x[0 * hidden_size :][:hidden_size] + W_f = keras_W_x[1 * hidden_size :][:hidden_size] + W_o = keras_W_x[3 * 
hidden_size :][:hidden_size] + W_g = keras_W_x[2 * hidden_size :][:hidden_size] keras_b = keras_layer.forward_layer.get_weights()[2] - b_i = keras_b[0 * hidden_size:][:hidden_size] - b_f = keras_b[1 * hidden_size:][:hidden_size] - b_o = keras_b[3 * hidden_size:][:hidden_size] - b_g = keras_b[2 * hidden_size:][:hidden_size] + b_i = keras_b[0 * hidden_size :][:hidden_size] + b_f = keras_b[1 * hidden_size :][:hidden_size] + b_o = keras_b[3 * hidden_size :][:hidden_size] + b_g = keras_b[2 * hidden_size :][:hidden_size] keras_W_h = keras_layer.backward_layer.get_weights()[1].T - R_i_back = keras_W_h[0 * hidden_size:][:hidden_size] - R_f_back = keras_W_h[1 * hidden_size:][:hidden_size] - R_o_back = keras_W_h[3 * hidden_size:][:hidden_size] - R_g_back = keras_W_h[2 * hidden_size:][:hidden_size] + R_i_back = keras_W_h[0 * hidden_size :][:hidden_size] + R_f_back = keras_W_h[1 * hidden_size :][:hidden_size] + R_o_back = keras_W_h[3 * hidden_size :][:hidden_size] + R_g_back = keras_W_h[2 * hidden_size :][:hidden_size] keras_W_x = keras_layer.backward_layer.get_weights()[0].T - W_i_back = keras_W_x[0 * hidden_size:][:hidden_size] - W_f_back = keras_W_x[1 * hidden_size:][:hidden_size] - W_o_back = keras_W_x[3 * hidden_size:][:hidden_size] - W_g_back = keras_W_x[2 * hidden_size:][:hidden_size] + W_i_back = keras_W_x[0 * hidden_size :][:hidden_size] + W_f_back = keras_W_x[1 * hidden_size :][:hidden_size] + W_o_back = keras_W_x[3 * hidden_size :][:hidden_size] + W_g_back = keras_W_x[2 * hidden_size :][:hidden_size] keras_b = keras_layer.backward_layer.get_weights()[2] - b_i_back = keras_b[0 * hidden_size:][:hidden_size] - b_f_back = keras_b[1 * hidden_size:][:hidden_size] - b_o_back = keras_b[3 * hidden_size:][:hidden_size] - b_g_back = keras_b[2 * hidden_size:][:hidden_size] + b_i_back = keras_b[0 * hidden_size :][:hidden_size] + b_f_back = keras_b[1 * hidden_size :][:hidden_size] + b_o_back = keras_b[3 * hidden_size :][:hidden_size] + b_g_back = keras_b[2 * hidden_size :][:hidden_size] - inner_activation_str = get_recurrent_activation_name_from_keras(keras_layer.forward_layer.recurrent_activation) - activation_str = get_recurrent_activation_name_from_keras(keras_layer.forward_layer.activation) + inner_activation_str = get_recurrent_activation_name_from_keras( + keras_layer.forward_layer.recurrent_activation + ) + activation_str = get_recurrent_activation_name_from_keras( + keras_layer.forward_layer.activation + ) else: hidden_size = keras_layer.forward_layer.output_dim @@ -336,8 +357,12 @@ def get_numpy_prediction_bilstm(model, X): b_o_back = keras_layer.backward_layer.get_weights()[11] b_g_back = keras_layer.backward_layer.get_weights()[5] - inner_activation_str = get_recurrent_activation_name_from_keras(keras_layer.forward_layer.inner_activation) - activation_str = get_recurrent_activation_name_from_keras(keras_layer.forward_layer.activation) + inner_activation_str = get_recurrent_activation_name_from_keras( + keras_layer.forward_layer.inner_activation + ) + activation_str = get_recurrent_activation_name_from_keras( + keras_layer.forward_layer.activation + ) h = np.zeros((hidden_size)) c = np.zeros((hidden_size)) @@ -358,12 +383,23 @@ def get_numpy_prediction_bilstm(model, X): np_out_backward = np.zeros((seq_len, hidden_size)) for k in range(seq_len): x = X[seq_len - k - 1, :] - i = apply_act(clip(np.dot(W_i_back, x) + np.dot(R_i_back, h) + b_i_back), inner_activation_str) - f = apply_act(clip(np.dot(W_f_back, x) + np.dot(R_f_back, h) + b_f_back), inner_activation_str) - g = 
apply_act(clip(np.dot(W_g_back, x) + np.dot(R_g_back, h) + b_g_back), activation_str) + i = apply_act( + clip(np.dot(W_i_back, x) + np.dot(R_i_back, h) + b_i_back), + inner_activation_str, + ) + f = apply_act( + clip(np.dot(W_f_back, x) + np.dot(R_f_back, h) + b_f_back), + inner_activation_str, + ) + g = apply_act( + clip(np.dot(W_g_back, x) + np.dot(R_g_back, h) + b_g_back), activation_str + ) c = c * f + i * g c = clip(c, 50000.0) - o = apply_act(clip(np.dot(W_o_back, x) + np.dot(R_o_back, h) + b_o_back), inner_activation_str) + o = apply_act( + clip(np.dot(W_o_back, x) + np.dot(R_o_back, h) + b_o_back), + inner_activation_str, + ) h = o * apply_act(c, activation_str) np_out_backward[k, :] = h @@ -379,46 +415,58 @@ def get_numpy_prediction_bilstm(model, X): return np_out_final -''' +""" ============================= Nosetest Functions ============================= -''' +""" def get_mlkit_model_from_path(model): from coremltools.converters import keras as keras_converter - model = keras_converter.convert(model, ['data'], ['output']) + + model = keras_converter.convert(model, ["data"], ["output"]) return model def generate_input(dim0, dim1, dim2): - input_data = np.random.rand(dim0, dim1, dim2).astype('f') # astype() should be removed + input_data = np.random.rand(dim0, dim1, dim2).astype( + "f" + ) # astype() should be removed return input_data def simple_model_eval(params, model): mlkitmodel = get_mlkit_model_from_path(model) # New test case takes in 2D input as opposed to uniform 3d input across all other tests - if len(params[0]['input_dims']) == 3: - input_data = generate_input(params[0]['input_dims'][0], params[0]['input_dims'][1], - params[0]['input_dims'][2]) + if len(params[0]["input_dims"]) == 3: + input_data = generate_input( + params[0]["input_dims"][0], + params[0]["input_dims"][1], + params[0]["input_dims"][2], + ) keras_preds = model.predict(input_data).flatten() - elif len(params[0]['input_dims']) == 2: - input_data = np.squeeze(np.random.rand(params[0]['input_dims'][0], params[0]['input_dims'][1])) + elif len(params[0]["input_dims"]) == 2: + input_data = np.squeeze( + np.random.rand(params[0]["input_dims"][0], params[0]["input_dims"][1]) + ) keras_preds = model.predict( - input_data.reshape((params[0]['input_dims'][0], params[0]['input_dims'][1]))).flatten() - if len(params[0]['input_dims']) == 3: + input_data.reshape((params[0]["input_dims"][0], params[0]["input_dims"][1])) + ).flatten() + if len(params[0]["input_dims"]) == 3: input_data = np.transpose(input_data, [1, 0, 2]) - if is_macos() and macos_version() >= (10, 13): - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + if _is_macos() and _macos_version() >= (10, 13): + coreml_preds = mlkitmodel.predict({"data": input_data})["output"].flatten() if K.tensorflow_backend._SESSION: import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0) + max_denominator = np.maximum( + np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0 + ) relative_error = coreml_preds / max_denominator - keras_preds / max_denominator return relative_error, keras_preds, coreml_preds else: @@ -432,118 +480,166 @@ class SimpleTestCase(unittest.TestCase): """ def _test_simple_rnn(self, keras_major_version): - params = dict( - input_dims=[1, 2, 100], go_backwards=False, activation='tanh', - stateful=False, unroll=False, return_sequences=True, output_dim=4 # Passes 
for < 3 - ), + params = ( + dict( + input_dims=[1, 2, 100], + go_backwards=False, + activation="tanh", + stateful=False, + unroll=False, + return_sequences=True, + output_dim=4, # Passes for < 3 + ), + ) model = Sequential() if keras_major_version == 2: - model.add(SimpleRNN(units=params[0]['output_dim'], - input_shape=(params[0]['input_dims'][1], params[0]['input_dims'][2]), - activation=params[0]['activation'], - return_sequences=params[0]['return_sequences'], - go_backwards=params[0]['go_backwards'], - unroll=True, - )) + model.add( + SimpleRNN( + units=params[0]["output_dim"], + input_shape=( + params[0]["input_dims"][1], + params[0]["input_dims"][2], + ), + activation=params[0]["activation"], + return_sequences=params[0]["return_sequences"], + go_backwards=params[0]["go_backwards"], + unroll=True, + ) + ) else: - model.add(SimpleRNN(output_dim=params[0]['output_dim'], - input_length=params[0]['input_dims'][1], - input_dim=params[0]['input_dims'][2], - activation=params[0]['activation'], - return_sequences=params[0]['return_sequences'], - go_backwards=params[0]['go_backwards'], - unroll=True, - )) + model.add( + SimpleRNN( + output_dim=params[0]["output_dim"], + input_length=params[0]["input_dims"][1], + input_dim=params[0]["input_dims"][2], + activation=params[0]["activation"], + return_sequences=params[0]["return_sequences"], + go_backwards=params[0]["go_backwards"], + unroll=True, + ) + ) relative_error, keras_preds, coreml_preds = simple_model_eval(params, model) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) def _test_simple_lstm(self, keras_major_version): - params = dict( - input_dims=[1, 3, 5], go_backwards=True, activation='linear', - stateful=False, unroll=False, return_sequences=False, output_dim=3, - inner_activation='linear' - ), + params = ( + dict( + input_dims=[1, 3, 5], + go_backwards=True, + activation="linear", + stateful=False, + unroll=False, + return_sequences=False, + output_dim=3, + inner_activation="linear", + ), + ) model = Sequential() if keras_major_version == 2: - model.add(LSTM(units=params[0]['output_dim'], - input_shape=(params[0]['input_dims'][1], params[0]['input_dims'][2]), - activation=params[0]['activation'], - return_sequences=params[0]['return_sequences'], - go_backwards=params[0]['go_backwards'], - unroll=True, - recurrent_activation='linear' - )) + model.add( + LSTM( + units=params[0]["output_dim"], + input_shape=( + params[0]["input_dims"][1], + params[0]["input_dims"][2], + ), + activation=params[0]["activation"], + return_sequences=params[0]["return_sequences"], + go_backwards=params[0]["go_backwards"], + unroll=True, + recurrent_activation="linear", + ) + ) else: - model.add(LSTM(output_dim=params[0]['output_dim'], - input_length=params[0]['input_dims'][1], - input_dim=params[0]['input_dims'][2], - activation=params[0]['activation'], - return_sequences=params[0]['return_sequences'], - go_backwards=params[0]['go_backwards'], - unroll=True, - inner_activation='linear' - )) + model.add( + LSTM( + output_dim=params[0]["output_dim"], + input_length=params[0]["input_dims"][1], + input_dim=params[0]["input_dims"][2], + activation=params[0]["activation"], + return_sequences=params[0]["return_sequences"], + go_backwards=params[0]["go_backwards"], + unroll=True, + inner_activation="linear", + ) + ) relative_error, keras_preds, coreml_preds = simple_model_eval(params, model) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) def _test_simple_gru(self, keras_major_version): - params = 
dict( - input_dims=[1, 4, 8], go_backwards=False, activation='tanh', - stateful=False, unroll=False, return_sequences=False, output_dim=4 - ), + params = ( + dict( + input_dims=[1, 4, 8], + go_backwards=False, + activation="tanh", + stateful=False, + unroll=False, + return_sequences=False, + output_dim=4, + ), + ) model = Sequential() if keras_major_version == 2: - model.add(GRU(units=params[0]['output_dim'], - input_shape=(params[0]['input_dims'][1], params[0]['input_dims'][2]), - activation=params[0]['activation'], - recurrent_activation='sigmoid', - return_sequences=params[0]['return_sequences'], - go_backwards=params[0]['go_backwards'], - unroll=True, - )) + model.add( + GRU( + units=params[0]["output_dim"], + input_shape=( + params[0]["input_dims"][1], + params[0]["input_dims"][2], + ), + activation=params[0]["activation"], + recurrent_activation="sigmoid", + return_sequences=params[0]["return_sequences"], + go_backwards=params[0]["go_backwards"], + unroll=True, + ) + ) else: - model.add(GRU(output_dim=params[0]['output_dim'], - input_length=params[0]['input_dims'][1], - input_dim=params[0]['input_dims'][2], - activation=params[0]['activation'], - inner_activation='sigmoid', - return_sequences=params[0]['return_sequences'], - go_backwards=params[0]['go_backwards'], - unroll=True, - )) + model.add( + GRU( + output_dim=params[0]["output_dim"], + input_length=params[0]["input_dims"][1], + input_dim=params[0]["input_dims"][2], + activation=params[0]["activation"], + inner_activation="sigmoid", + return_sequences=params[0]["return_sequences"], + go_backwards=params[0]["go_backwards"], + unroll=True, + ) + ) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) relative_error, keras_preds, coreml_preds = simple_model_eval(params, model) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_simple_rnn(self): self._test_simple_rnn(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_simple_lstm(self): self._test_simple_lstm(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_simple_gru(self): self._test_simple_gru(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 def test_keras2_simple_rnn(self): self._test_simple_rnn(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 def test_keras2_simple_lstm(self): self._test_simple_lstm(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
Skipping test.") @pytest.mark.keras2 def test_keras2_simple_gru(self): self._test_simple_gru(keras_major_version=2) @@ -562,7 +658,7 @@ def setUp(self): go_backwards=[False, True], unroll=[True], return_sequences=[False, True], - activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], + activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], ) self.base_layer_params = list(itertools.product(*self.params_dict.values())) @@ -575,7 +671,9 @@ class RNNLayer(RecurrentLayerTest): def setUp(self): super(RNNLayer, self).setUp() self.simple_rnn_params_dict = self.params_dict - self.rnn_layer_params = list(itertools.product(self.simple_rnn_params_dict.values())) + self.rnn_layer_params = list( + itertools.product(self.simple_rnn_params_dict.values()) + ) def _test_rnn_layer(self, keras_major_version, limit=None): i = 0 @@ -584,95 +682,119 @@ def _test_rnn_layer(self, keras_major_version, limit=None): numerical_failiure = 0 params = list(itertools.product(self.base_layer_params, self.rnn_layer_params)) np.random.shuffle(params) - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param[0])))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param[0]))) + ] for base_params, rnn_params in params[:limit]: base_params = dict(zip(self.params_dict.keys(), base_params)) rnn_params = dict(zip(self.simple_rnn_params_dict.keys(), rnn_params)) model = Sequential() - unroll = base_params['unroll'] - if base_params['input_dims'][1] == 1 and unroll == True: + unroll = base_params["unroll"] + if base_params["input_dims"][1] == 1 and unroll == True: unroll = False if keras_major_version == 2: model.add( SimpleRNN( - base_params['output_dim'], - input_shape=base_params['input_dims'][1:], - activation=base_params['activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], + base_params["output_dim"], + input_shape=base_params["input_dims"][1:], + activation=base_params["activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], unroll=unroll, ) ) else: model.add( SimpleRNN( - base_params['output_dim'], - input_length=base_params['input_dims'][1], - input_dim=base_params['input_dims'][2], - activation=base_params['activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], + base_params["output_dim"], + input_length=base_params["input_dims"][1], + input_dim=base_params["input_dims"][2], + activation=base_params["activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], unroll=unroll, ) ) mlkitmodel = get_mlkit_model_from_path(model) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) keras_preds = model.predict(input_data).flatten() if K.tensorflow_backend._SESSION: import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None input_data = np.transpose(input_data, [1, 0, 2]) - if is_macos() and macos_version() >= (10, 13): - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + if _is_macos() and _macos_version() >= (10, 13): + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() try: 
self.assertEquals(coreml_preds.shape, keras_preds.shape) except AssertionError: - print("Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( - base_params, keras_preds.shape, coreml_preds.shape)) + print( + "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( + base_params, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(base_params) i += 1 continue try: - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0) - relative_error = coreml_preds / max_denominator - keras_preds / max_denominator + max_denominator = np.maximum( + np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0 + ) + relative_error = ( + coreml_preds / max_denominator - keras_preds / max_denominator + ) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) except AssertionError: - print("Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format(base_params, - keras_preds, - coreml_preds)) + print( + "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format( + base_params, keras_preds, coreml_preds + ) + ) numerical_failiure += 1 numerical_err_models.append(base_params) i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}\n' - 'Total numerical failiures: {}/{}\n'.format( + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( numerical_err_models, - numerical_failiure, i) - ) + [], + msg="Numerical error models {}\n" + "Total numerical failures: {}/{}\n".format( + numerical_err_models, numerical_failiure, i + ), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_kers1_rnn_layer_stress(self): self._test_rnn_layer(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_rnn_layer(self): self._test_rnn_layer(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_rnn_layer_stress(self): self._test_rnn_layer(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. 
Skipping test.") @pytest.mark.keras2 def test_keras2_rnn_layer(self): self._test_rnn_layer(keras_major_version=2, limit=10) @@ -686,26 +808,28 @@ class LSTMLayer(RecurrentLayerTest): def setUp(self): super(LSTMLayer, self).setUp() self.lstm_params_dict = dict( - inner_activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], + inner_activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], bidirectional=[False], ) - self.lstm_layer_params = list(itertools.product(*self.lstm_params_dict.values())) + self.lstm_layer_params = list( + itertools.product(*self.lstm_params_dict.values()) + ) def _test_bilstm_layer(self, batched=False): if not batched: params_dict = dict( input_dims=[[1, 5, 10], [1, 2, 5]], output_dim=[1, 5, 10], - activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], - inner_activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], + activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], + inner_activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], return_sequences=[True, False], ) else: params_dict = dict( input_dims=[[3, 5, 10], [6, 2, 5]], output_dim=[1, 5, 10], - activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], - inner_activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], + activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], + inner_activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], return_sequences=[True, False], ) @@ -717,58 +841,73 @@ def _test_bilstm_layer(self, batched=False): numerical_failiure = 0 for param in params: ii += 1 - #print('-------------- %d / %d ------------------- ' % (ii, len(params))) + # print('-------------- %d / %d ------------------- ' % (ii, len(params))) param = dict(zip(params_dict.keys(), param)) - if param['activation'] == 'linear': + if param["activation"] == "linear": keras_act = None else: - keras_act = param['activation'] + keras_act = param["activation"] - if param['inner_activation'] == 'linear': + if param["inner_activation"] == "linear": keras_inner_act = None else: - keras_inner_act = param['inner_activation'] + keras_inner_act = param["inner_activation"] model = Sequential() model.add( Bidirectional( LSTM( - param['output_dim'], + param["output_dim"], activation=keras_act, recurrent_activation=keras_inner_act, - return_sequences=param['return_sequences'], + return_sequences=param["return_sequences"], go_backwards=False, unroll=False, ), - input_shape=(param['input_dims'][1], param['input_dims'][2]), + input_shape=(param["input_dims"][1], param["input_dims"][2]), ) ) mlmodel = get_mlkit_model_from_path(model) - Batch = param['input_dims'][0] - Seq = param['input_dims'][1] - h = param['output_dim'] - input_size = param['input_dims'][2] + Batch = param["input_dims"][0] + Seq = param["input_dims"][1] + h = param["output_dim"] + input_size = param["input_dims"][2] input_data = generate_input(Batch, Seq, input_size) - activations_to_test_with_numpy = {'linear', 'relu'} - if param['activation'] in activations_to_test_with_numpy or param['inner_activation'] in activations_to_test_with_numpy: - keras_preds = get_numpy_prediction_bilstm_batched(model, input_data) # (Batch, Seq, h) + activations_to_test_with_numpy = {"linear", "relu"} + if ( + param["activation"] in activations_to_test_with_numpy + or param["inner_activation"] in activations_to_test_with_numpy + ): + keras_preds = get_numpy_prediction_bilstm_batched( + model, input_data + ) # (Batch, Seq, h) else: keras_preds = model.predict(input_data) # (Batch, Seq, h) - if 
is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): input_data = np.transpose(input_data, [1, 0, 2]) input_dict = {} - input_dict['data'] = input_data - input_dict['bidirectional_1_h_in'] = np.zeros((1, Batch, h), dtype=np.float) - input_dict['bidirectional_1_c_in'] = np.zeros((1, Batch, h), dtype=np.float) - input_dict['bidirectional_1_h_in_rev'] = np.zeros((1, Batch, h), dtype=np.float) - input_dict['bidirectional_1_c_in_rev'] = np.zeros((1, Batch, h), dtype=np.float) - coreml_preds = mlmodel.predict(input_dict)['output'] # (Seq, Batch, h, .. ) - if param['return_sequences']: + input_dict["data"] = input_data + input_dict["bidirectional_1_h_in"] = np.zeros( + (1, Batch, h), dtype=float + ) + input_dict["bidirectional_1_c_in"] = np.zeros( + (1, Batch, h), dtype=float + ) + input_dict["bidirectional_1_h_in_rev"] = np.zeros( + (1, Batch, h), dtype=float + ) + input_dict["bidirectional_1_c_in_rev"] = np.zeros( + (1, Batch, h), dtype=float + ) + coreml_preds = mlmodel.predict(input_dict)[ + "output" + ] # (Seq, Batch, h, .. ) + if param["return_sequences"]: coreml_preds = np.reshape(coreml_preds, [Seq, Batch, 2 * h]) else: coreml_preds = np.reshape(coreml_preds, [1, Batch, 2 * h]) @@ -777,6 +916,7 @@ def _test_bilstm_layer(self, batched=False): if K.tensorflow_backend._SESSION: import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None @@ -785,27 +925,45 @@ def _test_bilstm_layer(self, batched=False): self.assertEquals(coreml_preds.shape, keras_preds.shape) except AssertionError: print( - "Shape error:\n param: {}\n\n keras_preds.shape: {}\n\n coreml_preds.shape: {}".format(param, keras_preds.shape, coreml_preds.shape)) + "Shape error:\n param: {}\n\n keras_preds.shape: {}\n\n coreml_preds.shape: {}".format( + param, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(param) i += 1 continue - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds.flatten()), np.abs(keras_preds.flatten())), 1.0) - relative_error = coreml_preds.flatten() / max_denominator - keras_preds.flatten() / max_denominator + max_denominator = np.maximum( + np.maximum( + np.abs(coreml_preds.flatten()), np.abs(keras_preds.flatten()) + ), + 1.0, + ) + relative_error = ( + coreml_preds.flatten() / max_denominator + - keras_preds.flatten() / max_denominator + ) max_relative_error = np.amax(relative_error) try: self.assertLessEqual(max_relative_error, 0.01) except AssertionError: snr, psnr, signal_energy = _compute_SNR(keras_preds, coreml_preds) - print('-*' * 80) - print('Assertion error. \n param : {} \n'.format(param)) - print('max error = %.4f, snr = %.1f, psnr = %.1f, energy = %.6f' % (max_relative_error, snr, psnr, signal_energy)) - print('keras preds shape: {}, coreml preds shape = {}'.format(str(keras_preds.shape), str(coreml_preds.shape))) + print("-*" * 80) + print("Assertion error. 
\n param : {} \n".format(param)) + print( + "max error = %.4f, snr = %.1f, psnr = %.1f, energy = %.6f" + % (max_relative_error, snr, psnr, signal_energy) + ) + print( + "keras preds shape: {}, coreml preds shape = {}".format( + str(keras_preds.shape), str(coreml_preds.shape) + ) + ) # for b in range(Batch): # snr, psnr, signal_energy = _compute_SNR(keras_preds[b, :, :], coreml_preds[b, :, :]) # print('snr = %.1f, psnr = %.1f, energy = %.6f' % (snr, psnr, signal_energy)) # print('batch id = {}, keras_preds = \n{} '.format(b, keras_preds[b, :, :])) # print('batch id = {}, coreml_preds = \n{} '.format(b, coreml_preds[b, :, :])) - print('-*' * 80) + print("-*" * 80) numerical_failiure += 1 numerical_err_models.append(param) @@ -813,15 +971,21 @@ def _test_bilstm_layer(self, batched=False): i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) def _test_batched_lstm_layer(self): params_dict = dict( input_dims=[[3, 5, 10], [6, 2, 5]], output_dim=[1, 5, 10], - activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], - inner_activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'], + activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], + inner_activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"], return_sequences=[True, False], ) params = list(itertools.product(*params_dict.values())) @@ -832,48 +996,53 @@ def _test_batched_lstm_layer(self): numerical_failiure = 0 for param in params: ii += 1 - #print('-------------- %d / %d ------------------- ' % (ii, len(params))) + # print('-------------- %d / %d ------------------- ' % (ii, len(params))) param = dict(zip(params_dict.keys(), param)) - if param['activation'] == 'linear': + if param["activation"] == "linear": keras_act = None else: - keras_act = param['activation'] + keras_act = param["activation"] - if param['inner_activation'] == 'linear': + if param["inner_activation"] == "linear": keras_inner_act = None else: - keras_inner_act = param['inner_activation'] + keras_inner_act = param["inner_activation"] model = Sequential() - model.add(LSTM( - param['output_dim'], - input_shape=(param['input_dims'][1], param['input_dims'][2]), - activation=keras_act, - recurrent_activation=keras_inner_act, - return_sequences=param['return_sequences'], - go_backwards=False, - unroll=False)) + model.add( + LSTM( + param["output_dim"], + input_shape=(param["input_dims"][1], param["input_dims"][2]), + activation=keras_act, + recurrent_activation=keras_inner_act, + return_sequences=param["return_sequences"], + go_backwards=False, + unroll=False, + ) + ) mlmodel = get_mlkit_model_from_path(model) - Batch = param['input_dims'][0] - Seq = param['input_dims'][1] - h = param['output_dim'] - input_size = param['input_dims'][2] + Batch = param["input_dims"][0] + Seq = param["input_dims"][1] + h = param["output_dim"] + input_size = param["input_dims"][2] input_data = generate_input(Batch, Seq, input_size) keras_preds = model.predict(input_data) # (Batch, Seq, h) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): input_data = np.transpose(input_data, [1, 0, 2]) input_dict = {} - input_dict['data'] = input_data - 
input_dict['lstm_1_h_in'] = np.zeros((1, Batch, h), dtype=np.float) - input_dict['lstm_1_c_in'] = np.zeros((1, Batch, h), dtype=np.float) - coreml_preds = mlmodel.predict(input_dict)['output'] # (Seq, Batch, h, .. ) - if param['return_sequences']: + input_dict["data"] = input_data + input_dict["lstm_1_h_in"] = np.zeros((1, Batch, h), dtype=float) + input_dict["lstm_1_c_in"] = np.zeros((1, Batch, h), dtype=float) + coreml_preds = mlmodel.predict(input_dict)[ + "output" + ] # (Seq, Batch, h, .. ) + if param["return_sequences"]: coreml_preds = np.reshape(coreml_preds, [Seq, Batch, h]) else: coreml_preds = np.reshape(coreml_preds, [1, Batch, h]) @@ -882,6 +1051,7 @@ def _test_batched_lstm_layer(self): if K.tensorflow_backend._SESSION: import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None @@ -890,27 +1060,46 @@ def _test_batched_lstm_layer(self): self.assertEquals(coreml_preds.shape, keras_preds.shape) except AssertionError: print( - "Shape error:\n param: {}\n\n keras_preds.shape: {}\n\n coreml_preds.shape: {}".format(param, keras_preds.shape, coreml_preds.shape)) + "Shape error:\n param: {}\n\n keras_preds.shape: {}\n\n coreml_preds.shape: {}".format( + param, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(param) i += 1 continue try: - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds.flatten()), np.abs(keras_preds.flatten())), 1.0) - relative_error = coreml_preds.flatten() / max_denominator - keras_preds.flatten() / max_denominator + max_denominator = np.maximum( + np.maximum( + np.abs(coreml_preds.flatten()), + np.abs(keras_preds.flatten()), + ), + 1.0, + ) + relative_error = ( + coreml_preds.flatten() / max_denominator + - keras_preds.flatten() / max_denominator + ) max_relative_error = np.amax(relative_error) self.assertLessEqual(max_relative_error, 0.01) except AssertionError: snr, psnr, signal_energy = _compute_SNR(keras_preds, coreml_preds) - print('-*' * 80) - print('Assertion error. \n param : {} \n'.format(param)) - print('max error = %.4f, snr = %.1f, psnr = %.1f, energy = %.6f' % (max_relative_error, snr, psnr, signal_energy)) - print('keras preds shape: {}, coreml preds shape = {}'.format(str(keras_preds.shape), str(coreml_preds.shape))) + print("-*" * 80) + print("Assertion error. 
\n param : {} \n".format(param)) + print( + "max error = %.4f, snr = %.1f, psnr = %.1f, energy = %.6f" + % (max_relative_error, snr, psnr, signal_energy) + ) + print( + "keras preds shape: {}, coreml preds shape = {}".format( + str(keras_preds.shape), str(coreml_preds.shape) + ) + ) # for b in range(Batch): # snr, psnr, signal_energy = _compute_SNR(keras_preds[b, :, :], coreml_preds[b, :, :]) # print('snr = %.1f, psnr = %.1f, energy = %.6f' % (snr, psnr, signal_energy)) # print('batch id = {}, keras_preds = \n{} '.format(b, keras_preds[b, :, :])) # print('batch id = {}, coreml_preds = \n{} '.format(b, coreml_preds[b, :, :])) - print('-*' * 80) + print("-*" * 80) numerical_failiure += 1 numerical_err_models.append(param) @@ -918,8 +1107,14 @@ def _test_batched_lstm_layer(self): i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) def _test_lstm_layer(self, keras_major_version, limit=None): params_keys = list(self.params_dict.keys()) @@ -929,7 +1124,11 @@ def _test_lstm_layer(self, keras_major_version, limit=None): params = list(itertools.product(self.base_layer_params, self.lstm_layer_params)) np.random.shuffle(params) - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param[0])))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param[0]))) + ] ctr = 0 for base_params, lstm_params in params[:limit]: ctr += 1 @@ -937,85 +1136,107 @@ def _test_lstm_layer(self, keras_major_version, limit=None): base_params = dict(zip(self.params_dict.keys(), base_params)) lstm_params = dict(zip(self.lstm_params_dict.keys(), lstm_params)) model = Sequential() - unroll = base_params['unroll'] - if base_params['input_dims'][1] == 1 and unroll == True: + unroll = base_params["unroll"] + if base_params["input_dims"][1] == 1 and unroll == True: unroll = False - if lstm_params['bidirectional'] is True: + if lstm_params["bidirectional"] is True: if keras_major_version == 2: model.add( Bidirectional( LSTM( - base_params['output_dim'], - activation=base_params['activation'], - recurrent_activation=lstm_params['inner_activation'], - return_sequences=base_params['return_sequences'], + base_params["output_dim"], + activation=base_params["activation"], + recurrent_activation=lstm_params["inner_activation"], + return_sequences=base_params["return_sequences"], go_backwards=False, unroll=unroll, ), - input_shape=(base_params['input_dims'][1], base_params['input_dims'][2]), - + input_shape=( + base_params["input_dims"][1], + base_params["input_dims"][2], + ), ) ) else: model.add( Bidirectional( LSTM( - base_params['output_dim'], - activation=base_params['activation'], - inner_activation=lstm_params['inner_activation'], - return_sequences=base_params['return_sequences'], + base_params["output_dim"], + activation=base_params["activation"], + inner_activation=lstm_params["inner_activation"], + return_sequences=base_params["return_sequences"], go_backwards=False, unroll=unroll, ), - input_shape=(base_params['input_dims'][1], base_params['input_dims'][2]), - + input_shape=( + base_params["input_dims"][1], + base_params["input_dims"][2], + ), ) ) else: if keras_major_version == 
2: model.add( LSTM( - base_params['output_dim'], - input_shape=(base_params['input_dims'][1], base_params['input_dims'][2]), - activation=base_params['activation'], - recurrent_activation=lstm_params['inner_activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], + base_params["output_dim"], + input_shape=( + base_params["input_dims"][1], + base_params["input_dims"][2], + ), + activation=base_params["activation"], + recurrent_activation=lstm_params["inner_activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], unroll=unroll, ) ) else: model.add( LSTM( - base_params['output_dim'], - input_shape=(base_params['input_dims'][1], base_params['input_dims'][2]), - activation=base_params['activation'], - inner_activation=lstm_params['inner_activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], + base_params["output_dim"], + input_shape=( + base_params["input_dims"][1], + base_params["input_dims"][2], + ), + activation=base_params["activation"], + inner_activation=lstm_params["inner_activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], unroll=unroll, ) ) mlkitmodel = get_mlkit_model_from_path(model) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) - - activations_to_test_with_numpy = {'linear', 'relu'} - if base_params['activation'] in activations_to_test_with_numpy or lstm_params[ - 'inner_activation'] in activations_to_test_with_numpy: - if lstm_params['bidirectional']: - keras_preds = get_numpy_prediction_bilstm(model, input_data).flatten() + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) + + activations_to_test_with_numpy = {"linear", "relu"} + if ( + base_params["activation"] in activations_to_test_with_numpy + or lstm_params["inner_activation"] in activations_to_test_with_numpy + ): + if lstm_params["bidirectional"]: + keras_preds = get_numpy_prediction_bilstm( + model, input_data + ).flatten() else: - keras_preds = get_numpy_prediction_unilstm(model, input_data).flatten() + keras_preds = get_numpy_prediction_unilstm( + model, input_data + ).flatten() else: keras_preds = model.predict(input_data).flatten() - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): input_data = np.transpose(input_data, [1, 0, 2]) - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() if K.tensorflow_backend._SESSION: import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None @@ -1025,13 +1246,22 @@ def _test_lstm_layer(self, keras_major_version, limit=None): except AssertionError: print( "Shape error:\n base_params: {}\n\n lstm_params: {}\n\n keras_preds.shape: {}\n\n coreml_preds.shape: {}".format( - base_params, lstm_params, keras_preds.shape, coreml_preds.shape)) + base_params, + lstm_params, + keras_preds.shape, + coreml_preds.shape, + ) + ) shape_err_models.append(base_params) continue - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0) + max_denominator = np.maximum( + np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0 + ) try: - relative_error = coreml_preds / max_denominator - 
keras_preds / max_denominator + relative_error = ( + coreml_preds / max_denominator - keras_preds / max_denominator + ) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) except AssertionError: @@ -1042,48 +1272,56 @@ def _test_lstm_layer(self, keras_major_version, limit=None): keras_preds / max_denominator, coreml_preds / max_denominator, keras_preds, - coreml_preds)) + coreml_preds, + ) + ) numerical_failiure += 1 numerical_err_models.append(base_params) - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_keras_lstm_layer_stress(self): self._test_lstm_layer(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras_lstm_layer(self): self._test_lstm_layer(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_lstm_layer_stress(self): self._test_lstm_layer(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 def test_keras2_lstm_layer(self): self._test_lstm_layer(keras_major_version=2, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_bilstm_layer(self): self._test_bilstm_layer() - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_bilstm_layer_batched(self): self._test_bilstm_layer(batched=True) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_lstm_layer_batched(self): @@ -1098,7 +1336,7 @@ class GRULayer(RecurrentLayerTest): def setUp(self): super(GRULayer, self).setUp() self.gru_params_dict = dict( - inner_activation=['tanh', 'linear', 'sigmoid', 'hard_sigmoid', 'relu'] + inner_activation=["tanh", "linear", "sigmoid", "hard_sigmoid", "relu"] ) self.gru_layer_params = list(itertools.product(*self.gru_params_dict.values())) @@ -1109,56 +1347,71 @@ def _test_gru_layer(self, keras_major_version, limit=None): numerical_failiure = 0 params = list(itertools.product(self.base_layer_params, self.gru_layer_params)) np.random.shuffle(params) - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param[0])))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param[0]))) + ] for base_params, gru_params in params[:limit]: base_params = dict(zip(self.params_dict.keys(), base_params)) gru_params = dict(zip(self.gru_params_dict.keys(), gru_params)) model = Sequential() - unroll = base_params['unroll'] - if base_params['input_dims'][1] == 1 and unroll == True: + unroll = base_params["unroll"] + if base_params["input_dims"][1] == 1 and unroll == True: unroll = False if keras_major_version == 2: model.add( GRU( - base_params['output_dim'], - input_shape=(base_params['input_dims'][1], base_params['input_dims'][2]), - activation=base_params['activation'], - recurrent_activation=gru_params['inner_activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], + base_params["output_dim"], + input_shape=( + base_params["input_dims"][1], + base_params["input_dims"][2], + ), + activation=base_params["activation"], + recurrent_activation=gru_params["inner_activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], unroll=unroll, ) ) else: model.add( GRU( - base_params['output_dim'], - input_length=base_params['input_dims'][1], - input_dim=base_params['input_dims'][2], - activation=base_params['activation'], - inner_activation=gru_params['inner_activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], + base_params["output_dim"], + input_length=base_params["input_dims"][1], + input_dim=base_params["input_dims"][2], + activation=base_params["activation"], + inner_activation=gru_params["inner_activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], unroll=unroll, ) ) model.set_weights([np.random.rand(*w.shape) for w in model.get_weights()]) mlkitmodel = get_mlkit_model_from_path(model) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) - activations_to_test_with_numpy = {'linear', 'relu'} - if base_params['activation'] in activations_to_test_with_numpy or gru_params[ - 'inner_activation'] in activations_to_test_with_numpy: + activations_to_test_with_numpy = {"linear", "relu"} + if ( + base_params["activation"] in activations_to_test_with_numpy + or gru_params["inner_activation"] in activations_to_test_with_numpy + ): keras_preds = get_numpy_prediction_gru(model, input_data).flatten() else: keras_preds = model.predict(input_data).flatten() - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and 
_macos_version() >= (10, 13): input_data = np.transpose(input_data, [1, 0, 2]) - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() if K.tensorflow_backend._SESSION: import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None @@ -1167,14 +1420,23 @@ def _test_gru_layer(self, keras_major_version, limit=None): except AssertionError: print( "Shape error:\nbase_params: {}\n gru_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( - base_params, gru_params, keras_preds.shape, coreml_preds.shape)) + base_params, + gru_params, + keras_preds.shape, + coreml_preds.shape, + ) + ) shape_err_models.append(base_params) i += 1 continue - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0) + max_denominator = np.maximum( + np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0 + ) try: - relative_error = coreml_preds / max_denominator - keras_preds / max_denominator + relative_error = ( + coreml_preds / max_denominator - keras_preds / max_denominator + ) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) except AssertionError: @@ -1185,32 +1447,40 @@ def _test_gru_layer(self, keras_major_version, limit=None): keras_preds / max_denominator, coreml_preds / max_denominator, keras_preds, - coreml_preds)) + coreml_preds, + ) + ) numerical_failiure += 1 numerical_err_models.append(base_params) i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_keras1_test_gru_layer_stress(self): self._test_gru_layer(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_test_gru_layer(self): self._test_gru_layer(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_test_gru_layer_stress(self): self._test_gru_layer(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
Skipping test.") @pytest.mark.keras2 def test_keras2_test_gru_layer(self): self._test_gru_layer(keras_major_version=2, limit=10) @@ -1230,8 +1500,8 @@ def setUp(self): unroll=[True], return_sequences=[True], top_return_sequences=[True, False], - activation=['tanh', 'sigmoid', 'hard_sigmoid'], - number_of_layers=[1, 2, 3] + activation=["tanh", "sigmoid", "hard_sigmoid"], + number_of_layers=[1, 2, 3], ) self.base_layer_params = list(itertools.product(*self.params_dict.values())) @@ -1242,99 +1512,156 @@ def _test_lstm_stacked(self, keras_major_version, limit=None): params = copy(self.base_layer_params) np.random.shuffle(params) i = 0 - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param)))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param))) + ] for base_params in params[:limit]: base_params = dict(zip(self.params_dict.keys(), base_params)) model = Sequential() - unroll = base_params['unroll'] - if base_params['input_dims'][1] == 1 and unroll == True: + unroll = base_params["unroll"] + if base_params["input_dims"][1] == 1 and unroll == True: unroll = False settings = dict( - activation=base_params['activation'], + activation=base_params["activation"], return_sequences=True, - go_backwards=base_params['go_backwards'], + go_backwards=base_params["go_backwards"], unroll=unroll, ) if keras_major_version == 2: - model.add(LSTM(base_params['output_dim'], - input_shape=base_params['input_dims'][1:], - recurrent_activation='sigmoid', - **settings)) - for idx in range(0, base_params['number_of_layers']): - model.add(LSTM(base_params['output_dim'], - input_shape=(base_params['input_dims'][1], base_params['output_dim']), - return_sequences=True, - activation='tanh', - recurrent_activation='sigmoid', - )) - model.add(LSTM(10, input_shape=(base_params['input_dims'][1], base_params['output_dim']), - return_sequences=base_params['top_return_sequences'], activation='sigmoid')) + model.add( + LSTM( + base_params["output_dim"], + input_shape=base_params["input_dims"][1:], + recurrent_activation="sigmoid", + **settings + ) + ) + for idx in range(0, base_params["number_of_layers"]): + model.add( + LSTM( + base_params["output_dim"], + input_shape=( + base_params["input_dims"][1], + base_params["output_dim"], + ), + return_sequences=True, + activation="tanh", + recurrent_activation="sigmoid", + ) + ) + model.add( + LSTM( + 10, + input_shape=( + base_params["input_dims"][1], + base_params["output_dim"], + ), + return_sequences=base_params["top_return_sequences"], + activation="sigmoid", + ) + ) else: - model.add(LSTM(output_dim=base_params['output_dim'], - input_length=base_params['input_dims'][1], - input_dim=base_params['input_dims'][2], - inner_activation='sigmoid', - **settings)) - for idx in range(0, base_params['number_of_layers']): - model.add(LSTM(output_dim=base_params["output_dim"], - return_sequences=True, - activation='tanh', - inner_activation='sigmoid', - )) - model.add(LSTM(output_dim=10, return_sequences=base_params['top_return_sequences'], activation='sigmoid')) + model.add( + LSTM( + output_dim=base_params["output_dim"], + input_length=base_params["input_dims"][1], + input_dim=base_params["input_dims"][2], + inner_activation="sigmoid", + **settings + ) + ) + for idx in range(0, base_params["number_of_layers"]): + model.add( + LSTM( + output_dim=base_params["output_dim"], + return_sequences=True, + activation="tanh", + inner_activation="sigmoid", + ) + ) + model.add( + LSTM( + output_dim=10, + 
return_sequences=base_params["top_return_sequences"], + activation="sigmoid", + ) + ) mlkitmodel = get_mlkit_model_from_path(model) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) - if is_macos() and macos_version() >= (10, 13): + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) + if _is_macos() and _macos_version() >= (10, 13): keras_preds = model.predict(input_data).flatten() input_data = np.transpose(input_data, [1, 0, 2]) - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() import tensorflow as tf + tf.reset_default_graph() K.tensorflow_backend._SESSION.close() K.tensorflow_backend._SESSION = None try: self.assertEquals(coreml_preds.shape, keras_preds.shape) except AssertionError: - print("Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( - base_params, keras_preds.shape, coreml_preds.shape)) + print( + "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( + base_params, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(base_params) i += 1 continue try: - max_denominator = np.maximum(np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0) - relative_error = coreml_preds / max_denominator - keras_preds / max_denominator + max_denominator = np.maximum( + np.maximum(np.abs(coreml_preds), np.abs(keras_preds)), 1.0 + ) + relative_error = ( + coreml_preds / max_denominator - keras_preds / max_denominator + ) for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) except AssertionError: - print("Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format(base_params, - keras_preds, - coreml_preds)) + print( + "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format( + base_params, keras_preds, coreml_preds + ) + ) numerical_failiure += 1 numerical_err_models.append(base_params) i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_keras1_lstm_stacked_stress(self): self._test_lstm_stacked(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_lstm_stacked(self): self._test_lstm_stacked(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_lstm_stacked_stress(self): self._test_lstm_stacked(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
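For reference, the `keras_major_version == 2` branch above assembles the stacked topology as follows; this is a condensed sketch with hypothetical sizes, assuming the same Keras 2.x environment these tests run under. Every LSTM except the last must return sequences so the next layer receives a full `(seq_len, hidden)` tensor:

```python
from keras.models import Sequential
from keras.layers import LSTM

seq_len, input_dim, hidden = 10, 8, 4  # hypothetical sizes

model = Sequential()
model.add(LSTM(hidden, input_shape=(seq_len, input_dim),
               recurrent_activation="sigmoid", return_sequences=True))
# Middle layers must also return sequences to keep the stack connected.
model.add(LSTM(hidden, return_sequences=True,
               activation="tanh", recurrent_activation="sigmoid"))
# Only the top layer's return_sequences flag is swept by the test.
model.add(LSTM(10, return_sequences=False, activation="sigmoid"))
```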
Skipping test.") @pytest.mark.keras2 def test_keras2_lstm_stacked(self): self._test_lstm_stacked(keras_major_version=2, limit=10) @@ -1346,112 +1673,148 @@ class DifferentIOModelsTypes(unittest.TestCase): """ def _test_one_to_many(self, keras_major_version): - params = dict( - input_dims=[1, 10], activation='tanh', - return_sequences=False, output_dim=3 - ), + params = ( + dict( + input_dims=[1, 10], + activation="tanh", + return_sequences=False, + output_dim=3, + ), + ) number_of_times = 4 model = Sequential() model.add(RepeatVector(number_of_times, input_shape=(10,))) if keras_major_version == 2: - model.add(LSTM(params[0]['output_dim'], - input_shape=params[0]['input_dims'], - activation=params[0]['activation'], - recurrent_activation='sigmoid', - return_sequences=True, - )) + model.add( + LSTM( + params[0]["output_dim"], + input_shape=params[0]["input_dims"], + activation=params[0]["activation"], + recurrent_activation="sigmoid", + return_sequences=True, + ) + ) else: - model.add(LSTM(output_dim=params[0]['output_dim'], - activation=params[0]['activation'], - inner_activation='sigmoid', - return_sequences=True, - )) + model.add( + LSTM( + output_dim=params[0]["output_dim"], + activation=params[0]["activation"], + inner_activation="sigmoid", + return_sequences=True, + ) + ) relative_error, keras_preds, coreml_preds = simple_model_eval(params, model) # print relative_error, '\n', keras_preds, '\n', coreml_preds, '\n' for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) def _test_many_to_one(self, keras_major_version): - params = dict( - input_dims=[1, 10, 5], go_backwards=False, activation='tanh', # fails with hard_sigmoid - stateful=False, unroll=False, return_sequences=False, output_dim=1 - ), + params = ( + dict( + input_dims=[1, 10, 5], + go_backwards=False, + activation="tanh", # fails with hard_sigmoid + stateful=False, + unroll=False, + return_sequences=False, + output_dim=1, + ), + ) model = Sequential() if keras_major_version == 2: - model.add(LSTM(params[0]['output_dim'], - input_shape=params[0]['input_dims'][1:], - activation=params[0]['activation'], - recurrent_activation='sigmoid', - )) + model.add( + LSTM( + params[0]["output_dim"], + input_shape=params[0]["input_dims"][1:], + activation=params[0]["activation"], + recurrent_activation="sigmoid", + ) + ) else: - model.add(LSTM(output_dim=params[0]['output_dim'], - input_shape=params[0]['input_dims'][1:], - activation=params[0]['activation'], - inner_activation='sigmoid', - )) + model.add( + LSTM( + output_dim=params[0]["output_dim"], + input_shape=params[0]["input_dims"][1:], + activation=params[0]["activation"], + inner_activation="sigmoid", + ) + ) relative_error, keras_preds, coreml_preds = simple_model_eval(params, model) # print relative_error, '\n', keras_preds, '\n', coreml_preds, '\n' for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) def _test_many_to_many(self, keras_major_version): - params = dict( - input_dims=[1, 10, 5], go_backwards=False, activation='tanh', # fails with hard_sigmoid - stateful=False, unroll=False, return_sequences=True, output_dim=1 - ), + params = ( + dict( + input_dims=[1, 10, 5], + go_backwards=False, + activation="tanh", # fails with hard_sigmoid + stateful=False, + unroll=False, + return_sequences=True, + output_dim=1, + ), + ) model = Sequential() if keras_major_version == 2: - model.add(LSTM(params[0]['output_dim'], - input_shape=params[0]['input_dims'][1:], - activation=params[0]['activation'], - 
recurrent_activation='sigmoid', - return_sequences=True, - )) + model.add( + LSTM( + params[0]["output_dim"], + input_shape=params[0]["input_dims"][1:], + activation=params[0]["activation"], + recurrent_activation="sigmoid", + return_sequences=True, + ) + ) else: - model.add(LSTM(output_dim=params[0]['output_dim'], - input_shape=params[0]['input_dims'][1:], - activation=params[0]['activation'], - inner_activation='sigmoid', - return_sequences=True, - )) + model.add( + LSTM( + output_dim=params[0]["output_dim"], + input_shape=params[0]["input_dims"][1:], + activation=params[0]["activation"], + inner_activation="sigmoid", + return_sequences=True, + ) + ) relative_error, keras_preds, coreml_preds = simple_model_eval(params, model) # print relative_error, '\n', keras_preds, '\n', coreml_preds, '\n' for i in range(len(relative_error)): self.assertLessEqual(relative_error[i], 0.01) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_test_one_to_many(self): self._test_one_to_many(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_test_many_to_one(self): self._test_many_to_one(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_many_to_many(self): self._test_many_to_many(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 def test_keras2_test_one_to_many(self): self._test_one_to_many(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 def test_keras2_test_many_to_one(self): self._test_many_to_one(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 def test_keras2_many_to_many(self): self._test_many_to_many(keras_major_version=2) -@unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') +@unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
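The one-to-many case above hinges on `RepeatVector`: a single feature vector is tiled into a short sequence that the LSTM then consumes. A condensed Keras 2 sketch of the same shape flow, with the sizes used in the test (10 input features, 4 repeats, `output_dim` 3):

```python
from keras.models import Sequential
from keras.layers import RepeatVector, LSTM

model = Sequential()
model.add(RepeatVector(4, input_shape=(10,)))  # (10,) -> (4, 10)
model.add(LSTM(3, activation="tanh", recurrent_activation="sigmoid",
               return_sequences=True))          # -> (4, 3)
assert model.output_shape == (None, 4, 3)
```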
Skipping test.") @pytest.mark.keras2 class InitialStateRecurrentModels(unittest.TestCase): """ @@ -1463,50 +1826,70 @@ def test_initial_state_GRU(self): data = np.random.rand(1, 1, 2) model = keras.models.Sequential() - model.add(keras.layers.GRU(5, input_shape=(1, 2), batch_input_shape=[1, 1, 2], stateful=True)) + model.add( + keras.layers.GRU( + 5, input_shape=(1, 2), batch_input_shape=[1, 1, 2], stateful=True + ) + ) model.get_layer(index=1).reset_states() - coreml_model = keras_converter.convert(model=model, input_names='data', output_names='output') - if is_macos() and macos_version() >= (10, 13): + coreml_model = keras_converter.convert( + model=model, input_names="data", output_names="output" + ) + if _is_macos() and _macos_version() >= (10, 13): keras_output_1 = model.predict(data) - coreml_full_output_1 = coreml_model.predict({'data': data}) - coreml_output_1 = coreml_full_output_1['output'] + coreml_full_output_1 = coreml_model.predict({"data": data}) + coreml_output_1 = coreml_full_output_1["output"] coreml_output_1 = np.expand_dims(coreml_output_1, 1) np.testing.assert_array_almost_equal(coreml_output_1.T, keras_output_1) - hidden_state = (np.random.rand(1, 5)) + hidden_state = np.random.rand(1, 5) model.get_layer(index=1).reset_states(hidden_state) - coreml_model = keras_converter.convert(model=model, input_names='data', output_names='output') + coreml_model = keras_converter.convert( + model=model, input_names="data", output_names="output" + ) spec = coreml_model.get_spec() - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): keras_output_2 = model.predict(data) - coreml_full_output_2 = coreml_model.predict({'data': data, spec.description.input[1].name: hidden_state[0]}) - coreml_output_2 = coreml_full_output_2['output'] + coreml_full_output_2 = coreml_model.predict( + {"data": data, spec.description.input[1].name: hidden_state[0]} + ) + coreml_output_2 = coreml_full_output_2["output"] coreml_output_2 = np.expand_dims(coreml_output_2, 1) np.testing.assert_array_almost_equal(coreml_output_2.T, keras_output_2) def test_initial_state_SimpleRNN(self): data = np.random.rand(1, 1, 2) model = keras.models.Sequential() - model.add(keras.layers.SimpleRNN(5, input_shape=(1, 2), batch_input_shape=[1, 1, 2], stateful=True)) + model.add( + keras.layers.SimpleRNN( + 5, input_shape=(1, 2), batch_input_shape=[1, 1, 2], stateful=True + ) + ) model.get_layer(index=1).reset_states() - coreml_model = keras_converter.convert(model=model, input_names='data', output_names='output') - if is_macos() and macos_version() >= (10, 13): + coreml_model = keras_converter.convert( + model=model, input_names="data", output_names="output" + ) + if _is_macos() and _macos_version() >= (10, 13): keras_output_1 = model.predict(data) - coreml_full_output_1 = coreml_model.predict({'data': data}) - coreml_output_1 = coreml_full_output_1['output'] + coreml_full_output_1 = coreml_model.predict({"data": data}) + coreml_output_1 = coreml_full_output_1["output"] coreml_output_1 = np.expand_dims(coreml_output_1, 1) np.testing.assert_array_almost_equal(coreml_output_1.T, keras_output_1) hidden_state = np.random.rand(1, 5) model.get_layer(index=1).reset_states(hidden_state) - coreml_model = keras_converter.convert(model=model, input_names='data', output_names='output') + coreml_model = keras_converter.convert( + model=model, input_names="data", output_names="output" + ) spec = coreml_model.get_spec() - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and 
_macos_version() >= (10, 13): keras_output_2 = model.predict(data) - coreml_full_output_2 = coreml_model.predict({'data': data, spec.description.input[1].name: hidden_state[0]}) - coreml_output_2 = coreml_full_output_2['output'] + coreml_full_output_2 = coreml_model.predict( + {"data": data, spec.description.input[1].name: hidden_state[0]} + ) + coreml_output_2 = coreml_full_output_2["output"] coreml_output_2 = np.expand_dims(coreml_output_2, 1) np.testing.assert_array_almost_equal(coreml_output_2.T, keras_output_2) @@ -1514,15 +1897,21 @@ def test_initial_state_LSTM(self): data = np.random.rand(1, 1, 2) model = keras.models.Sequential() - model.add(keras.layers.LSTM(5, input_shape=(1, 2), batch_input_shape=[1, 1, 2], stateful=True)) + model.add( + keras.layers.LSTM( + 5, input_shape=(1, 2), batch_input_shape=[1, 1, 2], stateful=True + ) + ) model.get_layer(index=1).reset_states() - if is_macos() and macos_version() >= (10, 13): - coreml_model = keras_converter.convert(model=model, input_names='data', output_names='output') + if _is_macos() and _macos_version() >= (10, 13): + coreml_model = keras_converter.convert( + model=model, input_names="data", output_names="output" + ) keras_output_1 = model.predict(data) - coreml_full_output_1 = coreml_model.predict({'data': data}) - coreml_output_1 = coreml_full_output_1['output'] + coreml_full_output_1 = coreml_model.predict({"data": data}) + coreml_output_1 = coreml_full_output_1["output"] coreml_output_1 = np.expand_dims(coreml_output_1, 1) np.testing.assert_array_almost_equal(coreml_output_1.T, keras_output_1) @@ -1530,21 +1919,27 @@ def test_initial_state_LSTM(self): hidden_state = (np.random.rand(1, 5), np.random.rand(1, 5)) model.get_layer(index=1).reset_states(hidden_state) - coreml_model = keras_converter.convert(model=model, input_names='data', output_names='output') + coreml_model = keras_converter.convert( + model=model, input_names="data", output_names="output" + ) spec = coreml_model.get_spec() - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): keras_output_2 = model.predict(data) coreml_full_output_2 = coreml_model.predict( - {'data': data, spec.description.input[1].name: hidden_state[0][0], - spec.description.input[2].name: hidden_state[1][0]}) - coreml_output_2 = coreml_full_output_2['output'] + { + "data": data, + spec.description.input[1].name: hidden_state[0][0], + spec.description.input[2].name: hidden_state[1][0], + } + ) + coreml_output_2 = coreml_full_output_2["output"] coreml_output_2 = np.expand_dims(coreml_output_2, 1) np.testing.assert_array_almost_equal(coreml_output_2.T, keras_output_2) -if __name__ == '__main__': +if __name__ == "__main__": # unittest.main() ## To run a specific test: suite = unittest.TestSuite() diff --git a/coremltools/test/neural_network/test_shaper_bindings.py b/coremltools/test/neural_network/test_shaper_bindings.py deleted file mode 100644 index f5d17c5ec..000000000 --- a/coremltools/test/neural_network/test_shaper_bindings.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2018, Apple Inc. All rights reserved. 
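The initial-state tests above all follow one pattern: seed the Keras layer via `reset_states`, then feed the same state to the converted model through the extra input the converter generates, whose name is read back from `spec.description.input[1]`. A condensed sketch of that round trip, using only the calls shown in the tests (prediction, as in the tests, requires macOS 10.13+):

```python
import numpy as np
import keras
from coremltools.converters import keras as keras_converter

model = keras.models.Sequential()
model.add(keras.layers.SimpleRNN(5, batch_input_shape=[1, 1, 2], stateful=True))

hidden_state = np.random.rand(1, 5)
model.get_layer(index=1).reset_states(hidden_state)  # seed the Keras side

coreml_model = keras_converter.convert(
    model=model, input_names="data", output_names="output"
)
spec = coreml_model.get_spec()
state_input = spec.description.input[1].name  # converter-generated state input

data = np.random.rand(1, 1, 2)
# predict() is only available on macOS 10.13+, matching the guards above.
coreml_out = coreml_model.predict({"data": data, state_input: hidden_state[0]})
```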
-# -# Use of this source code is governed by a BSD-3-clause license that can be -# found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause - -import tempfile -import unittest - -import coremltools -from coremltools.models import NeuralNetworkShaper -from coremltools.models.neural_network.flexible_shape_utils import \ - can_allow_multiple_input_shapes, get_allowed_shape_ranges, \ - update_multiarray_shape_range, _get_input_names, \ - NeuralNetworkMultiArrayShapeRange, _CONSTRAINED_KEYS -from coremltools.models.utils import is_macos -from coremltools.proto import Model_pb2 - - -@unittest.skipUnless(is_macos(), 'Only supported on MacOS platform.') -class ShaperTest(unittest.TestCase): - - @classmethod - def setUpClass(self): - - width = 100 - height = width - numLayers = 10 - - channels = 1 - modelName = 'planSubmitTestColorArray' - - self.spec = coremltools.proto.Model_pb2.Model() - self.spec.specificationVersion = 1 - - input_ = self.spec.description.input.add() - input_.name = 'input' - input_.type.multiArrayType.MergeFromString(b'') - input_.type.multiArrayType.shape.append(channels) - input_.type.multiArrayType.shape.append(height) - input_.type.multiArrayType.shape.append(width) - input_.type.multiArrayType.dataType = coremltools.proto.Model_pb2.ArrayFeatureType.DOUBLE - - output_ = self.spec.description.output.add() - output_.name = 'output' - output_.type.multiArrayType.MergeFromString(b'') - output_.type.multiArrayType.dataType = coremltools.proto.Model_pb2.ArrayFeatureType.DOUBLE - - # big stack of identity convolutions - for i in range(0, numLayers): - - layer = self.spec.neuralNetwork.layers.add() - if (i == 0): - layer.input.append('input') - else: - layer.input.append('conv' + str(i - 1)) - - if (i == numLayers - 1): - ####### - # Just convolutions - - layer.name = 'last_layer' - layer.output.append('output') - layer.name = 'conv' + str(i) - layer.convolution.outputChannels = channels - layer.convolution.kernelChannels = channels - layer.convolution.kernelSize.append(1) - layer.convolution.kernelSize.append(1) - layer.convolution.same.MergeFromString(b'') - layer.convolution.hasBias = False - for i in range(0, channels): - for j in range(0, channels): - if i == j: - layer.convolution.weights.floatValue.append(1.0) - else: - layer.convolution.weights.floatValue.append(0.0) - - else: # not the last layer - layer.output.append('conv' + str(i)) - layer.name = 'conv' + str(i) - - layer.convolution.outputChannels = channels - layer.convolution.kernelChannels = channels - - layer.convolution.kernelSize.append(1) - layer.convolution.kernelSize.append(1) - layer.convolution.same.MergeFromString(b'') - - layer.convolution.hasBias = False - - for i in range(0, channels): - for j in range(0, channels): - if (i == j): - layer.convolution.weights.floatValue.append(1.0) - else: - layer.convolution.weights.floatValue.append(0.0) - - self.coremlModel = coremltools.models.MLModel(self.spec) - - def test_model_creation_spec(self): - - shaper = NeuralNetworkShaper(self.spec) - self.assertIsNotNone(shaper) - - def test_model_creation_file(self): - - filename = tempfile.mktemp(suffix = '.mlmodel') - self.coremlModel.save(filename) - - shaper = NeuralNetworkShaper(filename) - self.assertIsNotNone(shaper) - - def test_get_shape(self): - - shaper = NeuralNetworkShaper(self.spec) - - input_shape = shaper.shape('input') - self.assertIsNotNone(input_shape) - - random_shape = shaper.shape('conv5') - self.assertIsNotNone(random_shape) - - with self.assertRaises(Exception): - 
missing_shape = shaper.shape('idontexist') - - def test_is_flexible(self): - - self.assertTrue(can_allow_multiple_input_shapes(self.spec)) - - def test_get_ranges(self): - - ranges = get_allowed_shape_ranges(self.spec) - input_names = _get_input_names(self.spec) - spec_copy = self.spec - for input_name in input_names: - whole_range = ranges[input_name] - constraint_range = {key: whole_range[key] for key in _CONSTRAINED_KEYS} - shape_range = NeuralNetworkMultiArrayShapeRange(constraint_range) - update_multiarray_shape_range(spec_copy, input_name, shape_range) - - self.assertTrue(spec_copy.description.input[0].type.multiArrayType.HasField('shapeRange')) diff --git a/coremltools/test/neural_network/test_simple_recurrent_single_layer.py b/coremltools/test/neural_network/test_simple_recurrent_single_layer.py index 58e805198..6cb032c6b 100644 --- a/coremltools/test/neural_network/test_simple_recurrent_single_layer.py +++ b/coremltools/test/neural_network/test_simple_recurrent_single_layer.py @@ -10,11 +10,11 @@ import tempfile import itertools import coremltools -from coremltools._deps import HAS_KERAS_TF, HAS_KERAS2_TF -from coremltools.models.utils import macos_version, is_macos +from coremltools._deps import _HAS_KERAS_TF, _HAS_KERAS2_TF +from coremltools.models.utils import _macos_version, _is_macos import pytest -if HAS_KERAS_TF or HAS_KERAS2_TF: +if _HAS_KERAS_TF or _HAS_KERAS2_TF: from keras.models import Sequential from keras.layers import LSTM, GRU, SimpleRNN from coremltools.converters import keras as keras_converter @@ -22,22 +22,22 @@ def _get_mlkit_model_from_path(model, model_path): from coremltools.converters import keras as keras_converter - print('converting') - model = keras_converter.convert(model, ['data'], ['output']) + + print("converting") + model = keras_converter.convert(model, ["data"], ["output"]) return model def generate_input(dim0, dim1, dim2): - input_data = np.random.rand(dim0, dim1, dim2).astype('f') # astype() should be removed + input_data = np.random.rand(dim0, dim1, dim2).astype( + "f" + ) # astype() should be removed return input_data def valid_params(params): """Checks if this combination of parameters is allowed by Keras""" - return not ( - params['input_dims'][1] == 1 and - params['unroll'] - ) + return not (params["input_dims"][1] == 1 and params["unroll"]) class RecurrentLayerTest(unittest.TestCase): @@ -53,7 +53,7 @@ def setUp(self): go_backwards=[False], # True], unroll=[False, True], return_sequences=[False, True], - activation=['sigmoid', 'tanh', 'hard_sigmoid', 'linear'] + activation=["sigmoid", "tanh", "hard_sigmoid", "linear"], ) self.base_layer_params = list(itertools.product(*self.params_dict.values())) @@ -66,107 +66,137 @@ class SimpleRNNLayer(RecurrentLayerTest): def setUp(self): super(SimpleRNNLayer, self).setUp() self.simple_rnn_params_dict = dict( - dropout=[{'dropout_W': 0., 'dropout_U': 0.}], - regularizer=[{'W_regularizer': None, 'U_regularizer': None, 'b_regularizer': None}], + dropout=[{"dropout_W": 0.0, "dropout_U": 0.0}], + regularizer=[ + {"W_regularizer": None, "U_regularizer": None, "b_regularizer": None} + ], + ) + self.rnn_layer_params = list( + itertools.product(*self.simple_rnn_params_dict.values()) ) - self.rnn_layer_params = list(itertools.product(*self.simple_rnn_params_dict.values())) def _test_rnn_layer(self, keras_major_version, limit=None): i = 0 - layer_name = str(SimpleRNN).split('.')[3].split("'>")[0] + layer_name = str(SimpleRNN).split(".")[3].split("'>")[0] numerical_err_models = [] shape_err_models = [] params = 
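`valid_params` above encodes a Keras constraint rather than a converter one: a recurrent layer cannot be unrolled over a length-1 sequence, so those combinations are filtered out before any model is built. Standalone illustration of the predicate, copied from the test:

```python
def valid_params(params):
    """Checks if this combination of parameters is allowed by Keras."""
    return not (params["input_dims"][1] == 1 and params["unroll"])

assert valid_params({"input_dims": (1, 10, 5), "unroll": True})
assert not valid_params({"input_dims": (1, 1, 5), "unroll": True})
assert valid_params({"input_dims": (1, 1, 5), "unroll": False})
```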
list(itertools.product(self.base_layer_params, self.rnn_layer_params)) np.random.shuffle(params) - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param[0])))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param[0]))) + ] for base_params, rnn_params in params[:limit]: base_params = dict(zip(self.params_dict.keys(), base_params)) rnn_params = dict(zip(self.simple_rnn_params_dict.keys(), rnn_params)) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) model = Sequential() settings = dict( - activation=base_params['activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], - unroll=base_params['unroll'], + activation=base_params["activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], + unroll=base_params["unroll"], ) if keras_major_version == 2: model.add( SimpleRNN( - base_params['output_dim'], - input_shape=base_params['input_dims'][1:], - dropout=rnn_params['dropout']['dropout_U'], - recurrent_dropout=rnn_params['dropout']['dropout_W'], - kernel_regularizer=rnn_params['regularizer']['W_regularizer'], - recurrent_regularizer=rnn_params['regularizer']['U_regularizer'], - bias_regularizer=rnn_params['regularizer']['b_regularizer'], + base_params["output_dim"], + input_shape=base_params["input_dims"][1:], + dropout=rnn_params["dropout"]["dropout_U"], + recurrent_dropout=rnn_params["dropout"]["dropout_W"], + kernel_regularizer=rnn_params["regularizer"]["W_regularizer"], + recurrent_regularizer=rnn_params["regularizer"][ + "U_regularizer" + ], + bias_regularizer=rnn_params["regularizer"]["b_regularizer"], **settings ) ) else: model.add( SimpleRNN( - base_params['output_dim'], - input_length=base_params['input_dims'][1], - input_dim=base_params['input_dims'][2], - dropout_U=rnn_params['dropout']['dropout_U'], - dropout_W=rnn_params['dropout']['dropout_W'], - W_regularizer=rnn_params['regularizer']['W_regularizer'], - U_regularizer=rnn_params['regularizer']['U_regularizer'], - b_regularizer=rnn_params['regularizer']['b_regularizer'], + base_params["output_dim"], + input_length=base_params["input_dims"][1], + input_dim=base_params["input_dims"][2], + dropout_U=rnn_params["dropout"]["dropout_U"], + dropout_W=rnn_params["dropout"]["dropout_W"], + W_regularizer=rnn_params["regularizer"]["W_regularizer"], + U_regularizer=rnn_params["regularizer"]["U_regularizer"], + b_regularizer=rnn_params["regularizer"]["b_regularizer"], **settings ) ) model_dir = tempfile.mkdtemp() - keras_model_path = os.path.join(model_dir, 'keras.h5') - coreml_model_path = os.path.join(model_dir, 'keras.mlmodel') + keras_model_path = os.path.join(model_dir, "keras.h5") + coreml_model_path = os.path.join(model_dir, "keras.mlmodel") model.save_weights(keras_model_path) mlkitmodel = _get_mlkit_model_from_path(model, coreml_model_path) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): keras_preds = model.predict(input_data).flatten() input_data = np.transpose(input_data, [1, 0, 2]) - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() try: self.assertEquals(coreml_preds.shape, 
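One detail worth calling out in the prediction path above: the tests generate Keras-style batch-major input, then transpose to sequence-major before calling the converted model, since that is the layout the converted recurrent layers are fed here. A sketch of just the layout step:

```python
import numpy as np

batch, seq_len, features = 1, 10, 5
input_data = np.random.rand(batch, seq_len, features).astype("f")

# keras_preds = model.predict(input_data)           # (batch, seq, features)
coreml_input = np.transpose(input_data, [1, 0, 2])  # -> (seq, batch, features)
assert coreml_input.shape == (seq_len, batch, features)
# coreml_preds = mlkitmodel.predict({"data": coreml_input})["output"]
```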
keras_preds.shape) except AssertionError: - print("Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( - base_params, keras_preds.shape, coreml_preds.shape)) + print( + "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( + base_params, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(base_params) shutil.rmtree(model_dir) i += 1 continue try: for idx in range(0, len(coreml_preds)): - relative_error = (coreml_preds[idx] - keras_preds[idx]) / coreml_preds[idx] + relative_error = ( + coreml_preds[idx] - keras_preds[idx] + ) / coreml_preds[idx] self.assertAlmostEqual(relative_error, 0, places=2) except AssertionError: - print("Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format(base_params, keras_preds, coreml_preds)) + print( + "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format( + base_params, keras_preds, coreml_preds + ) + ) numerical_err_models.append(base_params) shutil.rmtree(model_dir) i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_keras1_rnn_layer_stress(self): self._test_rnn_layer(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_rnn_layer(self): self._test_rnn_layer(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_rnn_layer_stress(self): self._test_rnn_layer(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
Skipping test.") @pytest.mark.keras2 def test_keras2_rnn_layer(self): self._test_rnn_layer(keras_major_version=2, limit=10) @@ -180,10 +210,14 @@ class LSTMLayer(RecurrentLayerTest): def setUp(self): super(LSTMLayer, self).setUp() self.lstm_params_dict = dict( - dropout=[{'dropout_W': 0., 'dropout_U': 0.}], - regularizer=[{'W_regularizer': None, 'U_regularizer': None, 'b_regularizer': None}], + dropout=[{"dropout_W": 0.0, "dropout_U": 0.0}], + regularizer=[ + {"W_regularizer": None, "U_regularizer": None, "b_regularizer": None} + ], + ) + self.lstm_layer_params = list( + itertools.product(*self.lstm_params_dict.values()) ) - self.lstm_layer_params = list(itertools.product(*self.lstm_params_dict.values())) def _test_lstm_layer(self, keras_major_version, limit=None): i = 0 @@ -191,97 +225,121 @@ def _test_lstm_layer(self, keras_major_version, limit=None): shape_err_models = [] params = list(itertools.product(self.base_layer_params, self.lstm_layer_params)) np.random.shuffle(params) - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param[0])))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param[0]))) + ] for base_params, lstm_params in params[:limit]: base_params = dict(zip(self.params_dict.keys(), base_params)) lstm_params = dict(zip(self.lstm_params_dict.keys(), lstm_params)) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) model = Sequential() settings = dict( - activation=base_params['activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], - unroll=base_params['unroll'], + activation=base_params["activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], + unroll=base_params["unroll"], ) if keras_major_version == 2: model.add( LSTM( - base_params['output_dim'], - input_shape=base_params['input_dims'][1:], - recurrent_dropout=lstm_params['dropout']['dropout_U'], - dropout=lstm_params['dropout']['dropout_W'], - kernel_regularizer=lstm_params['regularizer']['W_regularizer'], - recurrent_regularizer=lstm_params['regularizer']['U_regularizer'], - bias_regularizer=lstm_params['regularizer']['b_regularizer'], + base_params["output_dim"], + input_shape=base_params["input_dims"][1:], + recurrent_dropout=lstm_params["dropout"]["dropout_U"], + dropout=lstm_params["dropout"]["dropout_W"], + kernel_regularizer=lstm_params["regularizer"]["W_regularizer"], + recurrent_regularizer=lstm_params["regularizer"][ + "U_regularizer" + ], + bias_regularizer=lstm_params["regularizer"]["b_regularizer"], **settings ) ) else: model.add( LSTM( - base_params['output_dim'], - input_length=base_params['input_dims'][1], - input_dim=base_params['input_dims'][2], - dropout_U=lstm_params['dropout']['dropout_U'], - dropout_W=lstm_params['dropout']['dropout_W'], - W_regularizer=lstm_params['regularizer']['W_regularizer'], - U_regularizer=lstm_params['regularizer']['U_regularizer'], - b_regularizer=lstm_params['regularizer']['b_regularizer'], + base_params["output_dim"], + input_length=base_params["input_dims"][1], + input_dim=base_params["input_dims"][2], + dropout_U=lstm_params["dropout"]["dropout_U"], + dropout_W=lstm_params["dropout"]["dropout_W"], + W_regularizer=lstm_params["regularizer"]["W_regularizer"], + 
U_regularizer=lstm_params["regularizer"]["U_regularizer"], + b_regularizer=lstm_params["regularizer"]["b_regularizer"], **settings ) ) model_dir = tempfile.mkdtemp() - keras_model_path = os.path.join(model_dir, 'keras.h5') - coreml_model_path = os.path.join(model_dir, 'keras.mlmodel') + keras_model_path = os.path.join(model_dir, "keras.h5") + coreml_model_path = os.path.join(model_dir, "keras.mlmodel") model.save_weights(keras_model_path) mlkitmodel = _get_mlkit_model_from_path(model, coreml_model_path) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): keras_preds = model.predict(input_data).flatten() input_data = np.transpose(input_data, [1, 0, 2]) - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() try: self.assertEquals(coreml_preds.shape, keras_preds.shape) except AssertionError: - print("Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( - base_params, keras_preds.shape, coreml_preds.shape)) + print( + "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( + base_params, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(base_params) shutil.rmtree(model_dir) i += 1 continue try: for idx in range(0, len(coreml_preds)): - relative_error = (coreml_preds[idx] - keras_preds[idx]) / coreml_preds[idx] + relative_error = ( + coreml_preds[idx] - keras_preds[idx] + ) / coreml_preds[idx] self.assertAlmostEqual(relative_error, 0, places=2) except AssertionError: - print("Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format(base_params, - keras_preds, - coreml_preds)) + print( + "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format( + base_params, keras_preds, coreml_preds + ) + ) numerical_err_models.append(base_params) shutil.rmtree(model_dir) i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_keras1_lstm_layer_stress(self): self._test_lstm_layer(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_lstm_layer(self): self._test_lstm_layer(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_lstm_layer_stress(self): self._test_lstm_layer(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
Skipping test.") @pytest.mark.keras2 def test_keras2_lstm_layer(self): self._test_lstm_layer(keras_major_version=2, limit=10) @@ -295,8 +353,10 @@ class GRULayer(RecurrentLayerTest): def setUp(self): super(GRULayer, self).setUp() self.gru_params_dict = dict( - dropout=[{'dropout_W': 0., 'dropout_U': 0.}], - regularizer=[{'W_regularizer': None, 'U_regularizer': None, 'b_regularizer': None}], + dropout=[{"dropout_W": 0.0, "dropout_U": 0.0}], + regularizer=[ + {"W_regularizer": None, "U_regularizer": None, "b_regularizer": None} + ], ) self.gru_layer_params = list(itertools.product(*self.gru_params_dict.values())) @@ -306,97 +366,121 @@ def _test_gru_layer(self, keras_major_version, limit=None): shape_err_models = [] params = list(itertools.product(self.base_layer_params, self.gru_layer_params)) np.random.shuffle(params) - params = [param for param in params if valid_params(dict(zip(self.params_dict.keys(), param[0])))] + params = [ + param + for param in params + if valid_params(dict(zip(self.params_dict.keys(), param[0]))) + ] for base_params, gru_params in params[:limit]: base_params = dict(zip(self.params_dict.keys(), base_params)) gru_params = dict(zip(self.gru_params_dict.keys(), gru_params)) - input_data = generate_input(base_params['input_dims'][0], base_params['input_dims'][1], - base_params['input_dims'][2]) + input_data = generate_input( + base_params["input_dims"][0], + base_params["input_dims"][1], + base_params["input_dims"][2], + ) model = Sequential() settings = dict( - activation=base_params['activation'], - return_sequences=base_params['return_sequences'], - go_backwards=base_params['go_backwards'], - unroll=base_params['unroll'], + activation=base_params["activation"], + return_sequences=base_params["return_sequences"], + go_backwards=base_params["go_backwards"], + unroll=base_params["unroll"], ) if keras_major_version == 2: model.add( GRU( - base_params['output_dim'], - input_shape=base_params['input_dims'][1:], - recurrent_dropout=gru_params['dropout']['dropout_U'], - dropout=gru_params['dropout']['dropout_W'], - kernel_regularizer=gru_params['regularizer']['W_regularizer'], - recurrent_regularizer=gru_params['regularizer']['U_regularizer'], - bias_regularizer=gru_params['regularizer']['b_regularizer'], + base_params["output_dim"], + input_shape=base_params["input_dims"][1:], + recurrent_dropout=gru_params["dropout"]["dropout_U"], + dropout=gru_params["dropout"]["dropout_W"], + kernel_regularizer=gru_params["regularizer"]["W_regularizer"], + recurrent_regularizer=gru_params["regularizer"][ + "U_regularizer" + ], + bias_regularizer=gru_params["regularizer"]["b_regularizer"], **settings ) ) else: model.add( GRU( - base_params['output_dim'], - input_length=base_params['input_dims'][1], - input_dim=base_params['input_dims'][2], - dropout_U=gru_params['dropout']['dropout_U'], - dropout_W=gru_params['dropout']['dropout_W'], - W_regularizer=gru_params['regularizer']['W_regularizer'], - U_regularizer=gru_params['regularizer']['U_regularizer'], - b_regularizer=gru_params['regularizer']['b_regularizer'], + base_params["output_dim"], + input_length=base_params["input_dims"][1], + input_dim=base_params["input_dims"][2], + dropout_U=gru_params["dropout"]["dropout_U"], + dropout_W=gru_params["dropout"]["dropout_W"], + W_regularizer=gru_params["regularizer"]["W_regularizer"], + U_regularizer=gru_params["regularizer"]["U_regularizer"], + b_regularizer=gru_params["regularizer"]["b_regularizer"], **settings ) ) model_dir = tempfile.mkdtemp() - keras_model_path = os.path.join(model_dir, 
'keras.h5') - coreml_model_path = os.path.join(model_dir, 'keras.mlmodel') + keras_model_path = os.path.join(model_dir, "keras.h5") + coreml_model_path = os.path.join(model_dir, "keras.mlmodel") model.save_weights(keras_model_path) mlkitmodel = _get_mlkit_model_from_path(model, coreml_model_path) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): keras_preds = model.predict(input_data).flatten() input_data = np.transpose(input_data, [1, 0, 2]) - coreml_preds = mlkitmodel.predict({'data': input_data})['output'].flatten() + coreml_preds = mlkitmodel.predict({"data": input_data})[ + "output" + ].flatten() try: self.assertEquals(coreml_preds.shape, keras_preds.shape) except AssertionError: - print("Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( - base_params, keras_preds.shape, coreml_preds.shape)) + print( + "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}".format( + base_params, keras_preds.shape, coreml_preds.shape + ) + ) shape_err_models.append(base_params) shutil.rmtree(model_dir) i += 1 continue try: for idx in range(0, len(coreml_preds)): - relative_error = (coreml_preds[idx] - keras_preds[idx]) / coreml_preds[idx] + relative_error = ( + coreml_preds[idx] - keras_preds[idx] + ) / coreml_preds[idx] self.assertAlmostEqual(relative_error, 0, places=2) except AssertionError: - print("Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format(base_params, - keras_preds, - coreml_preds)) + print( + "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}".format( + base_params, keras_preds, coreml_preds + ) + ) numerical_err_models.append(base_params) shutil.rmtree(model_dir) i += 1 - self.assertEquals(shape_err_models, [], msg='Shape error models {}'.format(shape_err_models)) - self.assertEquals(numerical_err_models, [], msg='Numerical error models {}'.format(numerical_err_models)) + self.assertEquals( + shape_err_models, [], msg="Shape error models {}".format(shape_err_models) + ) + self.assertEquals( + numerical_err_models, + [], + msg="Numerical error models {}".format(numerical_err_models), + ) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 @pytest.mark.slow def test_keras1_gru_layer_stress(self): self._test_gru_layer(keras_major_version=1) - @unittest.skipIf(not HAS_KERAS_TF, 'Missing keras 1. Skipping test.') + @unittest.skipIf(not _HAS_KERAS_TF, "Missing keras 1. Skipping test.") @pytest.mark.keras1 def test_keras1_gru_layer(self): self._test_gru_layer(keras_major_version=1, limit=10) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. Skipping test.") @pytest.mark.keras2 @pytest.mark.slow def test_keras2_gru_layer_stress(self): self._test_gru_layer(keras_major_version=2) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, "Missing keras 2. 
Skipping test.") @pytest.mark.keras2 def test_keras2_gru_layer(self): self._test_gru_layer(keras_major_version=2, limit=10) diff --git a/coremltools/test/neural_network/test_tf_numeric.py b/coremltools/test/neural_network/test_tf_numeric.py index cc37e891f..e41fc63be 100644 --- a/coremltools/test/neural_network/test_tf_numeric.py +++ b/coremltools/test/neural_network/test_tf_numeric.py @@ -1,25 +1,31 @@ -from __future__ import print_function +from __future__ import print_function as _ import unittest import numpy as np import coremltools.models.datatypes as datatypes from coremltools.models import neural_network as neural_network from coremltools.models import MLModel -from coremltools.models.utils import is_macos, macos_version -import tensorflow as tf +from coremltools.models.utils import _is_macos, _macos_version +from coremltools._deps import _HAS_TF, MSG_TF1_NOT_FOUND + +if _HAS_TF: + import tensorflow as tf import itertools np.random.seed(10) np.set_printoptions(precision=4, suppress=True) + +@unittest.skipIf(not _HAS_TF, MSG_TF1_NOT_FOUND) class CorrectnessTest(unittest.TestCase): - def _compare_shapes(self, ref_preds, coreml_preds): if np.squeeze(ref_preds).shape != np.squeeze(coreml_preds).shape: return False - else: - return True - - def _compare_predictions_numerical(self, ref_preds, coreml_preds, snr_thresh=15, psnr_thresh=30): + else: + return True + + def _compare_predictions_numerical( + self, ref_preds, coreml_preds, snr_thresh=15, psnr_thresh=30 + ): ref_preds = ref_preds.flatten() coreml_preds = coreml_preds.flatten() noise = coreml_preds - ref_preds @@ -28,49 +34,63 @@ def _compare_predictions_numerical(self, ref_preds, coreml_preds, snr_thresh=15, max_signal_energy = np.amax(ref_preds ** 2) if noise_var > 1e-6 and signal_energy > 1e-6: - SNR = 10 * np.log10(signal_energy / noise_var) PSNR = 10 * np.log10(max_signal_energy / noise_var) - #print('ref: ', ref_preds) - #print('coreml: ', coreml_preds) - #print('noise: ', noise) + # print('ref: ', ref_preds) + # print('coreml: ', coreml_preds) + # print('noise: ', noise) - print('SNR: {}, PSNR: {}'.format(SNR, PSNR)) - print('noise var: ', np.mean(noise ** 2)) - print('max signal energy: ', np.amax(ref_preds ** 2)) - print('signal energy: ', np.mean(ref_preds ** 2)) + print("SNR: {}, PSNR: {}".format(SNR, PSNR)) + print("noise var: ", np.mean(noise ** 2)) + print("max signal energy: ", np.amax(ref_preds ** 2)) + print("signal energy: ", np.mean(ref_preds ** 2)) self.assertGreaterEqual(PSNR, psnr_thresh) self.assertGreaterEqual(SNR, snr_thresh) - def _test_model(self, input_dict, ref_output_dict, coreml_model, snr_thresh=15, psnr_thresh=30, cpu_only=False): + def _test_model( + self, + input_dict, + ref_output_dict, + coreml_model, + snr_thresh=15, + psnr_thresh=30, + cpu_only=False, + ): coreml_out_dict = coreml_model.predict(input_dict, useCPUOnly=cpu_only) for out_ in list(ref_output_dict.keys()): ref_out = ref_output_dict[out_].flatten() coreml_out = coreml_out_dict[out_].flatten() self.assertEquals(len(coreml_out), len(ref_out)) - self._compare_predictions_numerical(ref_out, coreml_out, snr_thresh=snr_thresh, psnr_thresh=psnr_thresh) + self._compare_predictions_numerical( + ref_out, coreml_out, snr_thresh=snr_thresh, psnr_thresh=psnr_thresh + ) -@unittest.skipUnless(is_macos(), 'Only supported for MacOS platform.') +@unittest.skipUnless(_is_macos(), "Only supported for MacOS platform.") class StressTest(CorrectnessTest): - def runTest(self): pass def test_data_reorganize(self, cpu_only=False): - def 
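The comparison helper above scores TF-vs-Core ML agreement with signal-to-noise ratios rather than elementwise tolerances; the default thresholds are 15 dB SNR and 30 dB PSNR. The same arithmetic as a standalone sketch over synthetic near-matching arrays:

```python
import numpy as np

ref_preds = np.random.rand(100)
coreml_preds = ref_preds + np.random.normal(0, 1e-2, 100)  # synthetic noise

noise = coreml_preds - ref_preds
noise_var = np.mean(noise ** 2)
signal_energy = np.mean(ref_preds ** 2)
max_signal_energy = np.amax(ref_preds ** 2)

SNR = 10 * np.log10(signal_energy / noise_var)
PSNR = 10 * np.log10(max_signal_energy / noise_var)

# Mirrors the assertGreaterEqual checks with the default thresholds.
assert SNR >= 15 and PSNR >= 30
```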
get_coreml_model_reorganize(X, params): eval = True mlmodel = None try: input_dim = X.shape[2:] - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - builder.add_reorganize_data('reorg', 'data', 'output', mode=params["mode"], - block_size=params["block_size"]) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features + ) + builder.add_reorganize_data( + "reorg", + "data", + "output", + mode=params["mode"], + block_size=params["block_size"], + ) mlmodel = MLModel(builder.spec) except RuntimeError as e: print(e) @@ -84,50 +104,62 @@ def get_tf_predictions_reorganize(X, params): Cin = params["C"] with tf.Graph().as_default(), tf.Session() as sess: x = tf.placeholder(tf.float32, shape=(1, Hin, Win, Cin)) - if params["mode"] == 'SPACE_TO_DEPTH': + if params["mode"] == "SPACE_TO_DEPTH": y = tf.space_to_depth(x, params["block_size"]) else: y = tf.depth_to_space(x, params["block_size"]) return sess.run(y, feed_dict={x: X}) - ''' + """ Define Params - ''' - params_dict = dict( C = [1,2,8,16,15,27], - H = [2,4,6,8,10,15,21,16], - W = [2,4,6,8,10,15,21,16], - block_size = [2,3,4,5], - mode = ['SPACE_TO_DEPTH','DEPTH_TO_SPACE'] - ) + """ + params_dict = dict( + C=[1, 2, 8, 16, 15, 27], + H=[2, 4, 6, 8, 10, 15, 21, 16], + W=[2, 4, 6, 8, 10, 15, 21, 16], + block_size=[2, 3, 4, 5], + mode=["SPACE_TO_DEPTH", "DEPTH_TO_SPACE"], + ) params = [x for x in list(itertools.product(*params_dict.values()))] - all_candidates = [dict(zip(params_dict.keys(), x)) for x in params] - valid_params = [] + all_candidates = [dict(zip(params_dict.keys(), x)) for x in params] + valid_params = [] for pr in all_candidates: - if pr["mode"] == 'SPACE_TO_DEPTH': + if pr["mode"] == "SPACE_TO_DEPTH": if pr["H"] % pr["block_size"] == 0 and pr["W"] % pr["block_size"] == 0: - valid_params.append(pr) + valid_params.append(pr) else: if pr["C"] % (pr["block_size"] ** 2) == 0: - valid_params.append(pr) - print("Total params to be tested: ", len(valid_params), "out of canditates: ", len(all_candidates)) - ''' + valid_params.append(pr) + print( + "Total params to be tested: ", + len(valid_params), + "out of canditates: ", + len(all_candidates), + ) + """ Test - ''' + """ failed_tests_compile = [] for i in range(len(valid_params)): params = valid_params[i] - #print("=========: ", params) - #if i % 10 == 0: print("======== Testing {}/{}".format(str(i), str(len(valid_params)))) - X = np.random.rand(1,params["C"],params["H"],params["W"]) - tf_preds = get_tf_predictions_reorganize(np.transpose(X,[0,2,3,1]), params) - tf_preds = np.transpose(tf_preds, [0,3,1,2]) - coreml_model, eval = get_coreml_model_reorganize(np.expand_dims(X, axis=0), params) + # print("=========: ", params) + # if i % 10 == 0: print("======== Testing {}/{}".format(str(i), str(len(valid_params)))) + X = np.random.rand(1, params["C"], params["H"], params["W"]) + tf_preds = get_tf_predictions_reorganize( + np.transpose(X, [0, 2, 3, 1]), params + ) + tf_preds = np.transpose(tf_preds, [0, 3, 1, 2]) + coreml_model, eval = get_coreml_model_reorganize( + np.expand_dims(X, axis=0), params + ) if eval is False: failed_tests_compile.append(params) else: - input_dict = {'data': np.expand_dims(X, axis=0)} - ref_output_dict = {'output': tf_preds[0, :, :, :]} - self._test_model(input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only) + 
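The filtering loop above is what keeps the parameter sweep legal for TensorFlow: SPACE_TO_DEPTH needs both spatial dimensions divisible by `block_size`, while DEPTH_TO_SPACE needs the channel count divisible by `block_size` squared. The same predicate as a small standalone function (the function name is illustrative, not from the test):

```python
def reorganize_params_valid(pr):
    # SPACE_TO_DEPTH folds block_size x block_size patches into channels.
    if pr["mode"] == "SPACE_TO_DEPTH":
        return pr["H"] % pr["block_size"] == 0 and pr["W"] % pr["block_size"] == 0
    # DEPTH_TO_SPACE unfolds channels back into block_size x block_size patches.
    return pr["C"] % (pr["block_size"] ** 2) == 0

assert reorganize_params_valid(
    {"mode": "SPACE_TO_DEPTH", "C": 3, "H": 8, "W": 8, "block_size": 2}
)
assert not reorganize_params_valid(
    {"mode": "DEPTH_TO_SPACE", "C": 3, "H": 8, "W": 8, "block_size": 2}
)
```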
input_dict = {"data": np.expand_dims(X, axis=0)} + ref_output_dict = {"output": tf_preds[0, :, :, :]} + self._test_model( + input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only + ) self.assertEqual(failed_tests_compile, []) @@ -135,32 +167,49 @@ def test_data_reorganize_cpu_only(self): self.test_data_reorganize(cpu_only=True) def test_depthwise_conv(self, cpu_only=False): - def get_coreml_model_depthwise(X, params, w): eval = True mlmodel = None try: input_dim = X.shape[2:] - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features + ) # tranlate weights : (Kh, Kw, kernel_channels, output_channels) == (Kh, Kw, Cin/g, Cout) == (Kh, Kw, 1, channel_multiplier * Cin) - w_e = np.reshape(w, (params["kernel_size"], params["kernel_size"], params["multiplier"] * params["C"], 1)) + w_e = np.reshape( + w, + ( + params["kernel_size"], + params["kernel_size"], + params["multiplier"] * params["C"], + 1, + ), + ) w_e = np.transpose(w_e, [0, 1, 3, 2]) - if params["padding"] == 'SAME': - pad_mode = 'same' + if params["padding"] == "SAME": + pad_mode = "same" else: - pad_mode = 'valid' - builder.add_convolution('conv', - kernel_channels=1, - output_channels=params["multiplier"] * params["C"], - height=params["kernel_size"], width=params["kernel_size"], - stride_height=params["stride"], stride_width=params["stride"], - border_mode=pad_mode, - groups=params["C"], - W=w_e, b=None, - has_bias=0, is_deconv=0, output_shape=None, - input_name='data', output_name='output') + pad_mode = "valid" + builder.add_convolution( + "conv", + kernel_channels=1, + output_channels=params["multiplier"] * params["C"], + height=params["kernel_size"], + width=params["kernel_size"], + stride_height=params["stride"], + stride_width=params["stride"], + border_mode=pad_mode, + groups=params["C"], + W=w_e, + b=None, + has_bias=0, + is_deconv=0, + output_shape=None, + input_name="data", + output_name="output", + ) mlmodel = MLModel(builder.spec) except RuntimeError as e: @@ -175,74 +224,102 @@ def get_tf_predictions_depthwise(X, params, w): channel_multiplier = params["multiplier"] with tf.Graph().as_default(), tf.Session() as sess: x = tf.placeholder(tf.float32, shape=(1, Hin, Win, Cin)) - W = tf.constant(w, dtype=tf.float32, shape=[Kh, Kw, Cin, channel_multiplier]) - y = tf.nn.depthwise_conv2d(x, W, strides=[1, params["stride"], params["stride"], 1], - padding=params["padding"]) + W = tf.constant( + w, dtype=tf.float32, shape=[Kh, Kw, Cin, channel_multiplier] + ) + y = tf.nn.depthwise_conv2d( + x, + W, + strides=[1, params["stride"], params["stride"], 1], + padding=params["padding"], + ) return sess.run(y, feed_dict={x: X}) - - ''' + """ Define Params - ''' - params_dict = dict( C = [1,4,7], - H = [11,16], - stride = [1,2,3], - kernel_size = [1,2,3,5], - multiplier = [1,2,3,4], - padding = ['SAME', 'VALID'] - ) + """ + params_dict = dict( + C=[1, 4, 7], + H=[11, 16], + stride=[1, 2, 3], + kernel_size=[1, 2, 3, 5], + multiplier=[1, 2, 3, 4], + padding=["SAME", "VALID"], + ) params = [x for x in list(itertools.product(*params_dict.values()))] - all_candidates = [dict(zip(params_dict.keys(), x)) for x in params] - valid_params = [] + all_candidates = [dict(zip(params_dict.keys(), x)) for x in params] + valid_params = [] for pr in 
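The weight handling in `get_coreml_model_depthwise` above is the crux of the mapping: TensorFlow stores a depthwise kernel as `(Kh, Kw, Cin, channel_multiplier)`, while the grouped Core ML convolution expects `(Kh, Kw, 1, multiplier * Cin)` with `groups=Cin`. The reshape/transpose in isolation, with hypothetical sizes:

```python
import numpy as np

Kh = Kw = 3
Cin, multiplier = 4, 2
w = np.random.rand(Kh, Kw, Cin, multiplier)  # TensorFlow depthwise layout

# Same two steps as the test: flatten per-channel filters, then move the
# singleton kernel_channels axis into place.
w_e = np.reshape(w, (Kh, Kw, multiplier * Cin, 1))
w_e = np.transpose(w_e, [0, 1, 3, 2])
assert w_e.shape == (Kh, Kw, 1, multiplier * Cin)
# Fed to builder.add_convolution(..., kernel_channels=1,
#     output_channels=multiplier * Cin, groups=Cin, W=w_e)
```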
all_candidates: - if pr["padding"] == 'VALID': - if np.floor((pr["H"]-pr["kernel_size"])/pr["stride"]) + 1 <= 0: + if pr["padding"] == "VALID": + if np.floor((pr["H"] - pr["kernel_size"]) / pr["stride"]) + 1 <= 0: continue - valid_params.append(pr) - print("Total params to be tested: ", len(valid_params), "out of canditates: ", len(all_candidates)) - ''' + valid_params.append(pr) + print( + "Total params to be tested: ", + len(valid_params), + "out of canditates: ", + len(all_candidates), + ) + """ Test - ''' + """ failed_tests_compile = [] for i in range(len(valid_params)): params = valid_params[i] - #print("=========: ", params) - #if i % 10 == 0: print "======== Testing {}/{}".format(str(i), str(len(valid_params))) - X = np.random.rand(1,params["C"],params["H"],params["H"]) - w = np.random.rand(params["kernel_size"], params["kernel_size"], params["C"], params["multiplier"]) - tf_preds = get_tf_predictions_depthwise(np.transpose(X,[0,2,3,1]), params, w) - tf_preds = np.transpose(tf_preds, [0,3,1,2]) - coreml_model, eval = get_coreml_model_depthwise(np.expand_dims(X, axis=0), params, w) + # print("=========: ", params) + # if i % 10 == 0: print "======== Testing {}/{}".format(str(i), str(len(valid_params))) + X = np.random.rand(1, params["C"], params["H"], params["H"]) + w = np.random.rand( + params["kernel_size"], + params["kernel_size"], + params["C"], + params["multiplier"], + ) + tf_preds = get_tf_predictions_depthwise( + np.transpose(X, [0, 2, 3, 1]), params, w + ) + tf_preds = np.transpose(tf_preds, [0, 3, 1, 2]) + coreml_model, eval = get_coreml_model_depthwise( + np.expand_dims(X, axis=0), params, w + ) if eval is False: failed_tests_compile.append(params) else: - input_dict = {'data': np.expand_dims(X, axis=0)} - ref_output_dict = {'output': tf_preds[0, :, :, :]} - self._test_model(input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only) + input_dict = {"data": np.expand_dims(X, axis=0)} + ref_output_dict = {"output": tf_preds[0, :, :, :]} + self._test_model( + input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only + ) self.assertEqual(failed_tests_compile, []) def test_depthwise_conv_cpu_only(self, cpu_only=False): self.test_depthwise_conv(cpu_only=True) - @unittest.skipUnless( - macos_version() >= (10, 14), 'Only supported on MacOS 10.14+') + @unittest.skipUnless(_macos_version() >= (10, 14), "Only supported on MacOS 10.14+") def test_resize_bilinear(self, cpu_only=False): - def get_coreml_model_resize_bilinear(X, params): eval = True mlmodel = None try: input_dim = X.shape[2:] - input_features = [('data', datatypes.Array(*input_dim))] - output_features = [('output', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + input_features = [("data", datatypes.Array(*input_dim))] + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features + ) if params["align_corners"]: - mode = 'STRICT_ALIGN_ENDPOINTS_MODE' + mode = "STRICT_ALIGN_ENDPOINTS_MODE" else: - mode = 'UPSAMPLE_MODE' - builder.add_resize_bilinear('resize', 'data', 'output', target_height=params["Hnew"], target_width=params["Wnew"], - mode=mode) + mode = "UPSAMPLE_MODE" + builder.add_resize_bilinear( + "resize", + "data", + "output", + target_height=params["Hnew"], + target_width=params["Wnew"], + mode=mode, + ) mlmodel = MLModel(builder.spec) except RuntimeError as e: print(e) @@ -252,56 +329,75 @@ def get_coreml_model_resize_bilinear(X, params): def get_tf_predictions_resize_bilinear(X, params): with 
tf.Graph().as_default(), tf.Session() as sess: - x = tf.placeholder(tf.float32, shape=(params["batch"], params["H"], params["W"], params["ch"])) - y = tf.image.resize_bilinear(x, size = [params["Hnew"], params["Wnew"]], align_corners=params["align_corners"]) + x = tf.placeholder( + tf.float32, + shape=(params["batch"], params["H"], params["W"], params["ch"]), + ) + y = tf.image.resize_bilinear( + x, + size=[params["Hnew"], params["Wnew"]], + align_corners=params["align_corners"], + ) return sess.run(y, feed_dict={x: X}) - - ''' + """ Define Params - ''' - params_dict = dict( H = [1,3,10], #[1,2,3,10] - W = [1,3,10], #[1,2,3,10] - Hnew = [1,2,6], #[1,3,6,12,20] - Wnew = [1,2,6], #[1,3,6,12,20] - align_corners = [False, True], #[False, True] - ch = [1,5], #[1,5] - batch = [1, 3], #[1, 3] - ) + """ + params_dict = dict( + H=[1, 3, 10], # [1,2,3,10] + W=[1, 3, 10], # [1,2,3,10] + Hnew=[1, 2, 6], # [1,3,6,12,20] + Wnew=[1, 2, 6], # [1,3,6,12,20] + align_corners=[False, True], # [False, True] + ch=[1, 5], # [1,5] + batch=[1, 3], # [1, 3] + ) params = [x for x in list(itertools.product(*params_dict.values()))] valid_params = [dict(zip(params_dict.keys(), x)) for x in params] print("Total params to be tested: {}".format(len(valid_params))) - ''' + """ Test - ''' + """ failed_tests_compile = [] for i in range(len(valid_params)): params = valid_params[i] # #print("=========: ", params) if i % 100 == 0: - print("======================= Testing {}/{}".format(str(i), str(len(valid_params)))) - X = np.round(255 * np.random.rand(params["batch"], params["ch"], params["H"], params["W"])) - tf_preds = get_tf_predictions_resize_bilinear(np.transpose(X, [0, 2, 3, 1]), params) + print( + "======================= Testing {}/{}".format( + str(i), str(len(valid_params)) + ) + ) + X = np.round( + 255 + * np.random.rand( + params["batch"], params["ch"], params["H"], params["W"] + ) + ) + tf_preds = get_tf_predictions_resize_bilinear( + np.transpose(X, [0, 2, 3, 1]), params + ) tf_preds = np.transpose(tf_preds, [0, 3, 1, 2]) - coreml_model, eval = get_coreml_model_resize_bilinear(np.expand_dims(X, axis=0), params) + coreml_model, eval = get_coreml_model_resize_bilinear( + np.expand_dims(X, axis=0), params + ) if eval is False: failed_tests_compile.append(params) else: - input_dict = {'data': np.expand_dims(X, axis=0)} - ref_output_dict = {'output': np.expand_dims(tf_preds, axis=0)} - self._test_model(input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only) + input_dict = {"data": np.expand_dims(X, axis=0)} + ref_output_dict = {"output": np.expand_dims(tf_preds, axis=0)} + self._test_model( + input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only + ) self.assertEqual(failed_tests_compile, []) - @unittest.skipUnless( - macos_version() >= (10, 14), 'Only supported on MacOS 10.14+') + @unittest.skipUnless(_macos_version() >= (10, 14), "Only supported on MacOS 10.14+") def test_resize_bilinear_cpu_only(self): self.test_resize_bilinear(cpu_only=True) - @unittest.skipUnless( - macos_version() >= (10, 14), 'Only supported on MacOS 10.14+') + @unittest.skipUnless(_macos_version() >= (10, 14), "Only supported on MacOS 10.14+") def test_crop_resize(self, cpu_only=False): - def get_coreml_model_crop_resize(params): eval = True mlmodel = None @@ -309,25 +405,34 @@ def get_coreml_model_crop_resize(params): H = params["H"] W = params["W"] try: - input_features = [('data', datatypes.Array(ch,H,W))] - input_features.append(('roi', datatypes.Array(4, 1, 1))) + input_features = [("data", datatypes.Array(ch, H, W))] + 
input_features.append(("roi", datatypes.Array(4, 1, 1))) if batch != 1: - input_features.append(('box_ind', datatypes.Array(1, 1, 1))) - output_features = [('output', None)] - builder = neural_network.NeuralNetworkBuilder(input_features, output_features) + input_features.append(("box_ind", datatypes.Array(1, 1, 1))) + output_features = [("output", None)] + builder = neural_network.NeuralNetworkBuilder( + input_features, output_features + ) if batch != 1: - builder.add_elementwise('concat', ['box_ind','roi'], 'roi_out', 'CONCAT') - input_names = ['data', 'roi_out'] + builder.add_elementwise( + "concat", ["box_ind", "roi"], "roi_out", "CONCAT" + ) + input_names = ["data", "roi_out"] else: - input_names = ['data', 'roi'] - - builder.add_crop_resize('resize', input_names, 'output', - target_height=params["Hnew"], target_width=params["Wnew"], - mode='ALIGN_ENDPOINTS_MODE', - normalized_roi=True, - box_indices_mode='CORNERS_HEIGHT_FIRST', - spatial_scale=1.0) + input_names = ["data", "roi"] + + builder.add_crop_resize( + "resize", + input_names, + "output", + target_height=params["Hnew"], + target_width=params["Wnew"], + mode="ALIGN_ENDPOINTS_MODE", + normalized_roi=True, + box_indices_mode="CORNERS_HEIGHT_FIRST", + spatial_scale=1.0, + ) mlmodel = MLModel(builder.spec) except RuntimeError as e: print(e) @@ -338,30 +443,39 @@ def get_coreml_model_crop_resize(params): def get_tf_predictions_crop_resize(X, boxes, box_ind, params): batch, ch, n_roi = params["b_c_n"] with tf.Graph().as_default(), tf.Session() as sess: - x = tf.placeholder(tf.float32, shape=(batch, params["H"], params["W"], ch)) - y = tf.image.crop_and_resize(x, boxes, box_ind, crop_size=[params["Hnew"], params["Wnew"]]) + x = tf.placeholder( + tf.float32, shape=(batch, params["H"], params["W"], ch) + ) + y = tf.image.crop_and_resize( + x, boxes, box_ind, crop_size=[params["Hnew"], params["Wnew"]] + ) return sess.run(y, feed_dict={x: X}) - - ''' + """ Define Params - ''' - params_dict = dict( H = [1,3,10], #[1,2,3,6,10] - W = [1,3,10], #[1,2,3,6,10] - Hnew = [1,2,3,6], #[1,2,3,6,12,20] - Wnew = [1,2,3,6], #[1,2,3,6,12,20] - b_c_n = [(1,1,1),(1,2,3),(3,2,1),(3,4,3)], #[(1,1,1),(1,2,3),(3,2,1),(3,4,3)] - ) + """ + params_dict = dict( + H=[1, 3, 10], # [1,2,3,6,10] + W=[1, 3, 10], # [1,2,3,6,10] + Hnew=[1, 2, 3, 6], # [1,2,3,6,12,20] + Wnew=[1, 2, 3, 6], # [1,2,3,6,12,20] + b_c_n=[ + (1, 1, 1), + (1, 2, 3), + (3, 2, 1), + (3, 4, 3), + ], # [(1,1,1),(1,2,3),(3,2,1),(3,4,3)] + ) params = [x for x in list(itertools.product(*params_dict.values()))] valid_params = [dict(zip(params_dict.keys(), x)) for x in params] print("Total params to be tested: {}".format(len(valid_params))) - ''' + """ Test - ''' + """ failed_tests_compile = [] for i in range(len(valid_params)): params = valid_params[i] - #print("=========: ", params) + # print("=========: ", params) # if i % 100 == 0: # print("======================= Testing {}/{}".format(str(i), str(len(valid_params)))) batch, ch, n_roi = params["b_c_n"] @@ -381,24 +495,27 @@ def get_tf_predictions_crop_resize(X, boxes, box_ind, params): assert roi[ii, 0] <= roi[ii, 2] assert roi[ii, 1] <= roi[ii, 3] - - tf_preds = get_tf_predictions_crop_resize(np.transpose(X, [0, 2, 3, 1]), roi, box_ind, params) + tf_preds = get_tf_predictions_crop_resize( + np.transpose(X, [0, 2, 3, 1]), roi, box_ind, params + ) tf_preds = np.transpose(tf_preds, [0, 3, 1, 2]) coreml_model, eval = get_coreml_model_crop_resize(params) if eval is False: failed_tests_compile.append(params) else: - input_dict = {'data': np.expand_dims(X, 
axis=0)} - input_dict['roi'] = np.reshape(roi, (n_roi,1,4,1,1)) + input_dict = {"data": np.expand_dims(X, axis=0)} + input_dict["roi"] = np.reshape(roi, (n_roi, 1, 4, 1, 1)) if batch != 1: - input_dict['box_ind'] = np.reshape(box_ind.astype(np.float32), (n_roi,1,1,1,1)) - ref_output_dict = {'output': np.expand_dims(tf_preds, axis=0)} - self._test_model(input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only) + input_dict["box_ind"] = np.reshape( + box_ind.astype(np.float32), (n_roi, 1, 1, 1, 1) + ) + ref_output_dict = {"output": np.expand_dims(tf_preds, axis=0)} + self._test_model( + input_dict, ref_output_dict, coreml_model, cpu_only=cpu_only + ) self.assertEqual(failed_tests_compile, []) - @unittest.skipUnless( - macos_version() >= (10, 14), 'Only supported on MacOS 10.14+') + @unittest.skipUnless(_macos_version() >= (10, 14), "Only supported on MacOS 10.14+") def test_crop_resize_cpu_only(self): self.test_crop_resize(cpu_only=True) - diff --git a/coremltools/graph_visualization/__init__.py b/coremltools/test/pipeline/__init__.py similarity index 100% rename from coremltools/graph_visualization/__init__.py rename to coremltools/test/pipeline/__init__.py diff --git a/coremltools/test/pipeline/test_model_updatable.py b/coremltools/test/pipeline/test_model_updatable.py index e0b8fae16..810a30af5 100644 --- a/coremltools/test/pipeline/test_model_updatable.py +++ b/coremltools/test/pipeline/test_model_updatable.py @@ -2,19 +2,23 @@ # # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -import os,shutil +import os, shutil import numpy as _np import coremltools.models.datatypes as datatypes import unittest +import pytest import tempfile from coremltools.models.utils import save_spec from coremltools.models import MLModel -from coremltools.models.neural_network import NeuralNetworkBuilder, AdamParams, SgdParams +from coremltools.models.neural_network import ( + NeuralNetworkBuilder, + AdamParams, + SgdParams, +) from coremltools.models.pipeline import PipelineRegressor, PipelineClassifier class MLModelUpdatableTest(unittest.TestCase): - @classmethod def setUpClass(self): self.model_dir = tempfile.mkdtemp() @@ -25,51 +29,59 @@ def tearDownClass(self): shutil.rmtree(self.model_dir) def create_base_builder(self): - self.input_features = [('input', datatypes.Array(3))] - self.output_features = [('output', None)] + self.input_features = [("input", datatypes.Array(3))] + self.output_features = [("output", None)] self.output_names = ["output"] builder = NeuralNetworkBuilder(self.input_features, self.output_features) W1 = _np.random.uniform(-0.5, 0.5, (3, 3)) W2 = _np.random.uniform(-0.5, 0.5, (3, 3)) - builder.add_inner_product(name='ip1', - W=W1, - b=None, - input_channels=3, - output_channels=3, - has_bias=False, - input_name='input', - output_name='hidden') - builder.add_inner_product(name='ip2', - W=W2, - b=None, - input_channels=3, - output_channels=3, - has_bias=False, - input_name='hidden', - output_name='output') - - builder.make_updatable(['ip1', 'ip2']) # or a dict for weightParams + builder.add_inner_product( + name="ip1", + W=W1, + b=None, + input_channels=3, + output_channels=3, + has_bias=False, + input_name="input", + output_name="hidden", + ) + builder.add_inner_product( + name="ip2", + W=W2, + b=None, + input_channels=3, + output_channels=3, + has_bias=False, + input_name="hidden", + output_name="output", + ) + + builder.make_updatable(["ip1", "ip2"]) # or a dict for 
weightParams return builder def test_updatable_model_creation_ce_sgd(self): - builder = self.create_base_builder() - builder.add_softmax(name='softmax', input_name='output', output_name='softmax_output') + builder.add_softmax( + name="softmax", input_name="output", output_name="softmax_output" + ) - builder.set_categorical_cross_entropy_loss(name='cross_entropy', input='softmax_output') + builder.set_categorical_cross_entropy_loss( + name="cross_entropy", input="softmax_output" + ) builder.set_sgd_optimizer(SgdParams(lr=1e-2, batch=10, momentum=0.0)) builder.set_epochs(20, allowed_set=[10, 20, 30, 40]) - model_path = os.path.join(self.model_dir, 'updatable_creation.mlmodel') + model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel") print(model_path) save_spec(builder.spec, model_path) mlmodel = MLModel(model_path) self.assertTrue(mlmodel is not None) + spec = mlmodel.get_spec() self.assertTrue(spec.isUpdatable) self.assertTrue(spec.neuralNetwork.layers[0].isUpdatable) @@ -77,37 +89,84 @@ def test_updatable_model_creation_ce_sgd(self): self.assertTrue(spec.neuralNetwork.layers[1].isUpdatable) self.assertTrue(spec.neuralNetwork.layers[1].innerProduct.weights.isUpdatable) - self.assertTrue(spec.neuralNetwork.updateParams.lossLayers[0].categoricalCrossEntropyLossLayer is not None) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer is not None) + self.assertTrue( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer + is not None + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer is not None + ) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.defaultValue, 1e-2, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.defaultValue, 10, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.defaultValue, 0, atol=1e-8)) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.defaultValue, + 1e-2, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.defaultValue, + 10, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.defaultValue, + 0, + atol=1e-8, + ) + ) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4)) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4 + ) + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.set.values == [10]) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.set.values + == [10] + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.maxValue == 1) + self.assertTrue( + 
spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.maxValue + == 1 + ) def test_updatable_model_creation_ce_adam(self): - builder = self.create_base_builder() - builder.add_softmax(name='softmax', input_name='output', output_name='softmax_output') + builder.add_softmax( + name="softmax", input_name="output", output_name="softmax_output" + ) - builder.set_categorical_cross_entropy_loss(name='cross_entropy', input='softmax_output') + builder.set_categorical_cross_entropy_loss( + name="cross_entropy", input="softmax_output" + ) adam_params = AdamParams() adam_params.set_batch(value=10, allowed_set=[10, 20]) builder.set_adam_optimizer(adam_params) builder.set_epochs(20) - model_path = os.path.join(self.model_dir, 'updatable_creation.mlmodel') + model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel") print(model_path) save_spec(builder.spec, model_path) @@ -120,44 +179,113 @@ def test_updatable_model_creation_ce_adam(self): self.assertTrue(spec.neuralNetwork.layers[1].isUpdatable) self.assertTrue(spec.neuralNetwork.layers[1].innerProduct.weights.isUpdatable) - self.assertTrue(spec.neuralNetwork.updateParams.lossLayers[0].categoricalCrossEntropyLossLayer is not None) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer is not None) + self.assertTrue( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer + is not None + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer is not None + ) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.defaultValue, 1e-2, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.defaultValue, 10, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.defaultValue, 0.9, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.defaultValue, 0.999, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.defaultValue, 1e-8, atol=1e-8)) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.defaultValue, + 1e-2, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.defaultValue, + 10, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.defaultValue, + 0.9, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.defaultValue, + 0.999, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.defaultValue, + 1e-8, + atol=1e-8, + ) + ) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4)) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4 + ) + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.minValue + == 0 + ) + self.assertTrue( + 
spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.set.values == [10, 20]) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.set.values + == [10, 20] + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.maxValue + == 1 + ) self.assertTrue(spec.neuralNetwork.updateParams.epochs.set.values == [20]) def test_updatable_model_creation_mse_sgd(self): - builder = self.create_base_builder() - builder.set_mean_squared_error_loss(name='mse', input_feature=('output', datatypes.Array(3))) + builder.set_mean_squared_error_loss( + name="mse", input_feature=("output", datatypes.Array(3)) + ) builder.set_sgd_optimizer(SgdParams(lr=1e-2, batch=10, momentum=0.0)) builder.set_epochs(20) - model_path = os.path.join(self.model_dir, 'updatable_creation.mlmodel') + model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel") print(model_path) save_spec(builder.spec, model_path) @@ -170,38 +298,79 @@ def test_updatable_model_creation_mse_sgd(self): self.assertTrue(spec.neuralNetwork.layers[1].isUpdatable) self.assertTrue(spec.neuralNetwork.layers[1].innerProduct.weights.isUpdatable) - self.assertTrue(spec.neuralNetwork.updateParams.lossLayers[0].categoricalCrossEntropyLossLayer is not None) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer is not None) - self.assertTrue( - _np.isclose(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.defaultValue, 1e-2, - atol=1e-4)) + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer + is not None + ) self.assertTrue( - _np.isclose(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.defaultValue, 10, - atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.defaultValue, 0, atol=1e-8)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4)) + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer is not None + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.maxValue == 1) + self.assertTrue( + _np.isclose( 
+ spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.defaultValue, + 1e-2, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.defaultValue, + 10, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.defaultValue, + 0, + atol=1e-8, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4 + ) + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.set.values == [10]) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.learningRate.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.miniBatchSize.set.values + == [10] + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.sgdOptimizer.momentum.range.maxValue + == 1 + ) def test_updatable_model_creation_mse_adam(self): - builder = self.create_base_builder() - builder.set_mean_squared_error_loss(name='mse', input_feature=('output', datatypes.Array(3))) + builder.set_mean_squared_error_loss( + name="mse", input_feature=("output", datatypes.Array(3)) + ) - builder.set_adam_optimizer(AdamParams(lr=1e-2, batch=10, - beta1=0.9, beta2=0.999, eps=1e-8)) + builder.set_adam_optimizer( + AdamParams(lr=1e-2, batch=10, beta1=0.9, beta2=0.999, eps=1e-8) + ) builder.set_epochs(20, allowed_set=[10, 20, 30]) - model_path = os.path.join(self.model_dir, 'updatable_creation.mlmodel') + model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel") print(model_path) save_spec(builder.spec, model_path) @@ -214,131 +383,226 @@ def test_updatable_model_creation_mse_adam(self): self.assertTrue(spec.neuralNetwork.layers[1].isUpdatable) self.assertTrue(spec.neuralNetwork.layers[1].innerProduct.weights.isUpdatable) - self.assertTrue(spec.neuralNetwork.updateParams.lossLayers[0].categoricalCrossEntropyLossLayer is not None) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer is not None) + self.assertTrue( + spec.neuralNetwork.updateParams.lossLayers[ + 0 + ].categoricalCrossEntropyLossLayer + is not None + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer is not None + ) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.defaultValue, 1e-2, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.defaultValue, 10, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.defaultValue, 0.9, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.defaultValue, 0.999, atol=1e-4)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.defaultValue, 1e-8, atol=1e-8)) - self.assertTrue(_np.isclose(spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4)) + self.assertTrue( + _np.isclose( + 
spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.defaultValue, + 1e-2, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.defaultValue, + 10, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.defaultValue, + 0.9, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.defaultValue, + 0.999, + atol=1e-4, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.defaultValue, + 1e-8, + atol=1e-8, + ) + ) + self.assertTrue( + _np.isclose( + spec.neuralNetwork.updateParams.epochs.defaultValue, 20, atol=1e-4 + ) + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.learningRate.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.set.values == [10]) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.miniBatchSize.set.values + == [10] + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta1.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.beta2.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.minValue == 0) - self.assertTrue(spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.maxValue == 1) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.minValue + == 0 + ) + self.assertTrue( + spec.neuralNetwork.updateParams.optimizer.adamOptimizer.eps.range.maxValue + == 1 + ) - self.assertTrue(spec.neuralNetwork.updateParams.epochs.set.values == [10, 20, 30]) + self.assertTrue( + spec.neuralNetwork.updateParams.epochs.set.values == [10, 20, 30] + ) def test_nn_set_cce_without_softmax_fail(self): - nn_builder = self.create_base_builder() # fails since adding CCE without softmax must raise error with self.assertRaises(ValueError): - nn_builder.set_categorical_cross_entropy_loss(name='cross_entropy', input='output') + nn_builder.set_categorical_cross_entropy_loss( + name="cross_entropy", input="output" + ) def test_nn_set_cce_invalid(self): nn_builder = self.create_base_builder() - nn_builder.add_softmax(name='softmax', input_name='output', output_name='softmax_output') + nn_builder.add_softmax( + name="softmax", input_name="output", output_name="softmax_output" + ) # fails since CCE input must be 
softmax output with self.assertRaises(ValueError): - nn_builder.set_categorical_cross_entropy_loss(name='cross_entropy', input='output') + nn_builder.set_categorical_cross_entropy_loss( + name="cross_entropy", input="output" + ) def test_nn_set_softmax_updatable_invalid(self): nn_builder = self.create_base_builder() - nn_builder.add_softmax(name='softmax', input_name='output', output_name='softmax_output') + nn_builder.add_softmax( + name="softmax", input_name="output", output_name="softmax_output" + ) # fails since marking softmax as updatable layer is not allowed with self.assertRaises(ValueError): - nn_builder.make_updatable(['softmax']) + nn_builder.make_updatable(["softmax"]) def test_nn_set_training_input(self): - builder = self.create_base_builder() - builder.set_mean_squared_error_loss(name='mse', input_feature=('output', datatypes.Array(3))) + builder.set_mean_squared_error_loss( + name="mse", input_feature=("output", datatypes.Array(3)) + ) - builder.set_adam_optimizer(AdamParams(lr=1e-2, batch=10, - beta1=0.9, beta2=0.999, eps=1e-8)) + builder.set_adam_optimizer( + AdamParams(lr=1e-2, batch=10, beta1=0.9, beta2=0.999, eps=1e-8) + ) builder.set_epochs(20, allowed_set=[10, 20, 30]) - model_path = os.path.join(self.model_dir, 'updatable_creation.mlmodel') + model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel") print(model_path) save_spec(builder.spec, model_path) mlmodel = MLModel(model_path) self.assertTrue(mlmodel is not None) spec = mlmodel.get_spec() - self.assertEqual(spec.description.trainingInput[0].name, 'input') - self.assertEqual(spec.description.trainingInput[0].type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(spec.description.trainingInput[1].name, 'output_true') - self.assertEqual(spec.description.trainingInput[1].type.WhichOneof('Type'), 'multiArrayType') + self.assertEqual(spec.description.trainingInput[0].name, "input") + self.assertEqual( + spec.description.trainingInput[0].type.WhichOneof("Type"), "multiArrayType" + ) + self.assertEqual(spec.description.trainingInput[1].name, "output_true") + self.assertEqual( + spec.description.trainingInput[1].type.WhichOneof("Type"), "multiArrayType" + ) def test_nn_builder_with_training_features(self): - - input_features = [('input', datatypes.Array(3))] - output_features = [('output', datatypes.Array(3))] + input_features = [("input", datatypes.Array(3))] + output_features = [("output", datatypes.Array(3))] builder = NeuralNetworkBuilder(input_features, output_features) W1 = _np.random.uniform(-0.5, 0.5, (3, 3)) W2 = _np.random.uniform(-0.5, 0.5, (3, 3)) - builder.add_inner_product(name='ip1', - W=W1, - b=None, - input_channels=3, - output_channels=3, - has_bias=False, - input_name='input', - output_name='hidden') - builder.add_inner_product(name='ip2', - W=W2, - b=None, - input_channels=3, - output_channels=3, - has_bias=False, - input_name='hidden', - output_name='output') - - builder.make_updatable(['ip1', 'ip2']) # or a dict for weightParams - - builder.set_mean_squared_error_loss(name='mse', input_feature=('output', datatypes.Array(3))) - - builder.set_adam_optimizer(AdamParams(lr=1e-2, batch=10, - beta1=0.9, beta2=0.999, eps=1e-8)) + builder.add_inner_product( + name="ip1", + W=W1, + b=None, + input_channels=3, + output_channels=3, + has_bias=False, + input_name="input", + output_name="hidden", + ) + builder.add_inner_product( + name="ip2", + W=W2, + b=None, + input_channels=3, + output_channels=3, + has_bias=False, + input_name="hidden", + output_name="output", + ) + + 
builder.make_updatable(["ip1", "ip2"]) # or a dict for weightParams + + builder.set_mean_squared_error_loss( + name="mse", input_feature=("output", datatypes.Array(3)) + ) + + builder.set_adam_optimizer( + AdamParams(lr=1e-2, batch=10, beta1=0.9, beta2=0.999, eps=1e-8) + ) builder.set_epochs(20, allowed_set=[10, 20, 30]) - model_path = os.path.join(self.model_dir, 'updatable_creation.mlmodel') + model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel") print(model_path) save_spec(builder.spec, model_path) mlmodel = MLModel(model_path) self.assertTrue(mlmodel is not None) spec = mlmodel.get_spec() - self.assertEqual(spec.description.trainingInput[0].name, 'input') - self.assertEqual(spec.description.trainingInput[0].type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(spec.description.trainingInput[1].name, 'output_true') - self.assertEqual(spec.description.trainingInput[1].type.WhichOneof('Type'), 'multiArrayType') + self.assertEqual(spec.description.trainingInput[0].name, "input") + self.assertEqual( + spec.description.trainingInput[0].type.WhichOneof("Type"), "multiArrayType" + ) + self.assertEqual(spec.description.trainingInput[1].name, "output_true") + self.assertEqual( + spec.description.trainingInput[1].type.WhichOneof("Type"), "multiArrayType" + ) def test_pipeline_regressor_make_updatable(self): builder = self.create_base_builder() builder.spec.isUpdatable = False - training_input = [('input', datatypes.Array(3)), ('target', 'Double')] + training_input = [("input", datatypes.Array(3)), ("target", "Double")] # fails due to missing sub-models - p_regressor = PipelineRegressor(self.input_features, self.output_names, training_input) + p_regressor = PipelineRegressor( + self.input_features, self.output_names, training_input + ) with self.assertRaises(ValueError): p_regressor.make_updatable() self.assertEqual(p_regressor.spec.isUpdatable, False) @@ -353,12 +617,18 @@ def test_pipeline_regressor_make_updatable(self): p_regressor.add_model(builder.spec) self.assertEqual(p_regressor.spec.isUpdatable, False) - p_regressor.make_updatable(); + p_regressor.make_updatable() self.assertEqual(p_regressor.spec.isUpdatable, True) - self.assertEqual(p_regressor.spec.description.trainingInput[0].name, 'input') - self.assertEqual(p_regressor.spec.description.trainingInput[0].type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(p_regressor.spec.description.trainingInput[1].name, 'target') - self.assertEqual(p_regressor.spec.description.trainingInput[1].type.WhichOneof('Type'), 'doubleType') + self.assertEqual(p_regressor.spec.description.trainingInput[0].name, "input") + self.assertEqual( + p_regressor.spec.description.trainingInput[0].type.WhichOneof("Type"), + "multiArrayType", + ) + self.assertEqual(p_regressor.spec.description.trainingInput[1].name, "target") + self.assertEqual( + p_regressor.spec.description.trainingInput[1].type.WhichOneof("Type"), + "doubleType", + ) # fails since once updatable does not allow adding new models with self.assertRaises(ValueError): @@ -368,10 +638,12 @@ def test_pipeline_regressor_make_updatable(self): def test_pipeline_classifier_make_updatable(self): builder = self.create_base_builder() builder.spec.isUpdatable = False - training_input = [('input', datatypes.Array(3)), ('target', 'String')] + training_input = [("input", datatypes.Array(3)), ("target", "String")] # fails due to missing sub-models - p_classifier = PipelineClassifier(self.input_features, self.output_names, training_features=training_input) + p_classifier = 
PipelineClassifier( + self.input_features, self.output_names, training_features=training_input + ) with self.assertRaises(ValueError): p_classifier.make_updatable() self.assertEqual(p_classifier.spec.isUpdatable, False) @@ -386,23 +658,28 @@ def test_pipeline_classifier_make_updatable(self): p_classifier.add_model(builder.spec) self.assertEqual(p_classifier.spec.isUpdatable, False) - p_classifier.make_updatable(); + p_classifier.make_updatable() self.assertEqual(p_classifier.spec.isUpdatable, True) - self.assertEqual(p_classifier.spec.description.trainingInput[0].name, 'input') - self.assertEqual(p_classifier.spec.description.trainingInput[0].type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(p_classifier.spec.description.trainingInput[1].name, 'target') - self.assertEqual(p_classifier.spec.description.trainingInput[1].type.WhichOneof('Type'), 'stringType') + self.assertEqual(p_classifier.spec.description.trainingInput[0].name, "input") + self.assertEqual( + p_classifier.spec.description.trainingInput[0].type.WhichOneof("Type"), + "multiArrayType", + ) + self.assertEqual(p_classifier.spec.description.trainingInput[1].name, "target") + self.assertEqual( + p_classifier.spec.description.trainingInput[1].type.WhichOneof("Type"), + "stringType", + ) # fails since once updatable does not allow adding new models with self.assertRaises(ValueError): p_classifier.add_model(builder.spec) self.assertEqual(p_classifier.spec.isUpdatable, True) - def test_pipeline_classifier_set_training_inputs(self): builder = self.create_base_builder() builder.spec.isUpdatable = False - training_input = [('input', datatypes.Array(3)), ('target', 'String')] + training_input = [("input", datatypes.Array(3)), ("target", "String")] # fails due to missing sub-models p_classifier = PipelineClassifier(self.input_features, self.output_names) @@ -421,12 +698,18 @@ def test_pipeline_classifier_set_training_inputs(self): p_classifier.add_model(builder.spec) self.assertEqual(p_classifier.spec.isUpdatable, False) - p_classifier.make_updatable(); + p_classifier.make_updatable() self.assertEqual(p_classifier.spec.isUpdatable, True) - self.assertEqual(p_classifier.spec.description.trainingInput[0].name, 'input') - self.assertEqual(p_classifier.spec.description.trainingInput[0].type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(p_classifier.spec.description.trainingInput[1].name, 'target') - self.assertEqual(p_classifier.spec.description.trainingInput[1].type.WhichOneof('Type'), 'stringType') + self.assertEqual(p_classifier.spec.description.trainingInput[0].name, "input") + self.assertEqual( + p_classifier.spec.description.trainingInput[0].type.WhichOneof("Type"), + "multiArrayType", + ) + self.assertEqual(p_classifier.spec.description.trainingInput[1].name, "target") + self.assertEqual( + p_classifier.spec.description.trainingInput[1].type.WhichOneof("Type"), + "stringType", + ) # fails since once updatable does not allow adding new models with self.assertRaises(ValueError): @@ -437,4 +720,7 @@ def test_shuffle_on_by_default(self): builder = self.create_base_builder() # base builder already marks two layers as updatable - self.assertTrue(builder.nn_spec.updateParams.shuffle.defaultValue, "Shuffle not turned on by default for updatable models") + self.assertTrue( + builder.nn_spec.updateParams.shuffle.defaultValue, + "Shuffle not turned on by default for updatable models", + ) diff --git a/coremltools/test/pipeline/test_pipeline.py b/coremltools/test/pipeline/test_pipeline.py index b48167a95..80df761bc 100644 --- 
a/coremltools/test/pipeline/test_pipeline.py +++ b/coremltools/test/pipeline/test_pipeline.py @@ -6,27 +6,27 @@ import unittest import tempfile from coremltools.proto import FeatureTypes_pb2 -from coremltools._deps import HAS_SKLEARN, HAS_LIBSVM +from coremltools._deps import _HAS_SKLEARN, _HAS_LIBSVM from coremltools.models.pipeline import PipelineRegressor, PipelineClassifier from coremltools.models.utils import evaluate_transformer import coremltools.models.datatypes as datatypes from coremltools.models.feature_vectorizer import create_feature_vectorizer -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.preprocessing import OneHotEncoder from sklearn.datasets import load_boston from sklearn.linear_model import LinearRegression from sklearn.pipeline import Pipeline from coremltools.converters import sklearn as converter -if HAS_LIBSVM: - import svm - import svmutil +if _HAS_LIBSVM: + from libsvm import svm + from libsvm import svmutil from coremltools.converters import libsvm as libsvm_converter -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') -@unittest.skipIf(not HAS_LIBSVM, 'Missing libsvm. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") +@unittest.skipIf(not _HAS_LIBSVM, "Missing libsvm. Skipping tests.") class LinearRegressionPipelineCreationTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. @@ -37,16 +37,18 @@ def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ - - if not(HAS_SKLEARN): + + if not (_HAS_SKLEARN): return scikit_data = load_boston() feature_names = scikit_data.feature_names scikit_model = LinearRegression() - scikit_model.fit(scikit_data['data'], scikit_data['target']) - scikit_spec = converter.convert(scikit_model, feature_names, 'target').get_spec() + scikit_model.fit(scikit_data["data"], scikit_data["target"]) + scikit_spec = converter.convert( + scikit_model, feature_names, "target" + ).get_spec() # Save the data and the model self.scikit_data = scikit_data @@ -56,8 +58,8 @@ def setUpClass(self): def test_pipeline_regression_creation(self): input_names = self.scikit_data.feature_names - output_name = 'target' - p_regressor = PipelineRegressor(input_names, 'target') + output_name = "target" + p_regressor = PipelineRegressor(input_names, "target") p_regressor.add_model(self.scikit_spec) self.assertIsNotNone(p_regressor.spec) @@ -68,50 +70,55 @@ def test_pipeline_regression_creation(self): self.assertIsNotNone(spec.description) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in spec.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. 
Skipping tests.') -@unittest.skipIf(not HAS_LIBSVM, 'Missing libsvm. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") +@unittest.skipIf(not _HAS_LIBSVM, "Missing libsvm. Skipping tests.") class LibSVMPipelineCreationTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return - if not HAS_LIBSVM: + if not _HAS_LIBSVM: return scikit_data = load_boston() - prob = svmutil.svm_problem(scikit_data['target'] > scikit_data['target'].mean(), - scikit_data['data'].tolist()) + prob = svmutil.svm_problem( + scikit_data["target"] > scikit_data["target"].mean(), + scikit_data["data"].tolist(), + ) param = svmutil.svm_parameter() param.svm_type = svmutil.C_SVC param.kernel_type = svmutil.LINEAR param.eps = 1 libsvm_model = svmutil.svm_train(prob, param) - libsvm_spec = libsvm_converter.convert(libsvm_model, scikit_data.feature_names, 'target').get_spec() + libsvm_spec = libsvm_converter.convert( + libsvm_model, scikit_data.feature_names, "target" + ).get_spec() # Save the data and the model - self.scikit_data = scikit_data + self.scikit_data = scikit_data self.libsvm_spec = libsvm_spec def test_pipeline_classifier_creation(self): @@ -128,23 +135,23 @@ def test_pipeline_classifier_creation(self): self.assertIsNotNone(spec.description) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'int64Type') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "int64Type" + ) for input_type in spec.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") class LinearRegressionPipeline(unittest.TestCase): """ Unit test class for testing scikit-learn converter. @@ -155,15 +162,13 @@ def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. 
""" - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return scikit_data = load_boston() feature_names = scikit_data.feature_names - scikit_model = Pipeline(steps = [ - ('linear' , LinearRegression()) - ]) - scikit_model.fit(scikit_data['data'], scikit_data['target']) + scikit_model = Pipeline(steps=[("linear", LinearRegression())]) + scikit_model.fit(scikit_data["data"], scikit_data["target"]) # Save the data and the model self.scikit_data = scikit_data @@ -171,9 +176,11 @@ def setUpClass(self): def test_pipeline_regression_creation(self): input_names = self.scikit_data.feature_names - output_name = 'target' + output_name = "target" - p_regressor = converter.convert(self.scikit_model, input_names, 'target').get_spec() + p_regressor = converter.convert( + self.scikit_model, input_names, "target" + ).get_spec() self.assertIsNotNone(p_regressor) self.assertEqual(len(p_regressor.pipelineRegressor.pipeline.models), 2) @@ -182,20 +189,21 @@ def test_pipeline_regression_creation(self): self.assertIsNotNone(spec.description) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in p_regressor.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, p_regressor.description.input))) + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(input_names), + sorted(map(lambda x: x.name, p_regressor.description.input)), + ) def test_conversion_bad_inputs(self): """ @@ -204,4 +212,4 @@ def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = OneHotEncoder() - spec = converter.convert(model, 'data', 'out', 'regressor') + spec = converter.convert(model, "data", "out", "regressor") diff --git a/coremltools/converters/nnssa/coreml/__init__.py b/coremltools/test/sklearn/__init__.py similarity index 89% rename from coremltools/converters/nnssa/coreml/__init__.py rename to coremltools/test/sklearn/__init__.py index 504510dfa..8aa13a28b 100644 --- a/coremltools/converters/nnssa/coreml/__init__.py +++ b/coremltools/test/sklearn/__init__.py @@ -2,4 +2,3 @@ # # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from . 
import graph_pass \ No newline at end of file diff --git a/coremltools/test/sklearn/test_NuSVC.py b/coremltools/test/sklearn/test_NuSVC.py index febc2a86e..a569de96a 100644 --- a/coremltools/test/sklearn/test_NuSVC.py +++ b/coremltools/test/sklearn/test_NuSVC.py @@ -10,45 +10,74 @@ import random import pytest -from coremltools.models.utils import evaluate_classifier,\ - evaluate_classifier_with_probabilities, macos_version, is_macos -from coremltools._deps import HAS_LIBSVM, HAS_SKLEARN - -if HAS_LIBSVM: - from svm import svm_parameter, svm_problem +from coremltools.models.utils import ( + evaluate_classifier, + evaluate_classifier_with_probabilities, + _macos_version, + _is_macos, +) +from coremltools._deps import ( + _HAS_LIBSVM, + MSG_LIBSVM_NOT_FOUND, + _HAS_SKLEARN, + MSG_SKLEARN_NOT_FOUND, +) + +if _HAS_LIBSVM: + from libsvm import svm, svmutil from svmutil import svm_train, svm_predict - import svmutil + from libsvm import svmutil from coremltools.converters import libsvm -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.svm import NuSVC from sklearn.preprocessing import OneHotEncoder from coremltools.converters import sklearn as scikit_converter -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class NuSvcScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. """ - def _evaluation_test_helper(self, class_labels, use_probability_estimates, allow_slow, allowed_prob_delta=0.00001): + + def _evaluation_test_helper( + self, + class_labels, + use_probability_estimates, + allow_slow, + allowed_prob_delta=0.00001, + ): # Parameters to test - kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2}, - {'kernel': 'linear'}, - {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2}, {'kernel': 'poly', 'gamma': 0.75}, - {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0':2}, - {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3}, {'kernel': 'sigmoid', 'coef0': 0.8}, - {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5} - ] - non_kernel_parameters = [{}, {'nu': 0.75}, {'nu': 0.25, 'shrinking': True}, {'shrinking': False}] + kernel_parameters = [ + {}, + {"kernel": "rbf", "gamma": 1.2}, + {"kernel": "linear"}, + {"kernel": "poly"}, + {"kernel": "poly", "degree": 2}, + {"kernel": "poly", "gamma": 0.75}, + {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2}, + {"kernel": "sigmoid"}, + {"kernel": "sigmoid", "gamma": 1.3}, + {"kernel": "sigmoid", "coef0": 0.8}, + {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5}, + ] + non_kernel_parameters = [ + {}, + {"nu": 0.75}, + {"nu": 0.25, "shrinking": True}, + {"shrinking": False}, + ] # Generate some random data x, y = [], [] random.seed(42) for _ in range(50): - x.append([random.gauss(200,30), random.gauss(-100,22), random.gauss(100,42)]) + x.append( + [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)] + ) y.append(random.choice(class_labels)) - column_names = ['x1', 'x2', 'x3'] + column_names = ["x1", "x2", "x3"] # make sure first label is seen first, second is seen second, and so on. 
for i, val in enumerate(class_labels): y[i] = val @@ -59,27 +88,34 @@ def _evaluation_test_helper(self, class_labels, use_probability_estimates, allow for param2 in kernel_parameters: cur_params = param1.copy() cur_params.update(param2) - cur_params['probability'] = use_probability_estimates - cur_params['max_iter'] = 10 # Don't want test to take too long + cur_params["probability"] = use_probability_estimates + cur_params["max_iter"] = 10 # Don't want test to take too long # print("cur_params=" + str(cur_params)) cur_model = NuSVC(**cur_params) cur_model.fit(x, y) - spec = scikit_converter.convert(cur_model, column_names, 'target') + spec = scikit_converter.convert(cur_model, column_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): if use_probability_estimates: probability_lists = cur_model.predict_proba(x) - df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists] - metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability') - self.assertEquals(metrics['num_key_mismatch'], 0) - self.assertLess(metrics['max_probability_error'], allowed_prob_delta) + df["classProbability"] = [ + dict(zip(cur_model.classes_, cur_vals)) + for cur_vals in probability_lists + ] + metrics = evaluate_classifier_with_probabilities( + spec, df, probabilities="classProbability" + ) + self.assertEquals(metrics["num_key_mismatch"], 0) + self.assertLess( + metrics["max_probability_error"], allowed_prob_delta + ) else: - df['prediction'] = cur_model.predict(x) + df["prediction"] = cur_model.predict(x) metrics = evaluate_classifier(spec, df, verbose=False) - self.assertEquals(metrics['num_errors'], 0) - + self.assertEquals(metrics["num_errors"], 0) + if not allow_slow: break @@ -88,35 +124,39 @@ def _evaluation_test_helper(self, class_labels, use_probability_estimates, allow @pytest.mark.slow def test_binary_class_int_label_without_probability_stress_test(self): - self._evaluation_test_helper([1, 3], False, allow_slow = True) + self._evaluation_test_helper([1, 3], False, allow_slow=True) def test_binary_class_int_label_without_probability(self): - self._evaluation_test_helper([1, 3], False, allow_slow = False) + self._evaluation_test_helper([1, 3], False, allow_slow=False) @pytest.mark.slow def test_binary_class_string_label_with_probability_stress_test(self): # Scikit Learn uses technique to normalize pairwise probabilities even for binary classification. # This leads to difference in probabilities. - self._evaluation_test_helper(["foo", "bar"], True, allow_slow = True, allowed_prob_delta=0.005) + self._evaluation_test_helper( + ["foo", "bar"], True, allow_slow=True, allowed_prob_delta=0.005 + ) def test_binary_class_string_label_with_probability(self): # Scikit Learn uses technique to normalize pairwise probabilities even for binary classification. # This leads to difference in probabilities. 
- self._evaluation_test_helper(["foo", "bar"], True, allow_slow = False, allowed_prob_delta=0.005) + self._evaluation_test_helper( + ["foo", "bar"], True, allow_slow=False, allowed_prob_delta=0.005 + ) @pytest.mark.slow def test_multi_class_int_label_without_probability_stress_test(self): - self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow = True) + self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow=True) def test_multi_class_int_label_without_probability(self): - self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow = False) + self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow=False) @pytest.mark.slow def test_multi_class_string_label_with_probability_stress_test(self): - self._evaluation_test_helper(['X', 'Y', 'z'], True, allow_slow = True) + self._evaluation_test_helper(["X", "Y", "z"], True, allow_slow=True) def test_multi_class_string_label_with_probability(self): - self._evaluation_test_helper(['X', 'Y', 'z'], True, allow_slow = False) + self._evaluation_test_helper(["X", "Y", "z"], True, allow_slow=False) def test_conversion_bad_inputs(self): from sklearn.preprocessing import OneHotEncoder @@ -124,36 +164,44 @@ def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = NuSVC() - spec = scikit_converter.convert(model, 'data', 'out') + spec = scikit_converter.convert(model, "data", "out") # Check the expected class during conversion with self.assertRaises(TypeError): model = OneHotEncoder() - spec = scikit_converter.convert(model, 'data', 'out') + spec = scikit_converter.convert(model, "data", "out") -@unittest.skipIf(not HAS_LIBSVM, 'Missing libsvm. Skipping tests.') +@unittest.skipIf(not _HAS_LIBSVM, MSG_LIBSVM_NOT_FOUND) +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class NuSVCLibSVMTest(unittest.TestCase): # Model parameters for testing - base_param = '-s 1 -q ' # model type C-SVC and quiet mode - non_kernel_parameters = ['', '-n 0.6 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'] + base_param = "-s 1 -q" # model type C-SVC and quiet mode + non_kernel_parameters = ["", "-n 0.6 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"] kernel_parameters = [ - '-t 0', # linear kernel - '', '-t 2 -g 1.2', # rbf kernel - '-t 1', '-t 1 -d 2', '-t 1 -g 0.75', '-t 1 -d 0 -g 0.9 -r 2', # poly kernel - '-t 3', '-t 3 -g 1.3', '-t 3 -r 0.8', '-t 3 -r 0.8 -g 0.5' # sigmoid kernel + "-t 0", # linear kernel + "", + "-t 2 -g 1.2", # rbf kernel + "-t 1", + "-t 1 -d 2", + "-t 1 -g 0.75", + "-t 1 -d 0 -g 0.9 -r 2", # poly kernel + "-t 3", + "-t 3 -g 1.3", + "-t 3 -r 0.8", + "-t 3 -r 0.8 -g 0.5", # sigmoid kernel ] - """ Unit test class for testing the libsvm sklearn converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ - if not HAS_LIBSVM: + if not _HAS_LIBSVM: # setUpClass is still called even if class is skipped. 
return @@ -162,11 +210,11 @@ def setUpClass(self): self.x, self.y = [], [] random.seed(42) for _ in range(50): - self.x.append([random.gauss(200,30), random.gauss(-100,22)]) + self.x.append([random.gauss(200, 30), random.gauss(-100, 22)]) self.y.append(random.choice([1, 2])) - self.y[0] = 1 # Make sure 1 is always the first label it sees + self.y[0] = 1 # Make sure 1 is always the first label it sees self.y[1] = 2 - self.column_names = ['x1', 'x2'] + self.column_names = ["x1", "x2"] self.prob = svmutil.svm_problem(self.y, self.x) param = svmutil.svm_parameter() @@ -181,25 +229,28 @@ def setUpClass(self): self.df = pd.DataFrame(self.x, columns=self.column_names) def _test_prob_model(self, param1, param2): - probability_param = '-b 1' + probability_param = "-b 1" df = self.df - param_str = ' '.join([self.base_param, param1, param2, probability_param]) - param = svm_parameter(param_str) - + param_str = " ".join([self.base_param, param1, param2, probability_param]) + param = svmutil.svm_parameter(param_str) model = svm_train(self.prob, param) # Get predictions with probabilities as dictionaries - (df['prediction'], _, probability_lists) = svm_predict(self.y, self.x, model, probability_param + ' -q') - probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists] - df['probabilities'] = probability_dicts + (df["prediction"], _, probability_lists) = svm_predict( + self.y, self.x, model, probability_param + " -q" + ) + probability_dicts = [ + dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists + ] + df["probabilities"] = probability_dicts - spec = libsvm.convert(model, self.column_names, 'target', 'probabilities') + spec = libsvm.convert(model, self.column_names, "target", "probabilities") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False) - self.assertEquals(metrics['num_key_mismatch'], 0) - self.assertLess(metrics['max_probability_error'], 0.00001) + self.assertEquals(metrics["num_key_mismatch"], 0) + self.assertLess(metrics["max_probability_error"], 0.00001) @pytest.mark.slow def test_binary_classificaiton_with_probability_stress_test(self): @@ -212,44 +263,47 @@ def test_binary_classificaiton_with_probability(self): param2 = self.kernel_parameters[0] self._test_prob_model(param1, param2) - @pytest.mark.slow - @unittest.skip("LibSVM's Python library is broken for NuSVC without probabilities. It always segfaults during prediction time.") + @unittest.skip( + "LibSVM's Python library is broken for NuSVC without probabilities. It always segfaults during prediction time." + ) def test_multi_class_without_probability(self): # Generate some random data. # This unit test should not rely on scikit learn for test data. 
x, y = [], [] for _ in range(50): - x.append([random.gauss(200,30), random.gauss(-100,22), random.gauss(100,42)]) + x.append( + [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)] + ) y.append(random.choice([1, 2, 10, 12])) y[0], y[1], y[2], y[3] = 1, 2, 10, 12 - column_names = ['x1', 'x2', 'x3'] + column_names = ["x1", "x2", "x3"] prob = svmutil.svm_problem(y, x) df = pd.DataFrame(x, columns=column_names) for param1 in self.non_kernel_parameters: for param2 in self.kernel_parameters: - param_str = ' '.join([self.base_param, param1, param2]) - param = svm_parameter(param_str) + param_str = " ".join([self.base_param, param1, param2]) + param = svmutil.svm_parameter(param_str) model = svm_train(prob, param) # Get predictions with probabilities as dictionaries - (df['prediction'], _, _) = svm_predict(y, x, model, ' -q') + (df["prediction"], _, _) = svm_predict(y, x, model, " -q") - spec = libsvm.convert(model, column_names, 'target') + spec = libsvm.convert(model, column_names, "target") metrics = evaluate_classifier(spec, df, verbose=False) - self.assertEquals(metrics['num_errors'], 0) + self.assertEquals(metrics["num_errors"], 0) def test_conversion_from_filesystem(self): - libsvm_model_path = tempfile.mktemp(suffix = 'model.libsvm') + libsvm_model_path = tempfile.mktemp(suffix="model.libsvm") svmutil.svm_save_model(libsvm_model_path, self.libsvm_model) - spec = libsvm.convert(libsvm_model_path, 'data', 'target') + spec = libsvm.convert(libsvm_model_path, "data", "target") def test_conversion_bad_inputs(self): # Check the expected class during conversion. with self.assertRaises(TypeError): model = OneHotEncoder() - spec = libsvm.convert(model, 'data', 'out') + spec = libsvm.convert(model, "data", "out") diff --git a/coremltools/test/sklearn/test_NuSVR.py b/coremltools/test/sklearn/test_NuSVR.py index 0fdb0247c..b905abb13 100644 --- a/coremltools/test/sklearn/test_NuSVR.py +++ b/coremltools/test/sklearn/test_NuSVR.py @@ -9,56 +9,61 @@ import unittest import pytest -from coremltools._deps import HAS_LIBSVM -from coremltools._deps import HAS_SKLEARN -from coremltools.models.utils import evaluate_regressor, macos_version, is_macos -if HAS_LIBSVM: - import svmutil - import svm +from coremltools._deps import ( + _HAS_LIBSVM, + MSG_LIBSVM_NOT_FOUND, + _HAS_SKLEARN, + MSG_SKLEARN_NOT_FOUND, +) +from coremltools.models.utils import evaluate_regressor, _macos_version, _is_macos + +if _HAS_LIBSVM: + from libsvm import svmutil + from libsvm import svm from coremltools.converters import libsvm -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.svm import NuSVR from sklearn.datasets import load_boston from sklearn.preprocessing import OneHotEncoder from coremltools.converters import sklearn as scikit_converter -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') + +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class NuSVRScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model.
""" - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return - self.scikit_model = NuSVR(kernel='linear') + self.scikit_model = NuSVR(kernel="linear") self.data = load_boston() - self.scikit_model.fit(self.data['data'], self.data['target']) + self.scikit_model.fit(self.data["data"], self.data["target"]) def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = NuSVR() - spec = scikit_converter.convert(model, 'data', 'out') + spec = scikit_converter.convert(model, "data", "out") # Check the expected class during covnersion. with self.assertRaises(TypeError): model = OneHotEncoder() - spec = scikit_converter.convert(model, 'data', 'out') + spec = scikit_converter.convert(model, "data", "out") @pytest.mark.slow def test_evaluation_stress_test(self): - self._test_evaluation(allow_slow = True) + self._test_evaluation(allow_slow=True) def test_evaluation(self): - self._test_evaluation(allow_slow = False) - + self._test_evaluation(allow_slow=False) def _test_evaluation(self, allow_slow): """ @@ -68,22 +73,33 @@ def _test_evaluation(self, allow_slow): # Generate some smallish (some kernels take too long on anything else) random data x, y = [], [] for _ in range(50): - cur_x1, cur_x2 = random.gauss(2,3), random.gauss(-1,2) + cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2) x.append([cur_x1, cur_x2]) - y.append( 1 + 2*cur_x1 + 3*cur_x2 ) + y.append(1 + 2 * cur_x1 + 3 * cur_x2) - input_names = ['x1', 'x2'] + input_names = ["x1", "x2"] df = pd.DataFrame(x, columns=input_names) # Parameters to test - kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2}, - {'kernel': 'linear'}, - {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2}, {'kernel': 'poly', 'gamma': 0.75}, - {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0':2}, - {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3}, {'kernel': 'sigmoid', 'coef0': 0.8}, - {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5} - ] - non_kernel_parameters = [{}, {'C': 1}, {'C': 1.5, 'shrinking': True}, {'C': 0.5, 'shrinking': False, 'nu': 0.9}] + kernel_parameters = [ + {}, + {"kernel": "rbf", "gamma": 1.2}, + {"kernel": "linear"}, + {"kernel": "poly"}, + {"kernel": "poly", "degree": 2}, + {"kernel": "poly", "gamma": 0.75}, + {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2}, + {"kernel": "sigmoid"}, + {"kernel": "sigmoid", "gamma": 1.3}, + {"kernel": "sigmoid", "coef0": 0.8}, + {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5}, + ] + non_kernel_parameters = [ + {}, + {"C": 1}, + {"C": 1.5, "shrinking": True}, + {"C": 0.5, "shrinking": False, "nu": 0.9}, + ] # Test for param1 in non_kernel_parameters: @@ -93,13 +109,13 @@ def _test_evaluation(self, allow_slow): cur_model = NuSVR(**cur_params) cur_model.fit(x, y) - df['prediction'] = cur_model.predict(x) + df["prediction"] = cur_model.predict(x) - spec = scikit_converter.convert(cur_model, input_names, 'target') + spec = scikit_converter.convert(cur_model, input_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) if not allow_slow: break @@ -108,23 +124,25 @@ def _test_evaluation(self, allow_slow): break -@unittest.skipIf(not HAS_LIBSVM, 'Missing libsvm. 
Skipping tests.') +@unittest.skipIf(not _HAS_LIBSVM, MSG_LIBSVM_NOT_FOUND) +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class NuSVRLibSVMTest(unittest.TestCase): """ Unit test class for testing the libsvm sklearn converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return - if not HAS_LIBSVM: + if not _HAS_LIBSVM: return scikit_data = load_boston() - prob = svmutil.svm_problem(scikit_data['target'], scikit_data['data'].tolist()) + prob = svmutil.svm_problem(scikit_data["target"], scikit_data["data"].tolist()) param = svmutil.svm_parameter() param.svm_type = svmutil.NU_SVR param.kernel_type = svmutil.LINEAR @@ -133,27 +151,26 @@ def setUpClass(self): self.libsvm_model = svmutil.svm_train(prob, param) def test_conversion(self): - spec = libsvm.convert(self.libsvm_model, 'data', 'target') + spec = libsvm.convert(self.libsvm_model, "data", "target") def test_conversion_from_filesystem(self): - libsvm_model_path = tempfile.mktemp(suffix = 'model.libsvm') + libsvm_model_path = tempfile.mktemp(suffix="model.libsvm") svmutil.svm_save_model(libsvm_model_path, self.libsvm_model) - spec = libsvm.convert(libsvm_model_path, 'data', 'target') + spec = libsvm.convert(libsvm_model_path, "data", "target") def test_conversion_bad_inputs(self): # Check the expected class during conversion. with self.assertRaises(TypeError): model = OneHotEncoder() - spec = libsvm.convert(model, 'data', 'out') + spec = libsvm.convert(model, "data", "out") @pytest.mark.slow def test_evaluation_stress_test(self): - self._test_evaluation(allow_slow = True) + self._test_evaluation(allow_slow=True) def test_evaluation(self): - self._test_evaluation(allow_slow = False) + self._test_evaluation(allow_slow=False) - def _test_evaluation(self, allow_slow): """ Test that the same predictions are made @@ -164,41 +181,47 @@ def _test_evaluation(self, allow_slow): # Generate some smallish (poly kernels take too long on anything else) random data x, y = [], [] for _ in range(50): - cur_x1, cur_x2 = random.gauss(2,3), random.gauss(-1,2) + cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2) x.append([cur_x1, cur_x2]) - y.append( 1 + 2*cur_x1 + 3*cur_x2 ) + y.append(1 + 2 * cur_x1 + 3 * cur_x2) - input_names = ['x1', 'x2'] + input_names = ["x1", "x2"] df = pd.DataFrame(x, columns=input_names) - prob = svm_problem(y,x) + prob = svm_problem(y, x) # Parameters - base_param = '-s 4' # model type is nu-SVR - non_kernel_parameters = ['', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'] + base_param = "-s 4"  # model type is nu-SVR + non_kernel_parameters = ["", "-c 1.5 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"] kernel_parameters = [ - '', '-t 2 -g 1.2', # rbf kernel - '-t 0', # linear kernel - '-t 1', '-t 1 -d 2', '-t 1 -g 0.75', '-t 1 -d 0 -g 0.9 -r 2', # poly kernel - '-t 3', '-t 3 -g 1.3', '-t 3 -r 0.8', '-t 3 -r 0.8 -g 0.5' # sigmoid kernel + "", + "-t 2 -g 1.2",  # rbf kernel + "-t 0",  # linear kernel + "-t 1", + "-t 1 -d 2", + "-t 1 -g 0.75", + "-t 1 -d 0 -g 0.9 -r 2",  # poly kernel + "-t 3", + "-t 3 -g 1.3", + "-t 3 -r 0.8", + "-t 3 -r 0.8 -g 0.5",  # sigmoid kernel ] for param1 in non_kernel_parameters: for param2 in kernel_parameters: - param_str = ' '.join([base_param, param1, param2]) + param_str = " ".join([base_param, param1, param2]) param = svm_parameter(param_str) model = svm_train(prob, param) - (df['prediction'], _, _) = svm_predict(y, x, model) + (df["prediction"], _, _) = svm_predict(y, x, model) - spec =
libsvm.convert(model, input_names, 'target') + spec = libsvm.convert(model, input_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) if not allow_slow: break if not allow_slow: break - diff --git a/coremltools/test/sklearn/test_SVC.py b/coremltools/test/sklearn/test_SVC.py index 67b7b5cbf..fcc75a488 100644 --- a/coremltools/test/sklearn/test_SVC.py +++ b/coremltools/test/sklearn/test_SVC.py @@ -9,46 +9,70 @@ import pandas as pd import random import pytest +import numpy as np -from coremltools.models.utils import evaluate_classifier,\ - evaluate_classifier_with_probabilities, macos_version, is_macos -from coremltools._deps import HAS_LIBSVM, HAS_SKLEARN +from coremltools.models.utils import ( + evaluate_classifier, + evaluate_classifier_with_probabilities, + _macos_version, + _is_macos, +) +from coremltools._deps import _HAS_LIBSVM, _HAS_SKLEARN -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.svm import SVC from coremltools.converters import sklearn as scikit_converter -if HAS_LIBSVM: +if _HAS_LIBSVM: from svm import svm_parameter, svm_problem from svmutil import svm_train, svm_predict from coremltools.converters import libsvm import svmutil -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") class SvcScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. """ - def _evaluation_test_helper(self, class_labels, use_probability_estimates, - allow_slow, allowed_prob_delta=0.00001): + + def _evaluation_test_helper( + self, + class_labels, + use_probability_estimates, + allow_slow, + allowed_prob_delta=0.00001, + ): # Parameters to test - kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2}, - {'kernel': 'linear'}, - {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2}, {'kernel': 'poly', 'gamma': 0.75}, - {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0':2}, - {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3}, {'kernel': 'sigmoid', 'coef0': 0.8}, - {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5} - ] - non_kernel_parameters = [{}, {'C': 1}, {'C': 1.5, 'shrinking': True}, {'C': 0.5, 'shrinking': False}] + kernel_parameters = [ + {}, + {"kernel": "rbf", "gamma": 1.2}, + {"kernel": "linear"}, + {"kernel": "poly"}, + {"kernel": "poly", "degree": 2}, + {"kernel": "poly", "gamma": 0.75}, + {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2}, + {"kernel": "sigmoid"}, + {"kernel": "sigmoid", "gamma": 1.3}, + {"kernel": "sigmoid", "coef0": 0.8}, + {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5}, + ] + non_kernel_parameters = [ + {}, + {"C": 1}, + {"C": 1.5, "shrinking": True}, + {"C": 0.5, "shrinking": False}, + ] # Generate some random data x, y = [], [] random.seed(42) for _ in range(50): - x.append([random.gauss(200,30), random.gauss(-100,22), random.gauss(100,42)]) + x.append( + [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)] + ) y.append(random.choice(class_labels)) - column_names = ['x1', 'x2', 'x3'] + column_names = ["x1", "x2", "x3"] # make sure first label is seen first, second is seen second, and so on. 
for i, val in enumerate(class_labels): y[i] = val @@ -59,26 +83,33 @@ def _evaluation_test_helper(self, class_labels, use_probability_estimates, for param2 in kernel_parameters: cur_params = param1.copy() cur_params.update(param2) - cur_params['probability'] = use_probability_estimates - cur_params['max_iter'] = 10 # Don't want test to take too long + cur_params["probability"] = use_probability_estimates + cur_params["max_iter"] = 10  # Don't want test to take too long print("cur_params=" + str(cur_params)) cur_model = SVC(**cur_params) cur_model.fit(x, y) - spec = scikit_converter.convert(cur_model, column_names, 'target') + spec = scikit_converter.convert(cur_model, column_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): if use_probability_estimates: probability_lists = cur_model.predict_proba(x) - df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists] - metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability', verbose=True) - self.assertEquals(metrics['num_key_mismatch'], 0) - self.assertLess(metrics['max_probability_error'], allowed_prob_delta) + df["classProbability"] = [ + dict(zip(cur_model.classes_, cur_vals)) + for cur_vals in probability_lists + ] + metrics = evaluate_classifier_with_probabilities( + spec, df, probabilities="classProbability", verbose=True + ) + self.assertEquals(metrics["num_key_mismatch"], 0) + self.assertLess( + metrics["max_probability_error"], allowed_prob_delta + ) else: - df['prediction'] = cur_model.predict(x) + df["prediction"] = cur_model.predict(x) metrics = evaluate_classifier(spec, df, verbose=False) - self.assertEquals(metrics['num_errors'], 0) + self.assertEquals(metrics["num_errors"], 0) if not allow_slow: break @@ -88,35 +119,39 @@ def _evaluation_test_helper(self, class_labels, use_probability_estimates, @pytest.mark.slow def test_binary_class_string_label_without_probability_stress_test(self): - self._evaluation_test_helper(['A', 'B'], False, allow_slow = True) + self._evaluation_test_helper(["A", "B"], False, allow_slow=True) def test_binary_class_string_label_without_probability(self): - self._evaluation_test_helper(['A', 'B'], False, allow_slow = False) + self._evaluation_test_helper(["A", "B"], False, allow_slow=False) @pytest.mark.slow def test_binary_class_string_label_with_probability_stress_test(self): # Scikit Learn uses a technique to normalize pairwise probabilities even for binary classification. # This leads to differences in probabilities. - self._evaluation_test_helper(["foo", "bar"], True, allow_slow = True, allowed_prob_delta=0.005) + self._evaluation_test_helper( + ["foo", "bar"], True, allow_slow=True, allowed_prob_delta=0.005 + ) def test_binary_class_string_label_with_probability(self): # Scikit Learn uses a technique to normalize pairwise probabilities even for binary classification. # This leads to differences in probabilities.
- self._evaluation_test_helper(["foo", "bar"], True, allow_slow = False, allowed_prob_delta=0.005) + self._evaluation_test_helper( + ["foo", "bar"], True, allow_slow=False, allowed_prob_delta=0.005 + ) @pytest.mark.slow def test_multi_class_int_label_without_probability_stress_test(self): - self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow = True) + self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow=True) def test_multi_class_int_label_without_probability(self): - self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow = False) + self._evaluation_test_helper([12, 33, -1, 1234], False, allow_slow=False) @pytest.mark.slow def test_multi_class_int_label_with_probability_stress_test(self): - self._evaluation_test_helper([1, 2, 3], True, allow_slow = True) + self._evaluation_test_helper([1, 2, 3], True, allow_slow=True) def test_multi_class_int_label_with_probability(self): - self._evaluation_test_helper([1, 2, 3], True, allow_slow = False) + self._evaluation_test_helper([1, 2, 3], True, allow_slow=False) def test_conversion_bad_inputs(self): from sklearn.preprocessing import OneHotEncoder @@ -124,36 +159,44 @@ def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = SVC() - spec = scikit_converter.convert(model, 'data', 'out') + spec = scikit_converter.convert(model, "data", "out") # Check the expected class during conversion with self.assertRaises(TypeError): model = OneHotEncoder() - spec = scikit_converter.convert(model, 'data', 'out') + spec = scikit_converter.convert(model, "data", "out") -@unittest.skipIf(not HAS_LIBSVM, 'Missing libsvm. Skipping tests.') +@unittest.skipIf(not _HAS_LIBSVM, "Missing libsvm. Skipping tests.") class CSVCLibSVMTest(unittest.TestCase): # Model parameters for testing - base_param = '-s 0 -q ' # model type C-SVC and quiet mode - non_kernel_parameters = ['', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'] + base_param = "-s 0 -q " # model type C-SVC and quiet mode + non_kernel_parameters = ["", "-c 1.5 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"] kernel_parameters = [ - '-t 0', # linear kernel - '', '-t 2 -g 1.2', # rbf kernel - '-t 1', '-t 1 -d 2', '-t 1 -g 0.75', '-t 1 -d 0 -g 0.9 -r 2', # poly kernel - '-t 3', '-t 3 -g 1.3', '-t 3 -r 0.8', '-t 3 -r 0.8 -g 0.5' # sigmoid kernel + "-t 0", # linear kernel + "", + "-t 2 -g 1.2", # rbf kernel + "-t 1", + "-t 1 -d 2", + "-t 1 -g 0.75", + "-t 1 -d 0 -g 0.9 -r 2", # poly kernel + "-t 3", + "-t 3 -g 1.3", + "-t 3 -r 0.8", + "-t 3 -r 0.8 -g 0.5", # sigmoid kernel ] # XXX: wi params? """ Unit test class for testing the libsvm converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ - if not HAS_LIBSVM: + if not _HAS_LIBSVM: # setUpClass is still called even if class is skipped. 
return @@ -162,13 +205,13 @@ def setUpClass(self): self.x, self.y = [], [] random.seed(42) for _ in range(50): - self.x.append([random.gauss(200,30), random.gauss(-100,22)]) + self.x.append([random.gauss(200, 30), random.gauss(-100, 22)]) self.y.append(random.choice([1, 2])) - self.y[0] = 1 # Make sure 1 is always the first label it sees + self.y[0] = 1  # Make sure 1 is always the first label it sees self.y[1] = 2 - self.column_names = ['x1', 'x2'] + self.column_names = ["x1", "x2"] self.prob = svmutil.svm_problem(self.y, self.x) - + param = svmutil.svm_parameter() param.svm_type = svmutil.C_SVC param.kernel_type = svmutil.LINEAR @@ -178,84 +221,104 @@ def setUpClass(self): self.libsvm_model = svmutil.svm_train(self.prob, param) def test_default_names(self): - df = pd.DataFrame({'input': self.x}) + df = pd.DataFrame({"input": self.x}) + df["input"] = df["input"].apply(np.array) # Test with probabilities spec = libsvm.convert(self.libsvm_model).get_spec() - if is_macos() and macos_version() >= (10, 13): - (_, _, probability_lists) = svm_predict(self.y, self.x, self.libsvm_model, '-b 1 -q') - probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists] - df['classProbability'] = probability_dicts - metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False, probabilities='classProbability') - self.assertLess(metrics['max_probability_error'], 0.00001) + if _is_macos() and _macos_version() >= (10, 13): + (_, _, probability_lists) = svm_predict( + self.y, self.x, self.libsvm_model, "-b 1 -q" + ) + probability_dicts = [ + dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists + ] + df["classProbability"] = probability_dicts + metrics = evaluate_classifier_with_probabilities( + spec, df, verbose=False, probabilities="classProbability" + ) + self.assertLess(metrics["max_probability_error"], 0.00001) # Test model without probabilities no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter()) spec = libsvm.convert(no_probability_model).get_spec() self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, u'target') - if is_macos() and macos_version() >= (10, 13): - (df['prediction'], _, _) = svm_predict(self.y, self.x, no_probability_model, ' -q') + self.assertEqual(spec.description.output[0].name, u"target") + if _is_macos() and _macos_version() >= (10, 13): + (df["prediction"], _, _) = svm_predict( + self.y, self.x, no_probability_model, " -q" + ) metrics = evaluate_classifier(spec, df, verbose=False) - self.assertEquals(metrics['num_errors'], 0) + self.assertEquals(metrics["num_errors"], 0) # LibSVM only supports numeric labels @pytest.mark.slow def test_binary_class_without_probability_stress_test(self): - self._evaluation_test_helper_no_probability([0, 1], allow_slow = True) + self._evaluation_test_helper_no_probability([0, 1], allow_slow=True) @pytest.mark.slow def test_binary_class_with_probability_stress_test(self): - self._evaluation_test_helper_with_probability([-1, 90], allow_slow = True) + self._evaluation_test_helper_with_probability([-1, 90], allow_slow=True) @pytest.mark.slow def test_multi_class_without_probability_stress_test(self): - self._evaluation_test_helper_no_probability([12, 33, 12341], allow_slow = True) + self._evaluation_test_helper_no_probability([12, 33, 12341], allow_slow=True) @pytest.mark.slow def test_multi_class_with_probability_stress_test(self): - self._evaluation_test_helper_with_probability([1, 2, 3], allow_slow = True) +
self._evaluation_test_helper_with_probability([1, 2, 3], allow_slow=True) # LibSVM only supports numeric labels def test_binary_class_without_probability(self): - self._evaluation_test_helper_no_probability([0, 1], allow_slow = False) + self._evaluation_test_helper_no_probability([0, 1], allow_slow=False) def test_binary_class_with_probability(self): - self._evaluation_test_helper_with_probability([-1, 90], allow_slow = False) + self._evaluation_test_helper_with_probability([-1, 90], allow_slow=False) def test_multi_class_without_probability(self): - self._evaluation_test_helper_no_probability([12, 33, 12341], allow_slow = False) + self._evaluation_test_helper_no_probability([12, 33, 12341], allow_slow=False) def test_multi_class_with_probability(self): - self._evaluation_test_helper_with_probability([1, 2, 3], allow_slow = False) + self._evaluation_test_helper_with_probability([1, 2, 3], allow_slow=False) def _evaluation_test_helper_with_probability(self, labels, allow_slow): import copy + df = pd.DataFrame(self.x, columns=self.column_names) y = copy.copy(self.y) for i, val in enumerate(labels): y[i] = val - probability_param = '-b 1' - + probability_param = "-b 1" + for param1 in self.non_kernel_parameters: for param2 in self.kernel_parameters: - param_str = ' '.join([self.base_param, param1, param2, probability_param]) + param_str = " ".join( + [self.base_param, param1, param2, probability_param] + ) # print("PARAMS: ", param_str) param = svm_parameter(param_str) model = svm_train(self.prob, param) # Get predictions with probabilities as dictionaries - (df['prediction'], _, probability_lists) = svm_predict(y, self.x, model, probability_param + ' -q') - probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists] - df['probabilities'] = probability_dicts - - spec = libsvm.convert(model, self.column_names, 'target', 'probabilities') - - if is_macos() and macos_version() >= (10, 13): - metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False) - self.assertEquals(metrics['num_key_mismatch'], 0) - self.assertLess(metrics['max_probability_error'], 0.00001) + (df["prediction"], _, probability_lists) = svm_predict( + y, self.x, model, probability_param + " -q" + ) + probability_dicts = [ + dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists + ] + df["probabilities"] = probability_dicts + + spec = libsvm.convert( + model, self.column_names, "target", "probabilities" + ) + + if _is_macos() and _macos_version() >= (10, 13): + metrics = evaluate_classifier_with_probabilities( + spec, df, verbose=False + ) + self.assertEquals(metrics["num_key_mismatch"], 0) + self.assertLess(metrics["max_probability_error"], 0.00001) if not allow_slow: break @@ -269,32 +332,34 @@ def _evaluation_test_helper_no_probability(self, labels, allow_slow): x, y = [], [] random.seed(42) for _ in range(50): - x.append([random.gauss(200,30), random.gauss(-100,22), random.gauss(100,42)]) + x.append( + [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)] + ) y.append(random.choice(labels)) # make sure first label is seen first, second is seen second, and so on.
for i, val in enumerate(labels): y[i] = val - column_names = ['x1', 'x2', 'x3'] + column_names = ["x1", "x2", "x3"] prob = svmutil.svm_problem(y, x) df = pd.DataFrame(x, columns=column_names) for param1 in self.non_kernel_parameters: for param2 in self.kernel_parameters: - param_str = ' '.join([self.base_param, param1, param2]) + param_str = " ".join([self.base_param, param1, param2]) print("PARAMS: ", param_str) param = svm_parameter(param_str) model = svm_train(prob, param) - + # Get predictions with probabilities as dictionaries - (df['prediction'], _, _) = svm_predict(y, x, model, ' -q') + (df["prediction"], _, _) = svm_predict(y, x, model, " -q") - spec = libsvm.convert(model, column_names, 'target') + spec = libsvm.convert(model, column_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_classifier(spec, df, verbose=False) - self.assertEquals(metrics['num_errors'], 0) + self.assertEquals(metrics["num_errors"], 0) if not allow_slow: break @@ -302,11 +367,9 @@ def _evaluation_test_helper_no_probability(self, labels, allow_slow): if not allow_slow: break - def test_conversion_from_filesystem(self): - libsvm_model_path = tempfile.mktemp(suffix = 'model.libsvm') + libsvm_model_path = tempfile.mktemp(suffix="model.libsvm") svmutil.svm_save_model(libsvm_model_path, self.libsvm_model) # libsvm's save(...) truncates floating points. So it's not going to match self.libsvm_model any more. - spec = libsvm.convert(libsvm_model_path, self.column_names, 'target') + spec = libsvm.convert(libsvm_model_path, self.column_names, "target") self.assertIsNotNone(spec) - diff --git a/coremltools/test/sklearn/test_SVR.py b/coremltools/test/sklearn/test_SVR.py index 75fd8e798..e72955844 100644 --- a/coremltools/test/sklearn/test_SVR.py +++ b/coremltools/test/sklearn/test_SVR.py @@ -4,67 +4,71 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import pandas as pd +import numpy as np import random import tempfile import unittest import pytest -from coremltools._deps import HAS_LIBSVM -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import ( + _HAS_LIBSVM, + MSG_LIBSVM_NOT_FOUND, + _HAS_SKLEARN, + MSG_SKLEARN_NOT_FOUND, +) +from coremltools.models.utils import evaluate_regressor, _macos_version, _is_macos -from coremltools.models.utils import evaluate_regressor, macos_version, is_macos - -if HAS_LIBSVM: +if _HAS_LIBSVM: import svmutil import svm from coremltools.converters import libsvm -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.svm import SVR from sklearn.datasets import load_boston from coremltools.converters import sklearn as sklearn_converter from sklearn.preprocessing import OneHotEncoder -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class SvrScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn sklearn_converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. 
""" - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return scikit_data = load_boston() - scikit_model = SVR(kernel='linear') - scikit_model.fit(scikit_data['data'], scikit_data['target']) + scikit_model = SVR(kernel="linear") + scikit_model.fit(scikit_data["data"], scikit_data["target"]) # Save the data and the model self.scikit_data = scikit_data self.scikit_model = scikit_model - def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = SVR() - spec = sklearn_converter.convert(model, 'data', 'out') + spec = sklearn_converter.convert(model, "data", "out") # Check the expected class during covnersion. with self.assertRaises(TypeError): model = OneHotEncoder() - spec = sklearn_converter.convert(model, 'data', 'out') + spec = sklearn_converter.convert(model, "data", "out") @pytest.mark.slow def test_evaluation_stress_test(self): - self._test_evaluation(allow_slow = True) + self._test_evaluation(allow_slow=True) def test_evaluation(self): - self._test_evaluation(allow_slow = False) + self._test_evaluation(allow_slow=False) def _test_evaluation(self, allow_slow): """ @@ -74,22 +78,33 @@ def _test_evaluation(self, allow_slow): # Generate some smallish (some kernels take too long on anything else) random data x, y = [], [] for _ in range(50): - cur_x1, cur_x2 = random.gauss(2,3), random.gauss(-1,2) + cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2) x.append([cur_x1, cur_x2]) - y.append( 1 + 2*cur_x1 + 3*cur_x2 ) + y.append(1 + 2 * cur_x1 + 3 * cur_x2) - input_names = ['x1', 'x2'] + input_names = ["x1", "x2"] df = pd.DataFrame(x, columns=input_names) # Parameters to test - kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2}, - {'kernel': 'linear'}, - {'kernel': 'poly'}, {'kernel': 'poly', 'degree': 2}, {'kernel': 'poly', 'gamma': 0.75}, - {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0':2}, - {'kernel': 'sigmoid'}, {'kernel': 'sigmoid', 'gamma': 1.3}, {'kernel': 'sigmoid', 'coef0': 0.8}, - {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5} - ] - non_kernel_parameters = [{}, {'C': 1}, {'C': 1.5, 'epsilon': 0.5, 'shrinking': True}, {'C': 0.5, 'epsilon': 1.5, 'shrinking': False}] + kernel_parameters = [ + {}, + {"kernel": "rbf", "gamma": 1.2}, + {"kernel": "linear"}, + {"kernel": "poly"}, + {"kernel": "poly", "degree": 2}, + {"kernel": "poly", "gamma": 0.75}, + {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2}, + {"kernel": "sigmoid"}, + {"kernel": "sigmoid", "gamma": 1.3}, + {"kernel": "sigmoid", "coef0": 0.8}, + {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5}, + ] + non_kernel_parameters = [ + {}, + {"C": 1}, + {"C": 1.5, "epsilon": 0.5, "shrinking": True}, + {"C": 0.5, "epsilon": 1.5, "shrinking": False}, + ] # Test for param1 in non_kernel_parameters: @@ -100,13 +115,13 @@ def _test_evaluation(self, allow_slow): cur_model = SVR(**cur_params) cur_model.fit(x, y) - df['prediction'] = cur_model.predict(x) + df["prediction"] = cur_model.predict(x) - spec = sklearn_converter.convert(cur_model, input_names, 'target') + spec = sklearn_converter.convert(cur_model, input_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) if not allow_slow: break @@ -115,23 +130,25 @@ def _test_evaluation(self, allow_slow): break -@unittest.skipIf(not HAS_LIBSVM, 'Missing libsvm. 
Skipping tests.') +@unittest.skipIf(not _HAS_LIBSVM, MSG_LIBSVM_NOT_FOUND) +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class EpsilonSVRLibSVMTest(unittest.TestCase): """ Unit test class for testing the libsvm sklearn converter. """ + @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return - if not HAS_LIBSVM: + if not _HAS_LIBSVM: return scikit_data = load_boston() - prob = svmutil.svm_problem(scikit_data['target'], scikit_data['data'].tolist()) + prob = svmutil.svm_problem(scikit_data["target"], scikit_data["data"].tolist()) param = svmutil.svm_parameter() param.svm_type = svmutil.EPSILON_SVR param.kernel_type = svmutil.LINEAR @@ -141,44 +158,48 @@ def setUpClass(self): def test_input_names(self): data = load_boston() - df = pd.DataFrame({'input': data['data'].tolist()}) + df = pd.DataFrame({"input": data["data"].tolist()}) + df["input"] = df["input"].apply(np.array) # Default values spec = libsvm.convert(self.libsvm_model) - if is_macos() and macos_version() >= (10, 13): - (df['prediction'], _, _) = svmutil.svm_predict(data['target'], data['data'].tolist(), self.libsvm_model) + if _is_macos() and _macos_version() >= (10, 13): + (df["prediction"], _, _) = svmutil.svm_predict( + data["target"], data["data"].tolist(), self.libsvm_model + ) metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) # One extra parameter. This is legal/possible. - num_inputs = len(data['data'][0]) - spec = libsvm.convert(self.libsvm_model, input_length=num_inputs+1) + num_inputs = len(data["data"][0]) + spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1) # Not enough input names. - input_names=['this', 'is', 'not', 'enought', 'names'] + input_names = ["this", "is", "not", "enough", "names"] with self.assertRaises(ValueError): libsvm.convert(self.libsvm_model, input_names=input_names) with self.assertRaises(ValueError): - libsvm.convert(self.libsvm_model, input_length=num_inputs-1) + libsvm.convert(self.libsvm_model, input_length=num_inputs - 1) def test_conversion_from_filesystem(self): - libsvm_model_path = tempfile.mktemp(suffix = 'model.libsvm') + libsvm_model_path = tempfile.mktemp(suffix="model.libsvm") svmutil.svm_save_model(libsvm_model_path, self.libsvm_model) - spec = libsvm.convert(libsvm_model_path, input_names='data', target_name='target') + spec = libsvm.convert( + libsvm_model_path, input_names="data", target_name="target" + ) def test_conversion_bad_inputs(self): # Check the expected class during conversion.
with self.assertRaises(TypeError): model = OneHotEncoder() - spec = libsvm.convert(model, 'data', 'out') + spec = libsvm.convert(model, "data", "out") @pytest.mark.slow def test_evaluation_stress_test(self): - self._test_evaluation(allow_slow = True) + self._test_evaluation(allow_slow=True) def test_evaluation(self): - self._test_evaluation(allow_slow = False) - + self._test_evaluation(allow_slow=False) def _test_evaluation(self, allow_slow): """ @@ -190,42 +211,50 @@ def _test_evaluation(self, allow_slow): # Generate some smallish (poly kernels take too long on anything else) random data x, y = [], [] for _ in range(50): - cur_x1, cur_x2 = random.gauss(2,3), random.gauss(-1,2) + cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2) x.append([cur_x1, cur_x2]) - y.append( 1 + 2*cur_x1 + 3*cur_x2 ) + y.append(1 + 2 * cur_x1 + 3 * cur_x2) - input_names = ['x1', 'x2'] + input_names = ["x1", "x2"] df = pd.DataFrame(x, columns=input_names) - prob = svm_problem(y,x) + prob = svm_problem(y, x) # Parameters - base_param = '-s 3' # model type is epsilon SVR - non_kernel_parameters = ['', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'] + base_param = "-s 3" # model type is epsilon SVR + non_kernel_parameters = ["", "-c 1.5 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"] kernel_parameters = [ - '', '-t 2 -g 1.2', # rbf kernel - '-t 0', # linear kernel - '-t 1', '-t 1 -d 2', '-t 1 -g 0.75', '-t 1 -d 0 -g 0.9 -r 2', # poly kernel - '-t 3', '-t 3 -g 1.3', '-t 3 -r 0.8', '-t 3 -r 0.8 -g 0.5' # sigmoid kernel + "", + "-t 2 -g 1.2", # rbf kernel + "-t 0", # linear kernel + "-t 1", + "-t 1 -d 2", + "-t 1 -g 0.75", + "-t 1 -d 0 -g 0.9 -r 2", # poly kernel + "-t 3", + "-t 3 -g 1.3", + "-t 3 -r 0.8", + "-t 3 -r 0.8 -g 0.5", # sigmoid kernel ] for param1 in non_kernel_parameters: for param2 in kernel_parameters: - param_str = ' '.join([base_param, param1, param2]) + param_str = " ".join([base_param, param1, param2]) print(param_str) param = svm_parameter(param_str) model = svm_train(prob, param) - (df['prediction'], _, _) = svm_predict(y, x, model) + (df["prediction"], _, _) = svm_predict(y, x, model) - spec = libsvm.convert(model, input_names=input_names, target_name='target') + spec = libsvm.convert( + model, input_names=input_names, target_name="target" + ) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) if not allow_slow: break if not allow_slow: break - diff --git a/coremltools/test/sklearn/test_categorical_imputer.py b/coremltools/test/sklearn/test_categorical_imputer.py index 14f849840..98eb33720 100644 --- a/coremltools/test/sklearn/test_categorical_imputer.py +++ b/coremltools/test/sklearn/test_categorical_imputer.py @@ -5,13 +5,14 @@ import unittest import numpy as np -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN -if HAS_SKLEARN: +if _HAS_SKLEARN: from coremltools.converters import sklearn as converter from sklearn.preprocessing import Imputer -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') + +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class ImputerTestCase(unittest.TestCase): """ Unit test class for testing scikit-learn converter. 
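The libsvm option strings that recur throughout the SVM tests above follow libsvm's command-line conventions: `-s` selects the model type (0 = C-SVC, 1 = nu-SVC, 3 = epsilon-SVR, 4 = nu-SVR), `-t` the kernel, `-g`/`-d`/`-r` the kernel parameters, `-b 1` enables probability estimates, and `-q` silences training output. A minimal sketch of the train-and-convert round trip these tests exercise, assuming the `libsvm` pip package the updated imports rely on; the data and names here are illustrative, not part of the patch:

```python
import random

from libsvm import svmutil
from coremltools.converters import libsvm

# Toy two-feature, two-class data, seeded like the tests.
random.seed(42)
x = [[random.gauss(200, 30), random.gauss(-100, 22)] for _ in range(50)]
y = [random.choice([1, 2]) for _ in range(50)]
y[0], y[1] = 1, 2  # make sure both labels are seen

# "-s 0" = C-SVC, "-t 2" = RBF kernel, "-g 1.2" = gamma, "-q" = quiet mode.
prob = svmutil.svm_problem(y, x)
param = svmutil.svm_parameter("-s 0 -t 2 -g 1.2 -q")
model = svmutil.svm_train(prob, param)

# Convert to a Core ML spec, naming the two inputs and the output column.
spec = libsvm.convert(model, ["x1", "x2"], "target")
```

The tests then compare `svm_predict` output against the converted model via `evaluate_classifier` / `evaluate_regressor`, which is why each helper loops over the non-kernel and kernel parameter grids.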
@@ -25,36 +26,35 @@ def setUpClass(self): from sklearn.datasets import load_boston scikit_data = load_boston() - scikit_model = Imputer(strategy='most_frequent', axis=0) - scikit_data['data'][1,8] = np.NaN + scikit_model = Imputer(strategy="most_frequent", axis=0) + scikit_data["data"][1, 8] = np.NaN - input_data = scikit_data['data'][:,8].reshape(-1, 1) - scikit_model.fit(input_data, scikit_data['target']) + input_data = scikit_data["data"][:, 8].reshape(-1, 1) + scikit_model.fit(input_data, scikit_data["target"]) # Save the data and the model self.scikit_data = scikit_data self.scikit_model = scikit_model def test_conversion(self): - - spec = converter.convert(self.scikit_model, 'data', 'out').get_spec() + spec = converter.convert(self.scikit_model, "data", "out").get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) # Test the interface - self.assertTrue(spec.pipeline.models[-1].HasField('imputer')) - - def test_conversion_bad_inputs(self): + self.assertTrue(spec.pipeline.models[-1].HasField("imputer")) + def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(Exception): model = Imputer() - spec = converter.convert(model, 'data', 'out') + spec = converter.convert(model, "data", "out") # Check the expected class during conversion. with self.assertRaises(Exception): from sklearn.linear_model import LinearRegression + model = LinearRegression() - spec = converter.convert(model, 'data', 'out') + spec = converter.convert(model, "data", "out") diff --git a/coremltools/test/sklearn/test_composite_pipelines.py b/coremltools/test/sklearn/test_composite_pipelines.py index b1b878939..ed68e7203 100644 --- a/coremltools/test/sklearn/test_composite_pipelines.py +++ b/coremltools/test/sklearn/test_composite_pipelines.py @@ -10,64 +10,67 @@ import itertools import numpy as np -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN from coremltools.models.utils import evaluate_transformer from coremltools.models.utils import evaluate_regressor -from coremltools.models.utils import macos_version, is_macos +from coremltools.models.utils import _macos_version, _is_macos from coremltools.converters.sklearn import convert -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.datasets import load_boston from sklearn.ensemble import GradientBoostingRegressor from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import OneHotEncoder - -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') -class GradientBoostingRegressorBostonHousingScikitNumericTest(unittest.TestCase): - def test_boston_OHE_plus_normalizer(self): +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn.
Skipping tests.") +class GradientBoostingRegressorBostonHousingScikitNumericTest(unittest.TestCase): + def test_boston_OHE_plus_normalizer(self): data = load_boston() - pl = Pipeline([ - ("OHE", OneHotEncoder(categorical_features = [8], sparse=False)), - ("Scaler",StandardScaler())]) + pl = Pipeline( + [ + ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)), + ("Scaler", StandardScaler()), + ] + ) pl.fit(data.data, data.target) # Convert the model - spec = convert(pl, data.feature_names, 'out') + spec = convert(pl, data.feature_names, "out") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): input_data = [dict(zip(data.feature_names, row)) for row in data.data] - output_data = [{"out" : row} for row in pl.transform(data.data)] + output_data = [{"out": row} for row in pl.transform(data.data)] result = evaluate_transformer(spec, input_data, output_data) assert result["num_errors"] == 0 - - def test_boston_OHE_plus_trees(self): + + def test_boston_OHE_plus_trees(self): data = load_boston() - pl = Pipeline([ - ("OHE", OneHotEncoder(categorical_features = [8], sparse=False)), - ("Trees",GradientBoostingRegressor(random_state = 1))]) + pl = Pipeline( + [ + ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)), + ("Trees", GradientBoostingRegressor(random_state=1)), + ] + ) pl.fit(data.data, data.target) # Convert the model - spec = convert(pl, data.feature_names, 'target') + spec = convert(pl, data.feature_names, "target") - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): # Get predictions df = pd.DataFrame(data.data, columns=data.feature_names) - df['prediction'] = pl.predict(data.data) + df["prediction"] = pl.predict(data.data) # Evaluate it - result = evaluate_regressor(spec, df, 'target', verbose = False) + result = evaluate_regressor(spec, df, "target", verbose=False) assert result["max_error"] < 0.0001 - - diff --git a/coremltools/test/sklearn/test_dict_vectorizer.py b/coremltools/test/sklearn/test_dict_vectorizer.py index c8dfe737b..680d29497 100644 --- a/coremltools/test/sklearn/test_dict_vectorizer.py +++ b/coremltools/test/sklearn/test_dict_vectorizer.py @@ -4,20 +4,19 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import unittest -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN from copy import copy import numpy as np from coremltools.models.utils import evaluate_transformer from coremltools.models.utils import evaluate_classifier -from coremltools.models.utils import macos_version, is_macos +from coremltools.models.utils import _macos_version, _is_macos -if HAS_SKLEARN: +if _HAS_SKLEARN: from coremltools.converters import sklearn from sklearn.feature_extraction import DictVectorizer - -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class DictVectorizerScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. 
@@ -25,23 +24,30 @@ class DictVectorizerScikitTest(unittest.TestCase): def _test_conversion(self, data, trained_dict_vectorizer): - X = trained_dict_vectorizer.transform(data) + X = trained_dict_vectorizer.transform(data) - m = sklearn.convert(trained_dict_vectorizer, - input_features = "features", - output_feature_names = "output") + m = sklearn.convert( + trained_dict_vectorizer, + input_features="features", + output_feature_names="output", + ) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): ret = evaluate_transformer( - m, [{"features" : row} for row in data], - [{"output" : x_r} for x_r in X], True) + m, + [{"features": row} for row in data], + [{"output": x_r} for x_r in X], + True, + ) assert ret["num_errors"] == 0 def test_dictvectorizer(self): - D = [{"foo": 1, "bar": 3}, - {"bar": 4, "baz": 2}, - {"bar": 1, "quux": 1, "quuux": 2}] + D = [ + {"foo": 1, "bar": 3}, + {"bar": 4, "baz": 2}, + {"bar": 1, "quux": 1, "quuux": 2}, + ] for sparse in (True, False): for dtype in (int, np.float32, np.int16): @@ -50,43 +56,46 @@ def test_dictvectorizer(self): v = v.fit(D) self._test_conversion(D, v) - def test_unseen_or_no_features(self): D1 = [{"camelot": 0, "spamalot": 1}] - D2 = [{}, {"nothing" : 21}] + D2 = [{}, {"nothing": 21}] for sparse in (True, False): for dtype in (int, np.float32, np.int16): for sort in (True, False): v = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort) - v = v.fit(D1) + v = v.fit(D1) self._test_conversion(D2, v) - def test_int_features_in_pipeline(self): + def test_int_features_in_pipeline(self): import numpy.random as rn import pandas as pd + rn.seed(0) - x_train_dict = [ dict( (rn.randint(100), 1) - for i in range(20)) - for j in range(100)] - y_train = [0,1]*50 + x_train_dict = [ + dict((rn.randint(100), 1) for i in range(20)) for j in range(100) + ] + y_train = [0, 1] * 50 from sklearn.pipeline import Pipeline from sklearn.feature_extraction import DictVectorizer from sklearn.linear_model import LogisticRegression - pl = Pipeline([("dv", DictVectorizer()), ("lm", LogisticRegression())]) + pl = Pipeline([("dv", DictVectorizer()), ("lm", LogisticRegression())]) pl.fit(x_train_dict, y_train) import coremltools - model = coremltools.converters.sklearn.convert(pl, input_features = "features", output_feature_names = "target") + model = coremltools.converters.sklearn.convert( + pl, input_features="features", output_feature_names="target" + ) - if is_macos() and macos_version() >= (10, 13): - x = pd.DataFrame( {"features" : x_train_dict, - "prediction" : pl.predict(x_train_dict)}) + if _is_macos() and _macos_version() >= (10, 13): + x = pd.DataFrame( + {"features": x_train_dict, "prediction": pl.predict(x_train_dict)} + ) cur_eval_metics = evaluate_classifier(model, x) - self.assertEquals(cur_eval_metics['num_errors'], 0) + self.assertEquals(cur_eval_metics["num_errors"], 0) diff --git a/coremltools/test/sklearn/test_feature_names.py b/coremltools/test/sklearn/test_feature_names.py index 4e243e055..00a733a74 100644 --- a/coremltools/test/sklearn/test_feature_names.py +++ b/coremltools/test/sklearn/test_feature_names.py @@ -7,15 +7,18 @@ import coremltools.models.datatypes as dt import six import unittest -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') -class FeatureManagementTests(unittest.TestCase): +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. 
Skipping tests.") +class FeatureManagementTests(unittest.TestCase): def test_all_strings(self): - features = ["a", "b", "c"] - processed_features = [("a", dt.Double()), ("b", dt.Double()), ("c", dt.Double())] + processed_features = [ + ("a", dt.Double()), + ("b", dt.Double()), + ("c", dt.Double()), + ] out = fm.process_or_validate_features(features) self.assertEquals(out, processed_features) self.assertTrue(fm.is_valid_feature_list(out)) @@ -24,5 +27,6 @@ def test_single_array(self): # test both int and long as input to num_dimensions for t in six.integer_types: self.assertEquals( - fm.process_or_validate_features("a", num_dimensions = t(10)), - [("a", dt.Array(10))]) + fm.process_or_validate_features("a", num_dimensions=t(10)), + [("a", dt.Array(10))], + ) diff --git a/coremltools/test/sklearn/test_glm_classifier.py b/coremltools/test/sklearn/test_glm_classifier.py index 1a530b24e..7f37f6b5c 100644 --- a/coremltools/test/sklearn/test_glm_classifier.py +++ b/coremltools/test/sklearn/test_glm_classifier.py @@ -8,52 +8,55 @@ import os import unittest -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN from coremltools.converters.sklearn import convert -from coremltools.models.utils import evaluate_classifier,\ - evaluate_classifier_with_probabilities, macos_version, is_macos - -if HAS_SKLEARN: +from coremltools.models.utils import ( + evaluate_classifier, + evaluate_classifier_with_probabilities, + _macos_version, + _is_macos, +) + +if _HAS_SKLEARN: from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') -class GlmCassifierTest(unittest.TestCase): +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") +class GlmCassifierTest(unittest.TestCase): def test_logistic_regression_binary_classification_with_string_labels(self): - self._conversion_and_evaluation_helper_for_logistic_regression(['Foo', 'Bar']) + self._conversion_and_evaluation_helper_for_logistic_regression(["Foo", "Bar"]) def test_logistic_regression_multiclass_classification_with_int_labels(self): - self._conversion_and_evaluation_helper_for_logistic_regression([1,2,3,4]) - + self._conversion_and_evaluation_helper_for_logistic_regression([1, 2, 3, 4]) @staticmethod def _generate_random_data(labels): import random + random.seed(42) # Generate some random data x, y = [], [] for _ in range(100): - x.append([random.gauss(2,3), random.gauss(-1,2)]) + x.append([random.gauss(2, 3), random.gauss(-1, 2)]) y.append(random.choice(labels)) return x, y - def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels): options = { - 'C': (0.1, 1., 2.), - 'fit_intercept': (True, False), - 'class_weight': ('balanced', None), - 'solver': ('newton-cg', 'lbfgs', 'liblinear', 'sag') - } + "C": (0.1, 1.0, 2.0), + "fit_intercept": (True, False), + "class_weight": ("balanced", None), + "solver": ("newton-cg", "lbfgs", "liblinear", "sag"), + } # Generate a list of all combinations of options and the default parameters product = itertools.product(*options.values()) args = [{}] + [dict(zip(options.keys(), p)) for p in product] x, y = GlmCassifierTest._generate_random_data(class_labels) - column_names = ['x1', 'x2'] + column_names = ["x1", "x2"] df = pd.DataFrame(x, columns=column_names) for cur_args in args: @@ -61,45 +64,53 @@ def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels cur_model = LogisticRegression(**cur_args) cur_model.fit(x, y) - spec = 
convert(cur_model, input_features=column_names, - output_feature_names='target') + spec = convert( + cur_model, input_features=column_names, output_feature_names="target" + ) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): probability_lists = cur_model.predict_proba(x) - df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists] + df["classProbability"] = [ + dict(zip(cur_model.classes_, cur_vals)) + for cur_vals in probability_lists + ] - metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability', verbose=False) - self.assertEquals(metrics['num_key_mismatch'], 0) - self.assertLess(metrics['max_probability_error'], 0.00001) + metrics = evaluate_classifier_with_probabilities( + spec, df, probabilities="classProbability", verbose=False + ) + self.assertEquals(metrics["num_key_mismatch"], 0) + self.assertLess(metrics["max_probability_error"], 0.00001) def test_linear_svc_binary_classification_with_string_labels(self): - self._conversion_and_evaluation_helper_for_linear_svc(['Foo', 'Bar']) + self._conversion_and_evaluation_helper_for_linear_svc(["Foo", "Bar"]) def test_linear_svc_multiclass_classification_with_int_labels(self): - self._conversion_and_evaluation_helper_for_linear_svc([1,2,3,4]) + self._conversion_and_evaluation_helper_for_linear_svc([1, 2, 3, 4]) def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels): - ARGS = [ {}, - {'C' : .75, 'loss': 'hinge'}, - {'penalty': 'l1', 'dual': False}, - {'tol': 0.001, 'fit_intercept': False}, - {'intercept_scaling': 1.5} + ARGS = [ + {}, + {"C": 0.75, "loss": "hinge"}, + {"penalty": "l1", "dual": False}, + {"tol": 0.001, "fit_intercept": False}, + {"intercept_scaling": 1.5}, ] x, y = GlmCassifierTest._generate_random_data(class_labels) - column_names = ['x1', 'x2'] + column_names = ["x1", "x2"] df = pd.DataFrame(x, columns=column_names) - + for cur_args in ARGS: print(class_labels, cur_args) cur_model = LinearSVC(**cur_args) cur_model.fit(x, y) - spec = convert(cur_model, input_features=column_names, - output_feature_names='target') + spec = convert( + cur_model, input_features=column_names, output_feature_names="target" + ) - if is_macos() and macos_version() >= (10, 13): - df['prediction'] = cur_model.predict(x) + if _is_macos() and _macos_version() >= (10, 13): + df["prediction"] = cur_model.predict(x) cur_eval_metics = evaluate_classifier(spec, df, verbose=False) - self.assertEquals(cur_eval_metics['num_errors'], 0) + self.assertEquals(cur_eval_metics["num_errors"], 0) diff --git a/coremltools/test/sklearn/test_imputer.py b/coremltools/test/sklearn/test_imputer.py index 43b9afb8e..0860413d9 100644 --- a/coremltools/test/sklearn/test_imputer.py +++ b/coremltools/test/sklearn/test_imputer.py @@ -4,56 +4,56 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import unittest -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN import numpy.random as rn import numpy as np -from coremltools.models.utils import evaluate_transformer,\ - macos_version, is_macos +from coremltools.models.utils import evaluate_transformer, _macos_version, _is_macos - -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.preprocessing import Imputer from coremltools.converters import sklearn as converter -@unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. 
Skipping tests.') +@unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" +) +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class NumericalImputerTestCase(unittest.TestCase): """ Unit test class for testing scikit-learn converter. """ + def test_conversion_boston(self): - + from sklearn.datasets import load_boston scikit_data = load_boston() - sh = scikit_data.data.shape + sh = scikit_data.data.shape rn.seed(0) - missing_value_indices = [(rn.randint(sh[0]), rn.randint(sh[1])) - for k in range(sh[0])] - - for strategy in ["mean", "median", "most_frequent"]: - for missing_value in [0, 'NaN', -999]: + missing_value_indices = [ + (rn.randint(sh[0]), rn.randint(sh[1])) for k in range(sh[0]) + ] + + for strategy in ["mean", "median", "most_frequent"]: + for missing_value in [0, "NaN", -999]: X = np.array(scikit_data.data).copy() for i, j in missing_value_indices: X[i, j] = missing_value - model = Imputer(missing_values = missing_value, strategy = strategy) + model = Imputer(missing_values=missing_value, strategy=strategy) model = model.fit(X) tr_X = model.transform(X.copy()) - spec = converter.convert(model, scikit_data.feature_names, 'out') + spec = converter.convert(model, scikit_data.feature_names, "out") - input_data = [dict(zip(scikit_data.feature_names, row)) - for row in X] + input_data = [dict(zip(scikit_data.feature_names, row)) for row in X] - output_data = [{"out" : row} for row in tr_X] + output_data = [{"out": row} for row in tr_X] result = evaluate_transformer(spec, input_data, output_data) diff --git a/coremltools/test/sklearn/test_io_types.py b/coremltools/test/sklearn/test_io_types.py index 32c6a8c26..b64e848c1 100644 --- a/coremltools/test/sklearn/test_io_types.py +++ b/coremltools/test/sklearn/test_io_types.py @@ -4,17 +4,25 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import coremltools -from coremltools._deps import HAS_KERAS2_TF -from coremltools.models.utils import macos_version, is_macos -import keras -import sklearn -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier -from sklearn.linear_model import LinearRegression -from sklearn.svm import SVC, SVR +from coremltools._deps import ( + _HAS_KERAS2_TF, + MSG_KERAS2_NOT_FOUND, + _HAS_SKLEARN, + MSG_SKLEARN_NOT_FOUND, +) +from coremltools.models.utils import _macos_version, _is_macos + +if _HAS_KERAS2_TF: + import keras +if _HAS_SKLEARN: + import sklearn + from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier + from sklearn.linear_model import LinearRegression + from sklearn.svm import SVC, SVR + from sklearn.datasets import load_boston import unittest import numpy as np -from sklearn.datasets import load_boston import pytest import PIL.Image @@ -33,8 +41,10 @@ def create_model(spec): return coremltools.models.MLModel(spec) -@unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') +@unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" +) +@unittest.skipIf(not _HAS_SKLEARN, MSG_SKLEARN_NOT_FOUND) class TestIODataTypes(unittest.TestCase): """ This class tests for different I/O feature data types for an .mlmodel @@ -50,7 +60,7 @@ def scikit_data(self): return load_boston() def _feature_data_type(self, dtype): - feature_dict = {np.int32: 
'INT32', np.float32: 'FLOAT32', np.float64: 'DOUBLE'} + feature_dict = {np.int32: "INT32", np.float32: "FLOAT32", np.float64: "DOUBLE"} return feature_dict[dtype] @property @@ -68,7 +78,9 @@ def number_data_type(self): def _sklearn_setup(self, model, dtype, data, target): model.fit(data, target) - spec = coremltools.converters.sklearn.convert(model, 'data', 'target').get_spec() + spec = coremltools.converters.sklearn.convert( + model, "data", "target" + ).get_spec() return model, spec def _check_tree_model(self, spec, inputType, outputType, n_out): @@ -79,268 +91,344 @@ def _check_tree_model(self, spec, inputType, outputType, n_out): self.assertIsNotNone(spec.treeEnsembleClassifier) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), n_out) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), outputType) - self.assertEqual(spec.description.input[0].name, 'data') - self.assertEqual(spec.description.input[0].type.WhichOneof('Type'), inputType) + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual(spec.description.output[0].type.WhichOneof("Type"), outputType) + self.assertEqual(spec.description.input[0].name, "data") + self.assertEqual(spec.description.input[0].type.WhichOneof("Type"), inputType) def test_tree_regressor(self): for dtype in self.number_data_type.keys(): scikit_model = DecisionTreeRegressor(random_state=1) - data = self.scikit_data['data'].astype(dtype) - target = self.scikit_data['target'].astype(dtype) + data = self.scikit_data["data"].astype(dtype) + target = self.scikit_data["target"].astype(dtype) scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target) test_data = data[0].reshape(1, -1) - self._check_tree_model(spec, 'multiArrayType', 'doubleType', 1) + self._check_tree_model(spec, "multiArrayType", "doubleType", 1) coreml_model = create_model(spec) try: - self.assertEqual(scikit_model.predict(test_data)[0].dtype, - type(coreml_model.predict({'data': test_data})['target'])) - self.assertEqual(scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - msg="{} != {} for Dtype: {}".format( - scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - dtype - ) - ) + self.assertEqual( + scikit_model.predict(test_data)[0].dtype, + type(coreml_model.predict({"data": test_data})["target"]), + ) + self.assertEqual( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + msg="{} != {} for Dtype: {}".format( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + dtype, + ), + ) except RuntimeError: print("{} not supported. 
".format(dtype)) def test_random_forest_classifier(self): for dtype in self.number_data_type.keys(): scikit_model = RandomForestClassifier(random_state=1) - data = self.scikit_data['data'].astype(dtype) - target = self.scikit_data['target'].astype(dtype) > self.scikit_data['target'].astype(dtype).mean() + data = self.scikit_data["data"].astype(dtype) + target = ( + self.scikit_data["target"].astype(dtype) + > self.scikit_data["target"].astype(dtype).mean() + ) scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target) test_data = data[0].reshape(1, -1) - self._check_tree_model(spec, 'multiArrayType', 'int64Type', 2) + self._check_tree_model(spec, "multiArrayType", "int64Type", 2) coreml_model = create_model(spec) try: - self.assertEqual(scikit_model.predict(test_data)[0], - bool(int(coreml_model.predict({'data': test_data})['target'])), - msg="{} != {} for Dtype: {}".format( - scikit_model.predict(test_data)[0], - bool(int(coreml_model.predict({'data': test_data})['target'])), - dtype - ) - ) + self.assertEqual( + scikit_model.predict(test_data)[0], + bool(int(coreml_model.predict({"data": test_data})["target"])), + msg="{} != {} for Dtype: {}".format( + scikit_model.predict(test_data)[0], + bool(int(coreml_model.predict({"data": test_data})["target"])), + dtype, + ), + ) except RuntimeError: print("{} not supported. ".format(dtype)) def test_random_forest_regressor(self): for dtype in self.number_data_type.keys(): scikit_model = RandomForestRegressor(random_state=1) - data = self.scikit_data['data'].astype(dtype) - target = self.scikit_data['target'].astype(dtype) + data = self.scikit_data["data"].astype(dtype) + target = self.scikit_data["target"].astype(dtype) scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target) test_data = data[0].reshape(1, -1) - self._check_tree_model(spec, 'multiArrayType', 'doubleType', 1) + self._check_tree_model(spec, "multiArrayType", "doubleType", 1) coreml_model = create_model(spec) try: - self.assertEqual(scikit_model.predict(test_data)[0].dtype, - type(coreml_model.predict({'data': test_data})['target'])) - self.assertAlmostEqual(scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - msg="{} != {} for Dtype: {}".format( - scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - dtype - ) - ) + self.assertEqual( + scikit_model.predict(test_data)[0].dtype, + type(coreml_model.predict({"data": test_data})["target"]), + ) + self.assertAlmostEqual( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + msg="{} != {} for Dtype: {}".format( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + dtype, + ), + ) except RuntimeError: print("{} not supported. 
".format(dtype)) def test_support_vector_classifier(self): for dtype in self.number_data_type.keys(): - scikit_model = SVC(kernel='rbf', gamma=1.2, C=1) - data = self.scikit_data['data'].astype(dtype) - target = self.scikit_data['target'].astype(dtype) > self.scikit_data['target'].astype(dtype).mean() + scikit_model = SVC(kernel="rbf", gamma=1.2, C=1) + data = self.scikit_data["data"].astype(dtype) + target = ( + self.scikit_data["target"].astype(dtype) + > self.scikit_data["target"].astype(dtype).mean() + ) scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target) coreml_model = create_model(spec) for idx in range(0, 10): test_data = data[idx].reshape(1, -1) try: - self.assertEqual(scikit_model.predict(test_data)[0], - bool(int(coreml_model.predict({'data': test_data})['target'])), - msg="{} != {} for Dtype: {}".format( - scikit_model.predict(test_data)[0], - bool(int(coreml_model.predict({'data': test_data})['target'])), - dtype - ) - ) + self.assertEqual( + scikit_model.predict(test_data)[0], + bool(int(coreml_model.predict({"data": test_data})["target"])), + msg="{} != {} for Dtype: {}".format( + scikit_model.predict(test_data)[0], + bool( + int(coreml_model.predict({"data": test_data})["target"]) + ), + dtype, + ), + ) except RuntimeError: print("{} not supported. ".format(dtype)) def test_support_vector_regressor(self): for dtype in self.number_data_type.keys(): - scikit_model = SVR(kernel='rbf') - data = self.scikit_data['data'].astype(dtype) - target = self.scikit_data['target'].astype(dtype) + scikit_model = SVR(kernel="rbf") + data = self.scikit_data["data"].astype(dtype) + target = self.scikit_data["target"].astype(dtype) scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target) test_data = data[0].reshape(1, -1) coreml_model = create_model(spec) try: - self.assertEqual(scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - msg="{} != {} for Dtype: {}".format( - scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - dtype - ) - ) + self.assertEqual( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + msg="{} != {} for Dtype: {}".format( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + dtype, + ), + ) except RuntimeError: print("{} not supported. 
".format(dtype)) def test_linear_regressor(self): for dtype in self.number_data_type.keys(): scikit_model = LinearRegression(normalize=True) - data = self.scikit_data['data'].astype(dtype) - target = self.scikit_data['target'].astype(dtype) + data = self.scikit_data["data"].astype(dtype) + target = self.scikit_data["target"].astype(dtype) scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target) test_data = data[0].reshape(1, -1) coreml_model = create_model(spec) try: - self.assertEqual(scikit_model.predict(test_data)[0].dtype, - type(coreml_model.predict({'data': test_data})['target'])) - self.assertAlmostEqual(scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - msg="{} != {} for Dtype: {}".format( - scikit_model.predict(test_data)[0], - coreml_model.predict({'data': test_data})['target'], - dtype - ) - ) + self.assertEqual( + scikit_model.predict(test_data)[0].dtype, + type(coreml_model.predict({"data": test_data})["target"]), + ) + self.assertAlmostEqual( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + msg="{} != {} for Dtype: {}".format( + scikit_model.predict(test_data)[0], + coreml_model.predict({"data": test_data})["target"], + dtype, + ), + ) except RuntimeError: print("{} not supported. ".format(dtype)) - @unittest.skipIf(not HAS_KERAS2_TF, 'Missing keras 2. Skipping test.') + @unittest.skipIf(not _HAS_KERAS2_TF, MSG_KERAS2_NOT_FOUND) @pytest.mark.keras2 def test_keras_dense_model(self): model = keras.models.Sequential() - model.add(keras.layers.Dense(3, activation='sigmoid', kernel_initializer='random_uniform', - bias_initializer='random_uniform', input_dim=3)) + model.add( + keras.layers.Dense( + 3, + activation="sigmoid", + kernel_initializer="random_uniform", + bias_initializer="random_uniform", + input_dim=3, + ) + ) for key, dtype in self.number_data_type.items(): try: input_data = np.random.rand(1, 3).astype(key) keras_out = model.predict(input_data) - coreml_model = coremltools.converters.keras.convert(model, input_names=['data'], - output_names=['target']) + coreml_model = coremltools.converters.keras.convert( + model, input_names=["data"], output_names=["target"] + ) spec = coreml_model.get_spec() spec.description.output[ - 0].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( - self._feature_data_type(dtype)) + 0 + ].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( + self._feature_data_type(dtype) + ) spec.description.input[ - 0].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( - self._feature_data_type(dtype)) + 0 + ].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( + self._feature_data_type(dtype) + ) coreml_model = coremltools.models.MLModel(spec) - coreml_out = coreml_model.predict({'data': np.expand_dims(input_data, 0)})['target'] - self.assertEqual(dtype, - coreml_out.dtype) + coreml_out = coreml_model.predict( + {"data": np.expand_dims(input_data, 0)} + )["target"] + self.assertEqual(dtype, coreml_out.dtype) if dtype != np.int32: for idx in range(0, len(keras_out)): - self.assertAlmostEqual(keras_out[0][idx], coreml_out[idx], - msg="{}\n{} != {}".format(dtype, keras_out, coreml_out), places=2) + self.assertAlmostEqual( + keras_out[0][idx], + coreml_out[idx], + msg="{}\n{} != {}".format(dtype, keras_out, coreml_out), + places=2, + ) except 
KeyError: print("{} not supported. ".format(dtype)) - def test_keras_embedding_model(self): model = keras.models.Sequential() - model.add(keras.layers.Embedding(100, 3, input_length=5, input_dtype='float32')) + model.add(keras.layers.Embedding(100, 3, input_length=5, input_dtype="float32")) for key, dtype in self.number_data_type.items(): try: input_data = np.random.randint(0, 100, size=(1, 5)).astype(key) keras_out = np.squeeze(model.predict(input_data)).flatten() - coreml_model = coremltools.converters.keras.convert(model, input_names=['data'], - output_names=['target']) + coreml_model = coremltools.converters.keras.convert( + model, input_names=["data"], output_names=["target"] + ) spec = coreml_model.get_spec() spec.description.output[ - 0].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( - self._feature_data_type(dtype)) + 0 + ].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( + self._feature_data_type(dtype) + ) spec.description.input[ - 0].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( - self._feature_data_type(dtype)) + 0 + ].type.multiArrayType.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value( + self._feature_data_type(dtype) + ) coreml_model = coremltools.models.MLModel(spec) coreml_out = np.squeeze( - coreml_model.predict({'data': np.expand_dims(input_data, 0).T})['target']).flatten() - self.assertEqual(dtype, - coreml_out.dtype) + coreml_model.predict({"data": np.expand_dims(input_data, 0).T})[ + "target" + ] + ).flatten() + self.assertEqual(dtype, coreml_out.dtype) if dtype != np.int32: for idx in range(0, len(keras_out)): - self.assertAlmostEqual(keras_out[idx], coreml_out[idx], - msg="{}\n{} != {}".format(dtype, keras_out, coreml_out), places=2) + self.assertAlmostEqual( + keras_out[idx], + coreml_out[idx], + msg="{}\n{} != {}".format(dtype, keras_out, coreml_out), + places=2, + ) except KeyError: print("{} not supported. 
".format(dtype)) - def test_image_output_rgb(self): input_shape = (3, 10, 20) - input_features = [('data', coremltools.models.datatypes.Array(*input_shape))] - output_features = [('target', coremltools.models.datatypes.Array(*input_shape))] - builder = coremltools.models.neural_network.NeuralNetworkBuilder(input_features, output_features) - builder.add_elementwise('Identity', input_names=['data'], - output_name='target', mode='ADD', alpha=0.0) + input_features = [("data", coremltools.models.datatypes.Array(*input_shape))] + output_features = [("target", coremltools.models.datatypes.Array(*input_shape))] + builder = coremltools.models.neural_network.NeuralNetworkBuilder( + input_features, output_features + ) + builder.add_elementwise( + "Identity", + input_names=["data"], + output_name="target", + mode="ADD", + alpha=0.0, + ) spec = builder.spec output = spec.description.output[0] - output.type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('RGB') + output.type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( + "RGB" + ) output.type.imageType.height = input_shape[1] output.type.imageType.width = input_shape[2] - + coreml_model = coremltools.models.MLModel(spec) input_data = np.floor(np.random.rand(*input_shape) * 255) - coreml_out = coreml_model.predict({'data': input_data})['target'] + coreml_out = coreml_model.predict({"data": input_data})["target"] self.assertEqual(PIL.Image.Image, type(coreml_out)) - self.assertEqual('RGBA', coreml_out.mode) - np.testing.assert_equal(np.uint8(input_data), np.array(coreml_out).transpose(2, 0, 1)[:3, :]) - + self.assertEqual("RGBA", coreml_out.mode) + np.testing.assert_equal( + np.uint8(input_data), np.array(coreml_out).transpose(2, 0, 1)[:3, :] + ) def test_image_output_bgr(self): input_shape = (3, 15, 25) - input_features = [('data', coremltools.models.datatypes.Array(*input_shape))] - output_features = [('target', coremltools.models.datatypes.Array(*input_shape))] - builder = coremltools.models.neural_network.NeuralNetworkBuilder(input_features, output_features) - builder.add_elementwise('Identity', input_names=['data'], - output_name='target', mode='ADD', alpha=0.0) + input_features = [("data", coremltools.models.datatypes.Array(*input_shape))] + output_features = [("target", coremltools.models.datatypes.Array(*input_shape))] + builder = coremltools.models.neural_network.NeuralNetworkBuilder( + input_features, output_features + ) + builder.add_elementwise( + "Identity", + input_names=["data"], + output_name="target", + mode="ADD", + alpha=0.0, + ) spec = builder.spec output = spec.description.output[0] - output.type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('BGR') + output.type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( + "BGR" + ) output.type.imageType.height = input_shape[1] output.type.imageType.width = input_shape[2] - + coreml_model = coremltools.models.MLModel(spec) input_data = np.floor(np.random.rand(*input_shape) * 255) - coreml_out = coreml_model.predict({'data': input_data})['target'] + coreml_out = coreml_model.predict({"data": input_data})["target"] self.assertEqual(PIL.Image.Image, type(coreml_out)) - self.assertEqual('RGBA', coreml_out.mode) - np.testing.assert_equal(np.uint8(input_data), np.array(coreml_out)[:, :, ::-1].transpose(2, 0, 1)[1:, :]) - + self.assertEqual("RGBA", coreml_out.mode) + np.testing.assert_equal( + np.uint8(input_data), + 
np.array(coreml_out)[:, :, ::-1].transpose(2, 0, 1)[1:, :], + ) def test_image_output_grayscale(self): input_shape = (1, 20, 30) - input_features = [('data', coremltools.models.datatypes.Array(*input_shape))] - output_features = [('target', coremltools.models.datatypes.Array(*input_shape))] - builder = coremltools.models.neural_network.NeuralNetworkBuilder(input_features, output_features) - builder.add_elementwise('Identity', input_names=['data'], - output_name='target', mode='ADD', alpha=0.0) + input_features = [("data", coremltools.models.datatypes.Array(*input_shape))] + output_features = [("target", coremltools.models.datatypes.Array(*input_shape))] + builder = coremltools.models.neural_network.NeuralNetworkBuilder( + input_features, output_features + ) + builder.add_elementwise( + "Identity", + input_names=["data"], + output_name="target", + mode="ADD", + alpha=0.0, + ) spec = builder.spec output = spec.description.output[0] - output.type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('GRAYSCALE') + output.type.imageType.colorSpace = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value( + "GRAYSCALE" + ) output.type.imageType.height = input_shape[1] output.type.imageType.width = input_shape[2] - + coreml_model = coremltools.models.MLModel(spec) input_data = np.floor(np.random.rand(*input_shape) * 255) - coreml_out = coreml_model.predict({'data': input_data})['target'] + coreml_out = coreml_model.predict({"data": input_data})["target"] self.assertEqual(PIL.Image.Image, type(coreml_out)) - self.assertEqual('L', coreml_out.mode) + self.assertEqual("L", coreml_out.mode) np.testing.assert_equal(np.uint8(input_data)[0], np.array(coreml_out)) diff --git a/coremltools/test/sklearn/test_k_neighbors_classifier.py b/coremltools/test/sklearn/test_k_neighbors_classifier.py index f9edd7f47..c2cd4417d 100644 --- a/coremltools/test/sklearn/test_k_neighbors_classifier.py +++ b/coremltools/test/sklearn/test_k_neighbors_classifier.py @@ -8,15 +8,15 @@ import numpy as np from scipy import sparse -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_SKLEARN -if HAS_SKLEARN: +if _HAS_SKLEARN: from coremltools.converters import sklearn from sklearn.datasets import load_iris from sklearn.neighbors import KNeighborsClassifier -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class KNeighborsClassifierScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. 
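The flow these k-NN tests exercise can be reproduced standalone; a minimal sketch, assuming coremltools (with its scikit-learn converter) and scikit-learn are installed, reading the same spec fields the assertions below check:

```python
# Minimal sketch of converting a fitted scikit-learn k-NN classifier and
# inspecting the resulting Core ML spec (field names as used in these tests).
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from coremltools.converters import sklearn as sklearn_converter

iris = load_iris()
scikit_model = KNeighborsClassifier(algorithm="brute", n_neighbors=42)
scikit_model.fit(iris.data, iris.target)

coreml_model = sklearn_converter.convert(scikit_model, "single_input", "single_output")
spec = coreml_model.get_spec()

# k is exposed as a tunable parameter with a default value and an allowed range.
knn = spec.kNearestNeighborsClassifier
print(knn.numberOfNeighbors.defaultValue)    # 42
print(knn.numberOfNeighbors.range.minValue)  # 1
print(knn.numberOfNeighbors.range.maxValue)  # number of training samples
```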
@@ -39,45 +39,91 @@ def test_conversion_unfitted(self): def test_conversion_brute_algorithm(self): """Tests conversion of a scikit KNeighborsClassifier using the brute force algorithm.""" - scikit_model = KNeighborsClassifier(algorithm='brute', n_neighbors=42) + scikit_model = KNeighborsClassifier(algorithm="brute", n_neighbors=42) scikit_model.fit(self.iris_X, self.iris_y) - coreml_model = sklearn.convert(scikit_model, 'single_input', 'single_output') + coreml_model = sklearn.convert(scikit_model, "single_input", "single_output") coreml_spec = coreml_model.get_spec() self.assertIsNotNone(coreml_spec) self.assertTrue(coreml_spec.HasField("kNearestNeighborsClassifier")) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue, 42) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue, 1) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue, len(self.iris_X)) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.HasField("uniformWeighting")) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions, len(self.iris_X[0])) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField("linearIndex")) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField("squaredEuclideanDistance")) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue, 42 + ) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue, 1 + ) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue, + len(self.iris_X), + ) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.HasField("uniformWeighting") + ) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions, + len(self.iris_X[0]), + ) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField( + "linearIndex" + ) + ) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField( + "squaredEuclideanDistance" + ) + ) self.validate_labels(coreml_spec, self.iris_y) self.validate_float_samples(coreml_spec, self.iris_X) def test_conversion_kd_tree_algorithm(self): """Tests conversion of a scikit KNeighborsClassifier using the brute force algorithm.""" - test_leaf_size=23 - test_n_neighbors=42 - scikit_model = KNeighborsClassifier(algorithm='kd_tree', leaf_size=test_leaf_size, n_neighbors=test_n_neighbors) + test_leaf_size = 23 + test_n_neighbors = 42 + scikit_model = KNeighborsClassifier( + algorithm="kd_tree", leaf_size=test_leaf_size, n_neighbors=test_n_neighbors + ) scikit_model.fit(self.iris_X, self.iris_y) - coreml_model = sklearn.convert(scikit_model, 'single_input', 'single_output') + coreml_model = sklearn.convert(scikit_model, "single_input", "single_output") coreml_spec = coreml_model.get_spec() self.assertIsNotNone(coreml_spec) self.assertTrue(coreml_spec.HasField("kNearestNeighborsClassifier")) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue, test_n_neighbors) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue, 1) - self.assertEqual(coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue, len(self.iris_X)) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.HasField("uniformWeighting")) - 
self.assertEqual(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions, len(self.iris_X[0])) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField("singleKdTreeIndex")) - self.assertEqual(test_leaf_size, coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField("squaredEuclideanDistance")) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.defaultValue, + test_n_neighbors, + ) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.minValue, 1 + ) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.numberOfNeighbors.range.maxValue, + len(self.iris_X), + ) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.HasField("uniformWeighting") + ) + self.assertEqual( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions, + len(self.iris_X[0]), + ) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField( + "singleKdTreeIndex" + ) + ) + self.assertEqual( + test_leaf_size, + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.singleKdTreeIndex.leafSize, + ) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField( + "squaredEuclideanDistance" + ) + ) self.validate_labels(coreml_spec, self.iris_y) self.validate_float_samples(coreml_spec, self.iris_X) @@ -85,75 +131,91 @@ def test_conversion_kd_tree_algorithm(self): def test_conversion_auto_algorithm(self): """Tests conversion of a scikit KNeighborsClassifier using the brute force algorithm.""" test_n_neighbors = 42 - scikit_model = KNeighborsClassifier(algorithm='auto', n_neighbors=test_n_neighbors) + scikit_model = KNeighborsClassifier( + algorithm="auto", n_neighbors=test_n_neighbors + ) scikit_model.fit(self.iris_X, self.iris_y) - coreml_model = sklearn.convert(scikit_model, 'single_input', 'single_output') + coreml_model = sklearn.convert(scikit_model, "single_input", "single_output") coreml_spec = coreml_model.get_spec() self.assertIsNotNone(coreml_spec) def test_conversion_unsupported_algorithm(self): """Test a scikit KNeighborsClassifier with an invalid algorithm.""" - scikit_model = KNeighborsClassifier(algorithm='ball_tree') + scikit_model = KNeighborsClassifier(algorithm="ball_tree") self.assertRaises(TypeError, sklearn.convert, scikit_model) def test_conversion_weight_function_good(self): - scikit_model = KNeighborsClassifier(weights='uniform') + scikit_model = KNeighborsClassifier(weights="uniform") scikit_model.fit(self.iris_X, self.iris_y) - coreml_model = sklearn.convert(scikit_model, 'single_input', 'single_output') + coreml_model = sklearn.convert(scikit_model, "single_input", "single_output") coreml_spec = coreml_model.get_spec() self.assertIsNotNone(coreml_spec) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.HasField("uniformWeighting")) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.HasField("uniformWeighting") + ) def test_conversion_unsupported_weight_function(self): - scikit_model = KNeighborsClassifier(algorithm='brute', weights='distance') + scikit_model = KNeighborsClassifier(algorithm="brute", weights="distance") scikit_model.fit(self.iris_X, self.iris_y) self.assertRaises(TypeError, sklearn.convert, scikit_model) def callable_weight_function(): print("Inside callable_weight_function") - scikit_model = 
KNeighborsClassifier(algorithm='brute', weights=callable_weight_function) + scikit_model = KNeighborsClassifier( + algorithm="brute", weights=callable_weight_function + ) scikit_model.fit(self.iris_X, self.iris_y) self.assertRaises(TypeError, sklearn.convert, scikit_model) def test_conversion_distance_function_good(self): """Tests conversion of a scikit KNeighborsClassifier with a valid distance metric.""" - scikit_model = KNeighborsClassifier(algorithm='brute', metric='euclidean') + scikit_model = KNeighborsClassifier(algorithm="brute", metric="euclidean") scikit_model.fit(self.iris_X, self.iris_y) - coreml_model = sklearn.convert(scikit_model, 'single_input', 'single_output') + coreml_model = sklearn.convert(scikit_model, "single_input", "single_output") coreml_spec = coreml_model.get_spec() self.assertIsNotNone(coreml_spec) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField("squaredEuclideanDistance")) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField( + "squaredEuclideanDistance" + ) + ) # Minkowski metric with p=2 is equivalent to the squared Euclidean distance - scikit_model = KNeighborsClassifier(algorithm='brute', metric='minkowski', p=2) + scikit_model = KNeighborsClassifier(algorithm="brute", metric="minkowski", p=2) scikit_model.fit(self.iris_X, self.iris_y) coreml_spec = coreml_model.get_spec() self.assertIsNotNone(coreml_spec) - self.assertTrue(coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField("squaredEuclideanDistance")) + self.assertTrue( + coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.HasField( + "squaredEuclideanDistance" + ) + ) def test_conversion_unsupported_distance_function(self): """Tests conversion of a scikit KNeighborsClassifier with an invalid distance metric.""" # There are many possible distance functions for a brute force neighbors function, but these 3 should give us # coverage over the converter code. 
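The split between supported and unsupported metrics that the surrounding tests assert looks like this end to end; a sketch under the same assumptions (a fitted brute-force model; `euclidean` maps to the spec's squared Euclidean distance, while `manhattan` is rejected):

```python
# Supported metric: conversion succeeds and the nearest-neighbors index records
# squaredEuclideanDistance. Unsupported metric: convert() raises TypeError.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from coremltools.converters import sklearn as sklearn_converter

iris = load_iris()

supported = KNeighborsClassifier(algorithm="brute", metric="euclidean")
supported.fit(iris.data, iris.target)
spec = sklearn_converter.convert(supported, "single_input", "single_output").get_spec()
index = spec.kNearestNeighborsClassifier.nearestNeighborsIndex
print(index.HasField("squaredEuclideanDistance"))  # True

unsupported = KNeighborsClassifier(algorithm="brute", metric="manhattan")
unsupported.fit(iris.data, iris.target)
try:
    sklearn_converter.convert(unsupported)
except TypeError as exc:
    print("rejected as expected:", exc)
```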
- scikit_model = KNeighborsClassifier(algorithm='brute', metric='manhattan') + scikit_model = KNeighborsClassifier(algorithm="brute", metric="manhattan") scikit_model.fit(self.iris_X, self.iris_y) self.assertRaises(TypeError, sklearn.convert, scikit_model) - scikit_model = KNeighborsClassifier(algorithm='kd_tree', metric='chebyshev') + scikit_model = KNeighborsClassifier(algorithm="kd_tree", metric="chebyshev") scikit_model.fit(self.iris_X, self.iris_y) self.assertRaises(TypeError, sklearn.convert, scikit_model) - scikit_model = KNeighborsClassifier(algorithm='brute', metric='minkowski', p=3) + scikit_model = KNeighborsClassifier(algorithm="brute", metric="minkowski", p=3) scikit_model.fit(self.iris_X, self.iris_y) self.assertRaises(TypeError, sklearn.convert, scikit_model) def callable_distance_function(): print("Inside callable_distance_function") - scikit_model = KNeighborsClassifier(algorithm='brute', metric=callable_distance_function) + scikit_model = KNeighborsClassifier( + algorithm="brute", metric=callable_distance_function + ) scikit_model.fit(self.iris_X, self.iris_y) self.assertRaises(TypeError, sklearn.convert, scikit_model) @@ -161,10 +223,14 @@ def test_conversion_with_sparse_X(self): """Tests conversion of a model that's fitted with sparse data.""" num_samples = 100 num_dims = 64 - sparse_X = sparse.rand(num_samples, num_dims, format='csr') # KNeighborsClassifier only supports CSR format - y = self.iris_y[0:num_samples] # the labels themselves don't matter - just use 100 of the Iris ones - - sklearn_model = KNeighborsClassifier(algorithm='brute') + sparse_X = sparse.rand( + num_samples, num_dims, format="csr" + ) # KNeighborsClassifier only supports CSR format + y = self.iris_y[ + 0:num_samples + ] # the labels themselves don't matter - just use 100 of the Iris ones + + sklearn_model = KNeighborsClassifier(algorithm="brute") sklearn_model.fit(sparse_X, y) coreml_model = sklearn.convert(sklearn_model) @@ -174,13 +240,17 @@ def test_conversion_with_sparse_X(self): def test_conversion_with_sparse_y(self): """Tests conversion of a model that's fitted with y values in a sparse format.""" from sklearn.model_selection import train_test_split - X_train, X_test, y_train, y_test = train_test_split(self.iris_X, self.iris_y, test_size=0.2, train_size=0.8) + + X_train, X_test, y_train, y_test = train_test_split( + self.iris_X, self.iris_y, test_size=0.2, train_size=0.8 + ) from sklearn import preprocessing + lb = preprocessing.LabelBinarizer(sparse_output=True) binarized_y = lb.fit_transform(y_train) - sklearn_model = KNeighborsClassifier(algorithm='brute') + sklearn_model = KNeighborsClassifier(algorithm="brute") sklearn_model.fit(X_train, binarized_y) self.assertRaises(ValueError, sklearn.convert, sklearn_model) @@ -188,12 +258,20 @@ def test_conversion_with_sparse_y(self): def validate_labels(self, spec, expected): """Validate the labels returned from the converted scikit KNeighborsClassifier""" self.assertTrue(spec.kNearestNeighborsClassifier.HasField("int64ClassLabels")) - for index, label in enumerate(spec.kNearestNeighborsClassifier.int64ClassLabels.vector): + for index, label in enumerate( + spec.kNearestNeighborsClassifier.int64ClassLabels.vector + ): self.assertEqual(label, expected[index]) def validate_float_samples(self, spec, expected): """Validate the float samples returned from the converted scikit KNeighborsClassifier""" - num_dimensions = spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions - for index, sample in 
enumerate(spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples): + num_dimensions = ( + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions + ) + for index, sample in enumerate( + spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples + ): for dim in range(0, num_dimensions): - self.assertAlmostEqual(sample.vector[dim], expected[index][dim], places=6) + self.assertAlmostEqual( + sample.vector[dim], expected[index][dim], places=6 + ) diff --git a/coremltools/test/sklearn/test_linear_regression.py b/coremltools/test/sklearn/test_linear_regression.py index 3ffdcad8e..da8ca6416 100644 --- a/coremltools/test/sklearn/test_linear_regression.py +++ b/coremltools/test/sklearn/test_linear_regression.py @@ -6,16 +6,16 @@ import os import pandas as pd import unittest -from coremltools._deps import HAS_SKLEARN -from coremltools.models.utils import evaluate_regressor,\ - macos_version, is_macos +from coremltools._deps import _HAS_SKLEARN +from coremltools.models.utils import evaluate_regressor, _macos_version, _is_macos -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.linear_model import LinearRegression from sklearn.svm import LinearSVR from coremltools.converters.sklearn import convert -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikitlearn. Skipping tests.') + +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikitlearn. Skipping tests.") class LinearRegressionScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. @@ -31,7 +31,7 @@ def setUpClass(self): scikit_data = load_boston() scikit_model = LinearRegression() - scikit_model.fit(scikit_data['data'], scikit_data['target']) + scikit_model.fit(scikit_data["data"], scikit_data["target"]) # Save the data and the model self.scikit_data = scikit_data @@ -39,29 +39,31 @@ def setUpClass(self): def test_conversion(self): input_names = self.scikit_data.feature_names - spec = convert(self.scikit_model, input_names, 'target').get_spec() + spec = convert(self.scikit_model, input_names, "target").get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) # Test the interface class - self.assertEquals(spec.description.predictedFeatureName, - 'target') + self.assertEquals(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEquals(len(spec.description.output), 1) - self.assertEquals(spec.description.output[0].name, 'target') - self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEquals(spec.description.output[0].name, "target") + self.assertEquals( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in spec.description.input: - self.assertEquals(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEquals(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) # Test the linear regression parameters. 
- self.assertTrue(spec.pipelineRegressor.pipeline.models[-1].HasField('glmRegressor')) + self.assertTrue( + spec.pipelineRegressor.pipeline.models[-1].HasField("glmRegressor") + ) lr = spec.pipelineRegressor.pipeline.models[-1].glmRegressor self.assertEquals(lr.offset, self.scikit_model.intercept_) self.assertEquals(len(lr.weights), 1) @@ -75,16 +77,18 @@ def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = LinearRegression() - spec = convert(model, 'data', 'out') + spec = convert(model, "data", "out") # Check the expected class during covnersion. from sklearn.preprocessing import OneHotEncoder + with self.assertRaises(TypeError): model = OneHotEncoder() - spec = convert(model, 'data', 'out') + spec = convert(model, "data", "out") - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_linear_regression_evaluation(self): """ Check that the evaluation results are the same in scikit learn and coremltools @@ -94,26 +98,28 @@ def test_linear_regression_evaluation(self): for normalize_value in (True, False): cur_model = LinearRegression(normalize=normalize_value) - cur_model.fit(self.scikit_data['data'], self.scikit_data['target']) - spec = convert(cur_model, input_names, 'target') + cur_model.fit(self.scikit_data["data"], self.scikit_data["target"]) + spec = convert(cur_model, input_names, "target") - df['prediction'] = cur_model.predict(self.scikit_data.data) + df["prediction"] = cur_model.predict(self.scikit_data.data) metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) - @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), - 'Only supported on macOS 10.13+') + @unittest.skipUnless( + _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+" + ) def test_linear_svr_evaluation(self): """ Check that the evaluation results are the same in scikit learn and coremltools """ - ARGS = [ {}, - {'C': 0.5, 'epsilon': 0.25}, - {'dual': False, 'loss': 'squared_epsilon_insensitive'}, - {'tol': 0.005}, - {'fit_intercept': False}, - {'intercept_scaling': 1.5} + ARGS = [ + {}, + {"C": 0.5, "epsilon": 0.25}, + {"dual": False, "loss": "squared_epsilon_insensitive"}, + {"tol": 0.005}, + {"fit_intercept": False}, + {"intercept_scaling": 1.5}, ] input_names = self.scikit_data.feature_names @@ -122,10 +128,10 @@ def test_linear_svr_evaluation(self): for cur_args in ARGS: print(cur_args) cur_model = LinearSVR(**cur_args) - cur_model.fit(self.scikit_data['data'], self.scikit_data['target']) - spec = convert(cur_model, input_names, 'target') + cur_model.fit(self.scikit_data["data"], self.scikit_data["target"]) + spec = convert(cur_model, input_names, "target") - df['prediction'] = cur_model.predict(self.scikit_data.data) + df["prediction"] = cur_model.predict(self.scikit_data.data) metrics = evaluate_regressor(spec, df) - self.assertAlmostEquals(metrics['max_error'], 0) + self.assertAlmostEquals(metrics["max_error"], 0) diff --git a/coremltools/test/sklearn/test_nearest_neighbors_builder.py b/coremltools/test/sklearn/test_nearest_neighbors_builder.py index b85821784..4549083c9 100644 --- a/coremltools/test/sklearn/test_nearest_neighbors_builder.py +++ b/coremltools/test/sklearn/test_nearest_neighbors_builder.py @@ -5,18 +5,19 @@ import os import shutil -from 
coremltools.models.utils import is_macos +from coremltools.models.utils import _is_macos import unittest from coremltools.models import MLModel from coremltools.models.nearest_neighbors import KNearestNeighborsClassifierBuilder -from coremltools._deps import HAS_SKLEARN -if HAS_SKLEARN: +from coremltools._deps import _HAS_SKLEARN + +if _HAS_SKLEARN: from sklearn.datasets import load_iris -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class NearestNeighborsBuilderTest(unittest.TestCase): """ Unit tests for the nearest neighbors builder class. @@ -33,35 +34,49 @@ def tearDown(self): # Do any cleanup here pass - def create_builder(self, default_class_label='default_label'): - builder = KNearestNeighborsClassifierBuilder(input_name='input', - output_name='output', - number_of_dimensions=4, - default_class_label=default_class_label) + def create_builder(self, default_class_label="default_label"): + builder = KNearestNeighborsClassifierBuilder( + input_name="input", + output_name="output", + number_of_dimensions=4, + default_class_label=default_class_label, + ) return builder def test_builder_output_types(self): - builder = self.create_builder(default_class_label='default') + builder = self.create_builder(default_class_label="default") self.assertIsNotNone(builder) - self.assertTrue(builder.spec.kNearestNeighborsClassifier.HasField("stringClassLabels")) + self.assertTrue( + builder.spec.kNearestNeighborsClassifier.HasField("stringClassLabels") + ) builder = self.create_builder(default_class_label=12) self.assertIsNotNone(builder) - self.assertTrue(builder.spec.kNearestNeighborsClassifier.HasField("int64ClassLabels")) + self.assertTrue( + builder.spec.kNearestNeighborsClassifier.HasField("int64ClassLabels") + ) with self.assertRaises(TypeError): bad_default_label = float(21.32) self.create_builder(default_class_label=bad_default_label) def test_builder_training_input(self): - builder = self.create_builder(default_class_label='default') + builder = self.create_builder(default_class_label="default") self.assertIsNotNone(builder) - self.assertTrue(builder.spec.kNearestNeighborsClassifier.HasField("stringClassLabels")) - - self.assertEqual(builder.spec.description.trainingInput[0].name, 'input') - self.assertEqual(builder.spec.description.trainingInput[0].type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(builder.spec.description.trainingInput[1].name, 'output') - self.assertEqual(builder.spec.description.trainingInput[1].type.WhichOneof('Type'), 'stringType') + self.assertTrue( + builder.spec.kNearestNeighborsClassifier.HasField("stringClassLabels") + ) + + self.assertEqual(builder.spec.description.trainingInput[0].name, "input") + self.assertEqual( + builder.spec.description.trainingInput[0].type.WhichOneof("Type"), + "multiArrayType", + ) + self.assertEqual(builder.spec.description.trainingInput[1].name, "output") + self.assertEqual( + builder.spec.description.trainingInput[1].type.WhichOneof("Type"), + "stringType", + ) def test_make_updatable(self): builder = self.create_builder() @@ -77,89 +92,91 @@ def test_author(self): builder = self.create_builder() self.assertIsNotNone(builder) - self.assertEqual(builder.spec.description.metadata.author, '') - builder.author = 'John Doe' - self.assertEqual(builder.author, 'John Doe') - self.assertEqual(builder.spec.description.metadata.author, 'John Doe') + self.assertEqual(builder.spec.description.metadata.author, "") + builder.author = "John Doe" + 
self.assertEqual(builder.author, "John Doe") + self.assertEqual(builder.spec.description.metadata.author, "John Doe") def test_description(self): builder = self.create_builder() self.assertIsNotNone(builder) - self.assertEqual(builder.spec.description.metadata.shortDescription, '') - builder.description = 'This is a description' - self.assertEqual(builder.description, 'This is a description') - self.assertEqual(builder.spec.description.metadata.shortDescription, 'This is a description') + self.assertEqual(builder.spec.description.metadata.shortDescription, "") + builder.description = "This is a description" + self.assertEqual(builder.description, "This is a description") + self.assertEqual( + builder.spec.description.metadata.shortDescription, "This is a description" + ) def test_weighting_scheme(self): builder = self.create_builder() self.assertIsNotNone(builder) - builder.weighting_scheme = 'uniform' - self.assertEqual(builder.weighting_scheme, 'uniform') + builder.weighting_scheme = "uniform" + self.assertEqual(builder.weighting_scheme, "uniform") - builder.weighting_scheme = 'inverse_distance' - self.assertEqual(builder.weighting_scheme, 'inverse_distance') + builder.weighting_scheme = "inverse_distance" + self.assertEqual(builder.weighting_scheme, "inverse_distance") - builder.weighting_scheme = 'unIfOrM' - self.assertEqual(builder.weighting_scheme, 'uniform') + builder.weighting_scheme = "unIfOrM" + self.assertEqual(builder.weighting_scheme, "uniform") - builder.weighting_scheme = 'InVerSE_DISTance' - self.assertEqual(builder.weighting_scheme, 'inverse_distance') + builder.weighting_scheme = "InVerSE_DISTance" + self.assertEqual(builder.weighting_scheme, "inverse_distance") with self.assertRaises(TypeError): - builder.weighting_scheme = 'test' + builder.weighting_scheme = "test" def test_index_type(self): builder = self.create_builder() self.assertIsNotNone(builder) - self.assertEqual(builder.index_type, 'linear') + self.assertEqual(builder.index_type, "linear") self.assertEqual(builder.leaf_size, 0) - builder.set_index_type('kd_tree') - self.assertEqual(builder.index_type, 'kd_tree') # test default value + builder.set_index_type("kd_tree") + self.assertEqual(builder.index_type, "kd_tree") # test default value self.assertEqual(builder.leaf_size, 30) - builder.set_index_type('linear') - self.assertEqual(builder.index_type, 'linear') + builder.set_index_type("linear") + self.assertEqual(builder.index_type, "linear") self.assertEqual(builder.leaf_size, 0) - builder.set_index_type('kd_tree', leaf_size=45) # test user-defined value - self.assertEqual(builder.index_type, 'kd_tree') + builder.set_index_type("kd_tree", leaf_size=45) # test user-defined value + self.assertEqual(builder.index_type, "kd_tree") self.assertEqual(builder.leaf_size, 45) - builder.set_index_type('linear', leaf_size=37) - self.assertEqual(builder.index_type, 'linear') + builder.set_index_type("linear", leaf_size=37) + self.assertEqual(builder.index_type, "linear") self.assertEqual(builder.leaf_size, 0) - builder.set_index_type('KD_TrEe', leaf_size=22) # test user-defined value - self.assertEqual(builder.index_type, 'kd_tree') + builder.set_index_type("KD_TrEe", leaf_size=22) # test user-defined value + self.assertEqual(builder.index_type, "kd_tree") self.assertEqual(builder.leaf_size, 22) - builder.set_index_type('linEAR') - self.assertEqual(builder.index_type, 'linear') + builder.set_index_type("linEAR") + self.assertEqual(builder.index_type, "linear") self.assertEqual(builder.leaf_size, 0) with 
self.assertRaises(TypeError):
-            builder.set_index_type('unsupported_index')
+            builder.set_index_type("unsupported_index")
         with self.assertRaises(TypeError):
-            builder.set_index_type('kd_tree', -10)
+            builder.set_index_type("kd_tree", -10)
         with self.assertRaises(TypeError):
-            builder.set_index_type('kd_tree', 0)
+            builder.set_index_type("kd_tree", 0)

     def test_leaf_size(self):
         builder = self.create_builder()
         self.assertIsNotNone(builder)

-        builder.set_index_type('kd_tree', leaf_size=45)  # test user-defined value
-        self.assertEqual(builder.index_type, 'kd_tree')
+        builder.set_index_type("kd_tree", leaf_size=45)  # test user-defined value
+        self.assertEqual(builder.index_type, "kd_tree")
         self.assertEqual(builder.leaf_size, 45)

         builder.leaf_size = 12
-        self.assertEqual(builder.index_type, 'kd_tree')
+        self.assertEqual(builder.index_type, "kd_tree")
         self.assertEqual(builder.leaf_size, 12)

     def test_set_number_of_neighbors_with_bounds(self):
@@ -167,7 +184,7 @@ def test_set_number_of_neighbors_with_bounds(self):
         self.assertIsNotNone(builder)

         self.assertEqual(builder.number_of_neighbors, 5)
-        (min_value, max_value) = builder.number_of_neighbors_allowed_range()
+        (min_value, max_value) = builder.number_of_neighbors_allowed_range()
         self.assertEqual(min_value, 1)
         self.assertEqual(max_value, 1000)

@@ -179,7 +196,7 @@ def test_set_number_of_neighbors_with_bounds(self):
         allowed_values = builder.number_of_neighbors_allowed_set()
         self.assertIsNone(allowed_values)

-        test_set = { 3, 5, 7, 9 }
+        test_set = {3, 5, 7, 9}
         builder.set_number_of_neighbors_with_bounds(7, allowed_set=test_set)
         self.assertEqual(builder.number_of_neighbors, 7)
         allowed_values = builder.number_of_neighbors_allowed_set()
@@ -194,9 +211,11 @@ def test_set_number_of_neighbors_with_bounds_error_conditions(self):
         builder.set_number_of_neighbors_with_bounds(3)

         test_range = (3, 15)
-        test_set = { 1, 3, 5 }
+        test_set = {1, 3, 5}
         with self.assertRaises(ValueError):
-            builder.set_number_of_neighbors_with_bounds(3, allowed_range=test_range, allowed_set=test_set)
+            builder.set_number_of_neighbors_with_bounds(
+                3, allowed_range=test_range, allowed_set=test_set
+            )

         with self.assertRaises(ValueError):
             builder.set_number_of_neighbors_with_bounds(3, allowed_range=(-5, 5))
@@ -205,13 +224,15 @@ def test_set_number_of_neighbors_with_bounds_error_conditions(self):
             builder.set_number_of_neighbors_with_bounds(3, allowed_range=(5, 1))

         with self.assertRaises(ValueError):
-            builder.set_number_of_neighbors_with_bounds(3, allowed_range=test_range, allowed_set=test_set)
+            builder.set_number_of_neighbors_with_bounds(
+                3, allowed_range=test_range, allowed_set=test_set
+            )

         with self.assertRaises(ValueError):
             builder.set_number_of_neighbors_with_bounds(2, allowed_range=test_range)

         with self.assertRaises(TypeError):
-            builder.set_number_of_neighbors_with_bounds(5, allowed_set={ 5, -3, 7 })
+            builder.set_number_of_neighbors_with_bounds(5, allowed_set={5, -3, 7})

         with self.assertRaises(ValueError):
             builder.set_number_of_neighbors_with_bounds(4, allowed_set=test_set)
@@ -220,10 +241,10 @@ def test_set_number_of_neighbors_with_bounds_error_conditions(self):
             builder.set_number_of_neighbors_with_bounds(4, allowed_set=test_set)

         with self.assertRaises(TypeError):
-            builder.set_number_of_neighbors_with_bounds(2, allowed_set=[ 1, 2, 3 ])
+            builder.set_number_of_neighbors_with_bounds(2, allowed_set=[1, 2, 3])

         with self.assertRaises(TypeError):
-            builder.set_number_of_neighbors_with_bounds(4, allowed_range={ 2, 200 })
+            builder.set_number_of_neighbors_with_bounds(4, allowed_range={2, 200})

         with self.assertRaises(TypeError):
             builder.set_number_of_neighbors_with_bounds(4, allowed_range=(2, 10, 20))

@@ -248,7 +269,7 @@ def test_set_number_of_neighbors(self):
         test_set = {3, 5, 7, 9}
         builder.set_number_of_neighbors_with_bounds(7, allowed_set=test_set)
-        
+
         with self.assertRaises(ValueError):
             builder.set_number_of_neighbors_with_bounds(4, allowed_set=test_set)
         builder.set_number_of_neighbors_with_bounds(5, allowed_set=test_set)
@@ -258,7 +279,7 @@ def test_add_samples_invalid_data(self):
         builder = self.create_builder()
         self.assertIsNotNone(builder)

-        invalid_X = [ [1.0, 2.4] ]
+        invalid_X = [[1.0, 2.4]]
         with self.assertRaises(TypeError):
             builder.add_samples(invalid_X, self.training_y)

@@ -286,16 +307,16 @@ def test_add_samples_int_labels(self):
         self._validate_samples(builder.spec, self.training_X[:20], self.training_y[:20])

     def test_add_samples_string_labels(self):
-        builder = self.create_builder(default_class_label='default')
+        builder = self.create_builder(default_class_label="default")
         self.assertIsNotNone(builder)

         some_X = self.training_X[:3]
-        some_y = ['one', 'two', 'three']
+        some_y = ["one", "two", "three"]
         builder.add_samples(some_X, some_y)
         self._validate_samples(builder.spec, some_X, some_y)

         addl_X = self.training_X[3:6]
-        addl_y = ['four', 'five', 'six']
+        addl_y = ["four", "five", "six"]
         builder.add_samples(addl_X, addl_y)
         self._validate_samples(builder.spec, self.training_X[0:6], some_y + addl_y)

@@ -304,36 +325,37 @@ def test_add_samples_invalid_label_types(self):
         self.assertIsNotNone(builder_int_labels)

         some_X = self.training_X[:3]
-        invalid_int_y = [0, 'one', 2]
+        invalid_int_y = [0, "one", 2]
         with self.assertRaises(TypeError):
             builder_int_labels.add_samples(some_X, invalid_int_y)

-        builder_string_labels = self.create_builder(default_class_label='default')
+        builder_string_labels = self.create_builder(default_class_label="default")
         self.assertIsNotNone(builder_string_labels)

-        invalid_string_y = ['zero', 'one', 2]
+        invalid_string_y = ["zero", "one", 2]
         with self.assertRaises(TypeError):
             builder_string_labels.add_samples(some_X, invalid_string_y)

-    @unittest.skipUnless(is_macos(), 'Only supported on MacOS platform.')
+    @unittest.skipUnless(_is_macos(), "Only supported on MacOS platform.")
     def test_can_init_and_save_model_from_builder_with_updated_spec(self):
         builder = KNearestNeighborsClassifierBuilder(
-            input_name='input',
-            output_name='output',
+            input_name="input",
+            output_name="output",
             number_of_dimensions=10,
-            default_class_label='defaultLabel',
+            default_class_label="defaultLabel",
             k=3,
-            weighting_scheme='inverse_distance',
-            index_type='kd_tree',
-            leaf_size=50)
-        builder.author = 'CoreML Team'
-        builder.license = 'MIT'
-        builder.description = 'test_builder_with_validation'
+            weighting_scheme="inverse_distance",
+            index_type="kd_tree",
+            leaf_size=50,
+        )
+        builder.author = "CoreML Team"
+        builder.license = "MIT"
+        builder.description = "test_builder_with_validation"

         # Save the updated spec
         coreml_model = MLModel(builder.spec)
         self.assertIsNotNone(coreml_model)

-        coreml_model_path = '/tmp/__test_builder_with_validation.mlmodel'
+        coreml_model_path = "/tmp/__test_builder_with_validation.mlmodel"
         try:
             coreml_model.save(coreml_model_path)
@@ -341,17 +363,19 @@ def test_can_init_and_save_model_from_builder_with_updated_spec(self):
         finally:
             self._delete_mlmodel_and_mlmodelc(coreml_model_path)

-    @unittest.skipUnless(is_macos(), 'Only supported on MacOS platform.')
+    @unittest.skipUnless(_is_macos(), "Only supported on MacOS platform.")
     def test_can_init_and_save_model_from_builder_default_parameters(self):
-        builder = KNearestNeighborsClassifierBuilder(input_name='input',
-                                                     output_name='output',
-                                                     number_of_dimensions=4,
-                                                     default_class_label='defaultLabel')
+        builder = KNearestNeighborsClassifierBuilder(
+            input_name="input",
+            output_name="output",
+            number_of_dimensions=4,
+            default_class_label="defaultLabel",
+        )

         # Save the updated spec
         coreml_model = MLModel(builder.spec)
         self.assertIsNotNone(coreml_model)

-        coreml_model_path = '/tmp/__test_builder_with_validation.mlmodel'
+        coreml_model_path = "/tmp/__test_builder_with_validation.mlmodel"
         try:
             coreml_model.save(coreml_model_path)
@@ -361,17 +385,27 @@ def test_can_init_and_save_model_from_builder_default_parameters(self):

     def _validate_samples(self, spec, expected_X, expected_y):
         """Validate the float samples returned from the converted scikit KNeighborsClassifier"""
-        num_dimensions = spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions
-        for index, sample in enumerate(spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples):
+        num_dimensions = (
+            spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions
+        )
+        for index, sample in enumerate(
+            spec.kNearestNeighborsClassifier.nearestNeighborsIndex.floatSamples
+        ):
             for dim in range(0, num_dimensions):
-                self.assertAlmostEqual(sample.vector[dim], expected_X[index][dim], places=6)
+                self.assertAlmostEqual(
+                    sample.vector[dim], expected_X[index][dim], places=6
+                )

         if spec.kNearestNeighborsClassifier.HasField("int64ClassLabels"):
-            for index, label in enumerate(spec.kNearestNeighborsClassifier.int64ClassLabels.vector):
+            for index, label in enumerate(
+                spec.kNearestNeighborsClassifier.int64ClassLabels.vector
+            ):
                 self.assertEqual(label, expected_y[index])
         elif spec.kNearestNeighborsClassifier.HasField("stringClassLabels"):
-            for index, label in enumerate(spec.kNearestNeighborsClassifier.stringClassLabels.vector):
+            for index, label in enumerate(
+                spec.kNearestNeighborsClassifier.stringClassLabels.vector
+            ):
                 self.assertEqual(label, expected_y[index])

     @staticmethod
@@ -379,6 +413,6 @@ def _delete_mlmodel_and_mlmodelc(path_to_mlmodel):
         """Delete the .mlmodel and .mlmodelc for the given .mlmodel."""
         if os.path.exists(path_to_mlmodel):
             os.remove(path_to_mlmodel)
-        path_to_mlmodelc = '{}c'.format(path_to_mlmodel)
+        path_to_mlmodelc = "{}c".format(path_to_mlmodel)
         if os.path.exists(path_to_mlmodelc):
             shutil.rmtree(path_to_mlmodelc)
diff --git a/coremltools/test/sklearn/test_normalizer.py b/coremltools/test/sklearn/test_normalizer.py
index f50965c80..5514eba24 100644
--- a/coremltools/test/sklearn/test_normalizer.py
+++ b/coremltools/test/sklearn/test_normalizer.py
@@ -7,18 +7,19 @@
 import random
 import unittest

-from coremltools.models.utils import evaluate_transformer,\
-    macos_version, is_macos
+from coremltools.models.utils import evaluate_transformer, _macos_version, _is_macos

-from coremltools._deps import HAS_SKLEARN
-if HAS_SKLEARN:
+from coremltools._deps import _HAS_SKLEARN
+
+if _HAS_SKLEARN:
     from sklearn.preprocessing import Normalizer
     from coremltools.converters import sklearn as converter

-@unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                     'Only supported on macOS 10.13+')
-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+@unittest.skipUnless(
+    _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+)
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class NormalizerScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -26,32 +27,33 @@ class NormalizerScikitTest(unittest.TestCase):
     def test_random(self):
         # Generate some random data
-        X = _np.random.random(size = (50, 3))
-
-        for param in ('l1', 'l2', 'max'):
+        X = _np.random.random(size=(50, 3))
+        for param in ("l1", "l2", "max"):
             cur_model = Normalizer(norm=param)
             output = cur_model.fit_transform(X)

-            spec = converter.convert(cur_model, ["a", 'b', 'c'], 'out')
+            spec = converter.convert(cur_model, ["a", "b", "c"], "out")

-            evaluate_transformer(spec,
-                                 [dict(zip(["a", "b", "c"], row)) for row in X],
-                                 [{"out": row} for row in output])
+            evaluate_transformer(
+                spec,
+                [dict(zip(["a", "b", "c"], row)) for row in X],
+                [{"out": row} for row in output],
+            )

     def test_boston(self):
         from sklearn.datasets import load_boston
-        
+
         scikit_data = load_boston()
-        scikit_model = Normalizer(norm='l2').fit(scikit_data.data)
+        scikit_model = Normalizer(norm="l2").fit(scikit_data.data)

-        spec = converter.convert(scikit_model, scikit_data.feature_names, 'out')
+        spec = converter.convert(scikit_model, scikit_data.feature_names, "out")

-        input_data = [dict(zip(scikit_data.feature_names, row))
-                      for row in scikit_data.data]
+        input_data = [
+            dict(zip(scikit_data.feature_names, row)) for row in scikit_data.data
+        ]

-        output_data = [
-            {"out": row} for row in scikit_model.transform(scikit_data.data)]
+        output_data = [{"out": row} for row in scikit_model.transform(scikit_data.data)]

         evaluate_transformer(spec, input_data, output_data)
diff --git a/coremltools/test/sklearn/test_one_hot_encoder.py b/coremltools/test/sklearn/test_one_hot_encoder.py
index d4db181c8..d9604d956 100644
--- a/coremltools/test/sklearn/test_one_hot_encoder.py
+++ b/coremltools/test/sklearn/test_one_hot_encoder.py
@@ -7,12 +7,10 @@
 import unittest
 import numpy as np

-from coremltools._deps import HAS_SKLEARN
-from coremltools.models.utils import evaluate_transformer,\
-    macos_version, is_macos
+from coremltools._deps import _HAS_SKLEARN
+from coremltools.models.utils import evaluate_transformer, _macos_version, _is_macos

-
-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.pipeline import Pipeline
     from sklearn.preprocessing import OneHotEncoder
     from sklearn.preprocessing import Normalizer
@@ -21,7 +19,7 @@
     from sklearn.datasets import load_boston

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class OneHotEncoderScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -33,86 +31,100 @@ def setUpClass(self):
         Set up the unit test by loading the dataset and training a model.
         """
         scikit_data = [[0], [1], [2], [4], [3], [2], [4], [5], [6], [7]]
-        scikit_data_multiple_cols = [[0, 1], [1, 0], [2, 2], [3, 3], [4, 4]]
+        scikit_data_multiple_cols = [[0, 1], [1, 0], [2, 2], [3, 3], [4, 4]]
         scikit_model = OneHotEncoder()
         scikit_model.fit(scikit_data)

         # Save the data and the model
-        self.scikit_data = np.asarray(scikit_data, dtype = 'd')
-        self.scikit_data_multiple_cols = np.asarray(scikit_data_multiple_cols, dtype = 'd')
+        self.scikit_data = np.asarray(scikit_data, dtype="d")
+        self.scikit_data_multiple_cols = np.asarray(
+            scikit_data_multiple_cols, dtype="d"
+        )
         self.scikit_model = scikit_model

-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                         'Only supported on macOS 10.13+')
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+    )
     def test_conversion_one_column(self):
         # Fit a single OHE
         scikit_model = OneHotEncoder()
         scikit_model.fit(self.scikit_data)
-        spec = sklearn.convert(scikit_model, 'single_feature', 'out').get_spec()
+        spec = sklearn.convert(scikit_model, "single_feature", "out").get_spec()

-        test_data = [{'single_feature': row} for row in self.scikit_data]
+        test_data = [{"single_feature": row} for row in self.scikit_data]
         scikit_output = [
-            {'out': row} for row in scikit_model.transform(
-                self.scikit_data).toarray()]
+            {"out": row} for row in scikit_model.transform(self.scikit_data).toarray()
+        ]
         metrics = evaluate_transformer(spec, test_data, scikit_output)

         self.assertIsNotNone(spec)
         self.assertIsNotNone(spec.description)
-        self.assertEquals(metrics['num_errors'], 0)
+        self.assertEquals(metrics["num_errors"], 0)

-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                         'Only supported on macOS 10.13+')
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+    )
     def test_conversion_many_columns(self):
         scikit_model = OneHotEncoder()
         scikit_model.fit(self.scikit_data_multiple_cols)
-        spec = sklearn.convert(scikit_model, ['feature_1', 'feature_2'], 'out').get_spec()
+        spec = sklearn.convert(
+            scikit_model, ["feature_1", "feature_2"], "out"
+        ).get_spec()

         test_data = [
-            {'feature_1': row[0],
-             'feature_2': row[1]} for row in self.scikit_data_multiple_cols]
+            {"feature_1": row[0], "feature_2": row[1]}
+            for row in self.scikit_data_multiple_cols
+        ]
         scikit_output = [
-            {'out': row} for row in scikit_model.transform(
-                self.scikit_data_multiple_cols).toarray()]
+            {"out": row}
+            for row in scikit_model.transform(self.scikit_data_multiple_cols).toarray()
+        ]
         metrics = evaluate_transformer(spec, test_data, scikit_output)

         self.assertIsNotNone(spec)
         self.assertIsNotNone(spec.description)
-        self.assertEquals(metrics['num_errors'], 0)
+        self.assertEquals(metrics["num_errors"], 0)

-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                         'Only supported on macOS 10.13+')
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+    )
     def test_conversion_one_column_of_several(self):
-        scikit_model = OneHotEncoder(categorical_features = [0])
+        scikit_model = OneHotEncoder(categorical_features=[0])
         scikit_model.fit(copy(self.scikit_data_multiple_cols))
-        spec = sklearn.convert(scikit_model, ['feature_1', 'feature_2'], 'out').get_spec()
+        spec = sklearn.convert(
+            scikit_model, ["feature_1", "feature_2"], "out"
+        ).get_spec()

         test_data = [
-            {'feature_1': row[0],
-             'feature_2': row[1]} for row in self.scikit_data_multiple_cols]
-        scikit_output = [{'out': row} for row in scikit_model.transform(
-            self.scikit_data_multiple_cols).toarray()]
+            {"feature_1": row[0], "feature_2": row[1]}
+            for row in self.scikit_data_multiple_cols
+        ]
+        scikit_output = [
+            {"out": row}
+            for row in scikit_model.transform(self.scikit_data_multiple_cols).toarray()
+        ]
         metrics = evaluate_transformer(spec, test_data, scikit_output)

         self.assertIsNotNone(spec)
         self.assertIsNotNone(spec.description)
-        self.assertEquals(metrics['num_errors'], 0)
+        self.assertEquals(metrics["num_errors"], 0)

-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                         'Only supported on macOS 10.13+')
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+    )
     def test_boston_OHE(self):
         data = load_boston()

         for categorical_features in [[3], [8], [3, 8], [8, 3]]:
             model = OneHotEncoder(
-                categorical_features = categorical_features, sparse=False)
+                categorical_features=categorical_features, sparse=False
+            )
             model.fit(data.data, data.target)

             # Convert the model
-            spec = sklearn.convert(model, data.feature_names, 'out').get_spec()
+            spec = sklearn.convert(model, data.feature_names, "out").get_spec()

-            input_data = [dict(zip(data.feature_names, row))
-                          for row in data.data]
+            input_data = [dict(zip(data.feature_names, row)) for row in data.data]
             output_data = [{"out": row} for row in model.transform(data.data)]

             result = evaluate_transformer(spec, input_data, output_data)
@@ -120,102 +132,130 @@ def test_boston_OHE(self):
             assert result["num_errors"] == 0

     # This test still isn't working
-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                         'Only supported on macOS 10.13+')
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+    )
     def test_boston_OHE_pipeline(self):
         data = load_boston()
-
-        for categorical_features in [ [3], [8], [3, 8], [8,3] ]:
+        for categorical_features in [[3], [8], [3, 8], [8, 3]]:
             # Put it in a pipeline so that we can test whether the output dimension
-            # handling is correct. 
+            # handling is correct.
             model = Pipeline(
-                [("OHE", OneHotEncoder(
-                    categorical_features=categorical_features)),
-                 ("Normalizer", Normalizer())])
+                [
+                    ("OHE", OneHotEncoder(categorical_features=categorical_features)),
+                    ("Normalizer", Normalizer()),
+                ]
+            )
             model.fit(data.data.copy(), data.target)

             # Convert the model
-            spec = sklearn.convert(model, data.feature_names, 'out').get_spec()
+            spec = sklearn.convert(model, data.feature_names, "out").get_spec()

             input_data = [dict(zip(data.feature_names, row)) for row in data.data]
-            output_data = [{"out" : row} for row in model.transform(data.data.copy())]
+            output_data = [{"out": row} for row in model.transform(data.data.copy())]

             result = evaluate_transformer(spec, input_data, output_data)

             assert result["num_errors"] == 0

-    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                         'Only supported on macOS 10.13+')
-    def test_random_sparse_data(self):
+    @unittest.skipUnless(
+        _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+    )
+    def test_random_sparse_data(self):
         n_columns = 8
         n_categories = 20

         import numpy.random as rn
-        rn.seed(0)
-        categories = rn.randint(50000, size = (n_columns, n_categories) )
-        for dt in ['int32', 'float32', 'float64']:
-
-            _X = np.array( [[categories[j, rn.randint(n_categories)]
-                             for j in range(n_columns)]
-                            for i in range(100)], dtype=dt)
-
-            # Test this data on a bunch of possible inputs.
-            for sparse in (True, False):
-                for categorical_features in ['all', [3], [4], range(2, 8),
-                                             range(0, 4), range(0, 8)]:
+
+        rn.seed(0)
+        categories = rn.randint(50000, size=(n_columns, n_categories))
+
+        for dt in ["int32", "float32", "float64"]:
+
+            _X = np.array(
+                [
+                    [categories[j, rn.randint(n_categories)] for j in range(n_columns)]
+                    for i in range(100)
+                ],
+                dtype=dt,
+            )
+
+            # Test this data on a bunch of possible inputs.
+            for sparse in (True, False):
+                for categorical_features in [
+                    "all",
+                    [3],
+                    [4],
+                    range(2, 8),
+                    range(0, 4),
+                    range(0, 8),
+                ]:
                     X = _X.copy()

                     # This appears to be the only type now working.
                     assert X.dtype == np.dtype(dt)

                     model = OneHotEncoder(
-                        categorical_features=categorical_features,
-                        sparse=sparse)
+                        categorical_features=categorical_features, sparse=sparse
+                    )
                     model.fit(X)

                     # Convert the model
-                    spec = sklearn.convert(
-                        model, [('data', Array(n_columns))], 'out')
+                    spec = sklearn.convert(model, [("data", Array(n_columns))], "out")

                     X_out = model.transform(X)
                     if sparse:
                         X_out = X_out.todense()

-                    input_data = [{'data' : row} for row in X]
-                    output_data = [{"out" : row} for row in X_out]
+                    input_data = [{"data": row} for row in X]
+                    output_data = [{"out": row} for row in X_out]

                     result = evaluate_transformer(spec, input_data, output_data)

                     assert result["num_errors"] == 0

             # Test normal data inside a pipeline
-            for sparse in (True, False):
-                for categorical_features in ['all', [3], [4], range(2, 8),
-                                             range(0, 4), range(0, 8)]:
+            for sparse in (True, False):
+                for categorical_features in [
+                    "all",
+                    [3],
+                    [4],
+                    range(2, 8),
+                    range(0, 4),
+                    range(0, 8),
+                ]:
                     X = _X.copy()

                     model = Pipeline(
-                        [("OHE", OneHotEncoder(
-                            categorical_features=categorical_features,
-                            sparse=sparse)), ("Normalizer", Normalizer())])
+                        [
+                            (
+                                "OHE",
+                                OneHotEncoder(
+                                    categorical_features=categorical_features,
+                                    sparse=sparse,
+                                ),
+                            ),
+                            ("Normalizer", Normalizer()),
+                        ]
+                    )
                     model.fit(X)

                     # Convert the model
                     spec = sklearn.convert(
-                        model, [('data', Array(n_columns))], 'out').get_spec()
+                        model, [("data", Array(n_columns))], "out"
+                    ).get_spec()

                     X_out = model.transform(X)
                     if sparse:
                         X_out = X_out.todense()

-                    input_data = [{'data' : row} for row in X]
-                    output_data = [{"out" : row} for row in X_out]
+                    input_data = [{"data": row} for row in X]
+                    output_data = [{"out": row} for row in X_out]

                     result = evaluate_transformer(spec, input_data, output_data)
@@ -225,10 +265,11 @@ def test_conversion_bad_inputs(self):
         # Error on converting an untrained model
         with self.assertRaises(TypeError):
             model = OneHotEncoder()
-            spec = sklearn.convert(model, 'data', 'out')
+            spec = sklearn.convert(model, "data", "out")

         # Check the expected class during conversion.
         with self.assertRaises(TypeError):
             from sklearn.linear_model import LinearRegression
+
             model = LinearRegression()
-            spec = sklearn.convert(model, 'data', 'out')
+            spec = sklearn.convert(model, "data", "out")
diff --git a/coremltools/test/sklearn/test_random_forest_classifier.py b/coremltools/test/sklearn/test_random_forest_classifier.py
index fbe7ea6ba..51456be84 100644
--- a/coremltools/test/sklearn/test_random_forest_classifier.py
+++ b/coremltools/test/sklearn/test_random_forest_classifier.py
@@ -4,15 +4,16 @@
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause

 import unittest
-from coremltools._deps import HAS_SKLEARN
+from coremltools._deps import _HAS_SKLEARN
 from coremltools.proto import Model_pb2
 from coremltools.proto import FeatureTypes_pb2

-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.ensemble import RandomForestClassifier
     from coremltools.converters import sklearn as skl_converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class RandomForestBinaryClassifierScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -27,59 +28,62 @@ def setUpClass(self):
         from sklearn.ensemble import RandomForestClassifier

         scikit_data = load_boston()
-        scikit_model = RandomForestClassifier(random_state = 1)
-        target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
-        scikit_model.fit(scikit_data['data'], target)
+        scikit_model = RandomForestClassifier(random_state=1)
+        target = 1 * (scikit_data["target"] > scikit_data["target"].mean())
+        scikit_model.fit(scikit_data["data"], target)

         # Save the data and the model
         self.scikit_data = scikit_data
         self.scikit_model = scikit_model

     def test_conversion(self):
-
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
-        spec = skl_converter.convert(self.scikit_model, input_names, 'target').get_spec()
+        output_name = "target"
+        spec = skl_converter.convert(
+            self.scikit_model, input_names, "target"
+        ).get_spec()
         self.assertIsNotNone(spec)

         # Test the model class
         self.assertIsNotNone(spec.description)

         # Test the interface class
-        self.assertEquals(spec.description.predictedFeatureName,
-                          'target')
+        self.assertEquals(spec.description.predictedFeatureName, "target")

         # Test the inputs and outputs
         self.assertEquals(len(spec.description.output), 2)
-        self.assertEquals(spec.description.output[0].name, 'target')
-        self.assertEquals(spec.description.output[0].type.WhichOneof('Type'),
-                          'int64Type')
+        self.assertEquals(spec.description.output[0].name, "target")
+        self.assertEquals(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEquals(input_type.type.WhichOneof('Type'),
-                              'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEquals(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         self.assertEquals(len(spec.pipelineClassifier.pipeline.models), 2)
-        tr = spec.pipelineClassifier.pipeline.models[-1].treeEnsembleClassifier.treeEnsemble
+        tr = spec.pipelineClassifier.pipeline.models[
+            -1
+        ].treeEnsembleClassifier.treeEnsemble
         self.assertIsNotNone(tr)
         self.assertEquals(len(tr.nodes), 1048)

     def test_conversion_bad_inputs(self):
-
         # Error on converting an untrained model
         with self.assertRaises(Exception):
             model = RandomForestClassifier()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

         # Check the expected class during conversion.
         from sklearn.preprocessing import OneHotEncoder
+
         with self.assertRaises(Exception):
             model = OneHotEncoder()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class RandomForestMultiClassClassifierScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -95,7 +99,7 @@ def setUpClass(self):
         import numpy as np

         scikit_data = load_boston()
-        scikit_model = RandomForestClassifier(random_state = 1)
+        scikit_model = RandomForestClassifier(random_state=1)
         t = scikit_data.target
         target = np.digitize(t, np.histogram(t)[1]) - 1
         scikit_model.fit(scikit_data.data, target)
@@ -106,10 +110,11 @@ def setUpClass(self):
         self.scikit_model = scikit_model

     def test_conversion(self):
-
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
-        spec = skl_converter.convert(self.scikit_model, input_names, 'target').get_spec()
+        output_name = "target"
+        spec = skl_converter.convert(
+            self.scikit_model, input_names, "target"
+        ).get_spec()
         self.assertIsNotNone(spec)

         # Test the model class
@@ -117,20 +122,25 @@ def test_conversion(self):
         self.assertIsNotNone(spec.treeEnsembleClassifier)

         # Test the interface class
-        self.assertEquals(spec.description.predictedFeatureName, 'target')
+        self.assertEquals(spec.description.predictedFeatureName, "target")

         # Test the inputs and outputs
         self.assertEquals(len(spec.description.output), 2)
-        self.assertEquals(spec.description.output[0].name, 'target')
-        self.assertEquals(spec.description.output[0].type.WhichOneof('Type'), 'int64Type')
+        self.assertEquals(spec.description.output[0].name, "target")
+        self.assertEquals(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEquals(input_type.type.WhichOneof('Type'), 'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEquals(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         self.assertEquals(len(spec.pipelineClassifier.pipeline.models), 2)
-        tr = spec.pipelineClassifier.pipeline.models[-1].treeEnsembleClassifier.treeEnsemble
+        tr = spec.pipelineClassifier.pipeline.models[
+            -1
+        ].treeEnsembleClassifier.treeEnsemble
         self.assertIsNotNone(tr)
         self.assertEquals(len(tr.nodes), 2970)

@@ -138,10 +148,11 @@ def test_conversion_bad_inputs(self):
         # Error on converting an untrained model
         with self.assertRaises(Exception):
             model = RandomForestClassifier()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

         # Check the expected class during conversion.
         with self.assertRaises(Exception):
             from sklearn.preprocessing import OneHotEncoder
+
             model = OneHotEncoder()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")
diff --git a/coremltools/test/sklearn/test_random_forest_classifier_numeric.py b/coremltools/test/sklearn/test_random_forest_classifier_numeric.py
index f8896af59..b50df1be9 100644
--- a/coremltools/test/sklearn/test_random_forest_classifier_numeric.py
+++ b/coremltools/test/sklearn/test_random_forest_classifier_numeric.py
@@ -8,42 +8,46 @@
 import os
 import pandas as pd
 import numpy as np
-from coremltools._deps import HAS_SKLEARN, SKLEARN_VERSION
-from coremltools.models.utils import evaluate_classifier, \
-    macos_version, is_macos
+from coremltools._deps import _HAS_SKLEARN, _SKLEARN_VERSION
+from coremltools.models.utils import evaluate_classifier, _macos_version, _is_macos
 from distutils.version import StrictVersion
 import pytest

-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.ensemble import RandomForestClassifier
     from coremltools.converters import sklearn as skl_converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class RandomForestClassificationBostonHousingScikitNumericTest(unittest.TestCase):
-    def _check_metrics(self, metrics, params = {}):
-        self.assertEquals(metrics['num_errors'], 0, msg = 'Failed case %s. Results %s' % (params, metrics))
+    def _check_metrics(self, metrics, params={}):
+        self.assertEquals(
+            metrics["num_errors"],
+            0,
+            msg="Failed case %s. Results %s" % (params, metrics),
+        )

     def _train_convert_evaluate_assert(self, **scikit_params):
-        scikit_model = RandomForestClassifier(random_state = 1, **scikit_params)
+        scikit_model = RandomForestClassifier(random_state=1, **scikit_params)
         scikit_model.fit(self.X, self.target)

         # Convert the model
         spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

-        if is_macos() and macos_version() >= (10, 13):
+        if _is_macos() and _macos_version() >= (10, 13):
             # Get predictions
             df = pd.DataFrame(self.X, columns=self.feature_names)
-            df['prediction'] = scikit_model.predict(self.X)
+            df["prediction"] = scikit_model.predict(self.X)

             # Evaluate it
-            metrics = evaluate_classifier(spec, df, verbose = False)
+            metrics = evaluate_classifier(spec, df, verbose=False)
             self._check_metrics(metrics, scikit_params)

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class RandomForestBinaryClassifierBostonHousingScikitNumericTest(
-    RandomForestClassificationBostonHousingScikitNumericTest):
+    RandomForestClassificationBostonHousingScikitNumericTest
+):
     @classmethod
     def setUpClass(self):
         """
@@ -53,29 +57,31 @@ def setUpClass(self):

         # Load data and train model
         scikit_data = load_boston()
-        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
-        self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
+        self.X = scikit_data.data.astype("f").astype(
+            "d"
+        )  ## scikit-learn downcasts data
+        self.target = 1 * (scikit_data["target"] > scikit_data["target"].mean())
         self.feature_names = scikit_data.feature_names
-        self.output_name = 'target'
+        self.output_name = "target"
         self.scikit_data = scikit_data

     def test_simple_binary_classifier(self):
-        self._train_convert_evaluate_assert(max_depth = 13)
+        self._train_convert_evaluate_assert(max_depth=13)

     @pytest.mark.slow
     def test_binary_classifier_stress_test(self):
         options = dict(
-            n_estimators = [1, 5, 10],
-            max_depth = [1, 5, None],
-            min_samples_split = [2, 10, 0.5],
-            min_samples_leaf = [1, 5],
-            min_weight_fraction_leaf = [0.0, 0.5],
-            max_leaf_nodes = [None, 20],
+            n_estimators=[1, 5, 10],
+            max_depth=[1, 5, None],
+            min_samples_split=[2, 10, 0.5],
+            min_samples_leaf=[1, 5],
+            min_weight_fraction_leaf=[0.0, 0.5],
+            max_leaf_nodes=[None, 20],
         )

-        if SKLEARN_VERSION >= StrictVersion('0.19'):
-            options['min_impurity_decrease'] = [1e-07, 0.1]
+        if _SKLEARN_VERSION >= StrictVersion("0.19"):
+            options["min_impurity_decrease"] = [1e-07, 0.1]

         # Make a cartesian product of all options
         product = itertools.product(*options.values())
@@ -85,10 +91,11 @@ def test_binary_classifier_stress_test(self):
         for it, arg in enumerate(args):
             self._train_convert_evaluate_assert(**arg)

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-class RandomForestMultiClassClassificationBostonHousingScikitNumericTest(
-    RandomForestClassificationBostonHousingScikitNumericTest):
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+class RandomForestMultiClassClassificationBostonHousingScikitNumericTest(
+    RandomForestClassificationBostonHousingScikitNumericTest
+):
     @classmethod
     def setUpClass(self):
         from sklearn.datasets import load_boston
@@ -96,17 +103,20 @@ def setUpClass(self):

         # Load data and train model
         import numpy as np
+
         scikit_data = load_boston()
-        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
+        self.X = scikit_data.data.astype("f").astype(
+            "d"
+        )  ## scikit-learn downcasts data
         t = scikit_data.target
         num_classes = 3
-        target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1
-        
+        target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1
+
         # Save the data and the model
         self.scikit_data = scikit_data
         self.target = target
         self.feature_names = scikit_data.feature_names
-        self.output_name = 'target'
+        self.output_name = "target"

     def test_simple_multiclass(self):
         self._train_convert_evaluate_assert()
@@ -114,16 +124,16 @@ def test_simple_multiclass(self):
     @pytest.mark.slow
     def test_multiclass_stress_test(self):
         options = dict(
-            n_estimators = [1, 5, 10],
-            max_depth = [1, 5, None],
-            min_samples_split = [2, 10, 0.5],
-            min_samples_leaf = [1, 5],
-            min_weight_fraction_leaf = [0.0, 0.5],
-            max_leaf_nodes = [None, 20],
+            n_estimators=[1, 5, 10],
+            max_depth=[1, 5, None],
+            min_samples_split=[2, 10, 0.5],
+            min_samples_leaf=[1, 5],
+            min_weight_fraction_leaf=[0.0, 0.5],
+            max_leaf_nodes=[None, 20],
         )

-        if SKLEARN_VERSION >= StrictVersion('0.19'):
-            options['min_impurity_decrease'] = [1e-07, 0.1]
+        if _SKLEARN_VERSION >= StrictVersion("0.19"):
+            options["min_impurity_decrease"] = [1e-07, 0.1]

         # Make a cartesian product of all options
         product = itertools.product(*options.values())
diff --git a/coremltools/test/sklearn/test_random_forest_regression.py b/coremltools/test/sklearn/test_random_forest_regression.py
index d0463e2c9..406d5130b 100644
--- a/coremltools/test/sklearn/test_random_forest_regression.py
+++ b/coremltools/test/sklearn/test_random_forest_regression.py
@@ -5,15 +5,16 @@
 import unittest
 from sklearn.ensemble import RandomForestRegressor
-from coremltools._deps import HAS_SKLEARN
+from coremltools._deps import _HAS_SKLEARN
 from coremltools.proto import Model_pb2
 from coremltools.proto import FeatureTypes_pb2

-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.ensemble import RandomForestRegressor
     from coremltools.converters import sklearn as skl_converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.')
+
+@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.")
 class RandomForestRegressorScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -28,53 +29,56 @@ def setUpClass(self):
         from sklearn.ensemble import RandomForestRegressor

         scikit_data = load_boston()
-        scikit_model = RandomForestRegressor(random_state = 1)
-        scikit_model.fit(scikit_data['data'], scikit_data['target'])
+        scikit_model = RandomForestRegressor(random_state=1)
+        scikit_model.fit(scikit_data["data"], scikit_data["target"])

         # Save the data and the model
         self.scikit_data = scikit_data
         self.scikit_model = scikit_model

     def test_conversion(self):
-
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
-        spec = skl_converter.convert(self.scikit_model, input_names, 'target').get_spec()
+        output_name = "target"
+        spec = skl_converter.convert(
+            self.scikit_model, input_names, "target"
+        ).get_spec()
         self.assertIsNotNone(spec)

         # Test the model class
         self.assertIsNotNone(spec.description)

         # Test the interface class
-        self.assertEquals(spec.description.predictedFeatureName,
-                          'target')
+        self.assertEquals(spec.description.predictedFeatureName, "target")

         # Test the inputs and outputs
         self.assertEquals(len(spec.description.output), 1)
-        self.assertEquals(spec.description.output[0].name, 'target')
-        self.assertEquals(spec.description.output[0].type.WhichOneof('Type'),
-                          'doubleType')
+        self.assertEquals(spec.description.output[0].name, "target")
+        self.assertEquals(
+            spec.description.output[0].type.WhichOneof("Type"), "doubleType"
+        )
         for input_type in spec.description.input:
-            self.assertEquals(input_type.type.WhichOneof('Type'),
-                              'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEquals(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         # Test the linear regression parameters.
         self.assertEquals(len(spec.pipelineRegressor.pipeline.models), 2)
-        tr = spec.pipelineRegressor.pipeline.models[-1].treeEnsembleRegressor.treeEnsemble
+        tr = spec.pipelineRegressor.pipeline.models[
+            -1
+        ].treeEnsembleRegressor.treeEnsemble
         self.assertIsNotNone(tr)
         self.assertEquals(len(tr.nodes), 5996)

     def test_conversion_bad_inputs(self):
-
         # Error on converting an untrained model
         with self.assertRaises(Exception):
             model = RandomForestRegressor()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

         # Check the expected class during conversion.
         from sklearn.preprocessing import OneHotEncoder
+
         with self.assertRaises(Exception):
             model = OneHotEncoder()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")
diff --git a/coremltools/test/sklearn/test_random_forest_regression_numeric.py b/coremltools/test/sklearn/test_random_forest_regression_numeric.py
index 793015985..c20c86bc5 100644
--- a/coremltools/test/sklearn/test_random_forest_regression_numeric.py
+++ b/coremltools/test/sklearn/test_random_forest_regression_numeric.py
@@ -7,19 +7,21 @@
 import numpy as np
 import pandas as pd
 import os
-from coremltools._deps import HAS_SKLEARN
-from coremltools.models.utils import evaluate_regressor, macos_version, is_macos
+from coremltools._deps import _HAS_SKLEARN
+from coremltools.models.utils import evaluate_regressor, _macos_version, _is_macos
 import pytest

-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.ensemble import RandomForestRegressor
     from coremltools.converters import sklearn as skl_converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class RandomForestRegressorBostonHousingScikitNumericTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter and running both models
     """
+
     @classmethod
     def setUpClass(self):
         """
@@ -30,62 +32,73 @@ def setUpClass(self):
         # Load data and train model
         scikit_data = load_boston()
         self.scikit_data = scikit_data
-        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
+        self.X = scikit_data.data.astype("f").astype(
+            "d"
+        )  ## scikit-learn downcasts data
         self.target = scikit_data.target
         self.feature_names = scikit_data.feature_names
-        self.output_name = 'target'
+        self.output_name = "target"

-    def _check_metrics(self, metrics, params = {}):
+    def _check_metrics(self, metrics, params={}):
         """
         Check the metrics
         """
-        self.assertAlmostEquals(metrics['rmse'], 0.0, delta = 1e-5,
-                                msg = 'Failed case %s. Results %s' % (params, metrics))
-        self.assertAlmostEquals(metrics['max_error'], 0.0, delta = 1e-5,
-                                msg = 'Failed case %s. Results %s' % (params, metrics))
+        self.assertAlmostEquals(
+            metrics["rmse"],
+            0.0,
+            delta=1e-5,
+            msg="Failed case %s. Results %s" % (params, metrics),
+        )
+        self.assertAlmostEquals(
+            metrics["max_error"],
+            0.0,
+            delta=1e-5,
+            msg="Failed case %s. Results %s" % (params, metrics),
+        )

     def _train_convert_evaluate_assert(self, **scikit_params):
         """
         Train a scikit-learn model, convert it and then evaluate it with CoreML
         """
-        scikit_model = RandomForestRegressor(random_state = 1, **scikit_params)
+        scikit_model = RandomForestRegressor(random_state=1, **scikit_params)
         scikit_model.fit(self.X, self.target)

         # Convert the model
         spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

-        if is_macos() and macos_version() >= (10, 13):
+        if _is_macos() and _macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
-           df['prediction'] = scikit_model.predict(self.X)
+           df["prediction"] = scikit_model.predict(self.X)

            # Evaluate it
-           metrics = evaluate_regressor(spec, df, verbose = False)
+           metrics = evaluate_regressor(spec, df, verbose=False)
            self._check_metrics(metrics, scikit_params)

     def test_boston_housing_simple_regression(self):
         self._train_convert_evaluate_assert()

     def test_boston_housing_float_double_corner_case(self):
-        self._train_convert_evaluate_assert(max_depth = 13)
+        self._train_convert_evaluate_assert(max_depth=13)

     @pytest.mark.slow
     def test_boston_housing_parameter_stress_test(self):

         ## These are all the options in decision tree regression of scikit-learn
         options = dict(
-            criterion = ['mse'],
-            n_estimators = [1, 5, 10],
-            max_depth = [1, 5],
-            min_samples_split = [2, 10, 0.5],
-            min_samples_leaf = [1, 5],
-            min_weight_fraction_leaf = [0.0, 0.5],
-            max_leaf_nodes = [None, 20],
-            min_impurity_decrease = [1e-07, 0.1, 0.0],
+            criterion=["mse"],
+            n_estimators=[1, 5, 10],
+            max_depth=[1, 5],
+            min_samples_split=[2, 10, 0.5],
+            min_samples_leaf=[1, 5],
+            min_weight_fraction_leaf=[0.0, 0.5],
+            max_leaf_nodes=[None, 20],
+            min_impurity_decrease=[1e-07, 0.1, 0.0],
         )

         # Make a cartesian product of all options
         import itertools
+
         product = itertools.product(*options.values())
         args = [dict(zip(options.keys(), p)) for p in product]
diff --git a/coremltools/test/sklearn/test_standard_scalar.py b/coremltools/test/sklearn/test_standard_scalar.py
index c79ad5726..00ae033fe 100644
--- a/coremltools/test/sklearn/test_standard_scalar.py
+++ b/coremltools/test/sklearn/test_standard_scalar.py
@@ -5,19 +5,19 @@
 import unittest
 import numpy as _np
-from coremltools._deps import HAS_SKLEARN
+from coremltools._deps import _HAS_SKLEARN

-from coremltools.models.utils import evaluate_transformer,\
-    macos_version, is_macos
+from coremltools.models.utils import evaluate_transformer, _macos_version, _is_macos

-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.preprocessing import StandardScaler
     from coremltools.converters import sklearn as converter

-@unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
-                     'Only supported on macOS 10.13+')
-@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.')
+@unittest.skipUnless(
+    _is_macos() and _macos_version() >= (10, 13), "Only supported on macOS 10.13+"
+)
+@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.")
 class StandardScalerTestCase(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -25,35 +25,37 @@ class StandardScalerTestCase(unittest.TestCase):
     def test_random(self):
         # Generate some random data
-        X = _np.random.random(size = (50, 3))
+        X = _np.random.random(size=(50, 3))

-        cur_model =  StandardScaler()
+        cur_model = StandardScaler()
         output = cur_model.fit_transform(X)

-        spec = converter.convert(cur_model, ["a", 'b', 'c'], 'out').get_spec()
+        spec = converter.convert(cur_model, ["a", "b", "c"], "out").get_spec()

         metrics = evaluate_transformer(
             spec,
             [dict(zip(["a", "b", "c"], row)) for row in X],
-            [{"out": row} for row in output])
+            [{"out": row} for row in output],
+        )

         assert metrics["num_errors"] == 0

     def test_boston(self):
         from sklearn.datasets import load_boston
-        
+
         scikit_data = load_boston()
         scikit_model = StandardScaler().fit(scikit_data.data)

         spec = converter.convert(
-            scikit_model, scikit_data.feature_names, 'out').get_spec()
+            scikit_model, scikit_data.feature_names, "out"
+        ).get_spec()

-        input_data = [dict(zip(scikit_data.feature_names, row))
-                      for row in scikit_data.data]
+        input_data = [
+            dict(zip(scikit_data.feature_names, row)) for row in scikit_data.data
+        ]

-        output_data = [
-            {"out": row} for row in scikit_model.transform(scikit_data.data)]
+        output_data = [{"out": row} for row in scikit_model.transform(scikit_data.data)]

         metrics = evaluate_transformer(spec, input_data, output_data)
diff --git a/coremltools/test/sklearn/test_utils.py b/coremltools/test/sklearn/test_utils.py
index 76c04aebf..56b514f0b 100644
--- a/coremltools/test/sklearn/test_utils.py
+++ b/coremltools/test/sklearn/test_utils.py
@@ -2,12 +2,12 @@
 #
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
 import unittest
-from coremltools.models.utils import rename_feature, macos_version, is_macos
+from coremltools.models.utils import rename_feature, _macos_version, _is_macos
 from coremltools.models import MLModel
-from coremltools._deps import HAS_SKLEARN
+from coremltools._deps import _HAS_SKLEARN
 import pandas as pd

-if HAS_SKLEARN:
+if _HAS_SKLEARN:
     from sklearn.preprocessing import OneHotEncoder
     from sklearn.datasets import load_boston
     from sklearn.linear_model import LinearRegression
@@ -15,36 +15,35 @@
     from coremltools.converters import sklearn as converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.")
 class PipeLineRenameTests(unittest.TestCase):
-
     @classmethod
     def setUpClass(self):
-
         scikit_data = load_boston()
         feature_names = scikit_data.feature_names

         scikit_model = LinearRegression()
-        scikit_model.fit(scikit_data['data'], scikit_data['target'])
+        scikit_model.fit(scikit_data["data"], scikit_data["target"])

         # Save the data and the model
         self.scikit_data = scikit_data
         self.scikit_model = scikit_model

     def test_pipeline_rename(self):
-
         # Convert
         scikit_spec = converter.convert(self.scikit_model).get_spec()
         model = MLModel(scikit_spec)
         sample_data = self.scikit_data.data[0]
-        
+
         # Rename
-        rename_feature(scikit_spec, 'input', 'renamed_input')
+        rename_feature(scikit_spec, "input", "renamed_input")
         renamed_model = MLModel(scikit_spec)
-        
+
         # Check the predictions
-        if is_macos() and macos_version() >= (10, 13):
-            out_dict = model.predict({'input': sample_data})
-            out_dict_renamed = renamed_model.predict({'renamed_input': sample_data})
+        if _is_macos() and _macos_version() >= (10, 13):
+            out_dict = model.predict({"input": sample_data})
+            out_dict_renamed = renamed_model.predict({"renamed_input": sample_data})
             self.assertAlmostEqual(list(out_dict.keys()), list(out_dict_renamed.keys()))
-            self.assertAlmostEqual(list(out_dict.values()), list(out_dict_renamed.values()))
\ No newline at end of file
+            self.assertAlmostEqual(
+                list(out_dict.values()), list(out_dict_renamed.values())
+            )
diff --git a/coremltools/converters/nnssa/commons/__init__.py b/coremltools/test/xgboost/__init__.py
similarity index 81%
rename from coremltools/converters/nnssa/commons/__init__.py
rename to coremltools/test/xgboost/__init__.py
index d2439991a..8aa13a28b 100644
--- a/coremltools/converters/nnssa/commons/__init__.py
+++ b/coremltools/test/xgboost/__init__.py
@@ -2,5 +2,3 @@
 #
 # Use of this source code is governed by a BSD-3-clause license that can be
 # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
-from . import builtins
-from . import serialization
\ No newline at end of file
diff --git a/coremltools/test/xgboost/test_boosted_trees_classifier.py b/coremltools/test/xgboost/test_boosted_trees_classifier.py
index 48a4d1ffd..7458a5892 100644
--- a/coremltools/test/xgboost/test_boosted_trees_classifier.py
+++ b/coremltools/test/xgboost/test_boosted_trees_classifier.py
@@ -11,15 +11,15 @@
 from coremltools.converters import sklearn as skl_converter
 from coremltools.proto import Model_pb2
 from coremltools.proto import FeatureTypes_pb2
-from coremltools._deps import HAS_XGBOOST
-from coremltools._deps import HAS_SKLEARN
+from coremltools._deps import _HAS_XGBOOST
+from coremltools._deps import _HAS_SKLEARN

-if HAS_XGBOOST:
+if _HAS_XGBOOST:
     import xgboost
     from coremltools.converters import xgboost as xgb_converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class GradientBoostingBinaryClassifierScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
@@ -33,19 +33,20 @@ def setUpClass(self):
         from sklearn.datasets import load_boston

         scikit_data = load_boston()
-        scikit_model = GradientBoostingClassifier(random_state = 1)
-        target = scikit_data['target'] > scikit_data['target'].mean()
-        scikit_model.fit(scikit_data['data'], target)
+        scikit_model = GradientBoostingClassifier(random_state=1)
+        target = scikit_data["target"] > scikit_data["target"].mean()
+        scikit_model.fit(scikit_data["data"], target)

         # Save the data and the model
         self.scikit_data = scikit_data
         self.scikit_model = scikit_model

     def test_conversion(self):
-
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
-        spec = skl_converter.convert(self.scikit_model, input_names, 'target').get_spec()
+        output_name = "target"
+        spec = skl_converter.convert(
+            self.scikit_model, input_names, "target"
+        ).get_spec()
         self.assertIsNotNone(spec)

         # Test the model class
@@ -53,22 +54,24 @@ def test_conversion(self):
         self.assertIsNotNone(spec.treeEnsembleClassifier)

         # Test the interface class
-        self.assertEqual(spec.description.predictedFeatureName,
-                         'target')
+        self.assertEqual(spec.description.predictedFeatureName, "target")

         # Test the inputs and outputs
         self.assertEqual(len(spec.description.output), 2)
-        self.assertEqual(spec.description.output[0].name, 'target')
-        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'),
-                         'int64Type')
+        self.assertEqual(spec.description.output[0].name, "target")
+        self.assertEqual(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEqual(input_type.type.WhichOneof('Type'),
-                             'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         # Test the linear regression parameters.
-        tr = spec.pipelineClassifier.pipeline.models[1].treeEnsembleClassifier.treeEnsemble
+        tr = spec.pipelineClassifier.pipeline.models[
+            1
+        ].treeEnsembleClassifier.treeEnsemble
         self.assertIsNotNone(tr)
         self.assertEqual(len(tr.nodes), 1416)

@@ -76,16 +79,17 @@ def test_conversion_bad_inputs(self):
         # Error on converting an untrained model
         with self.assertRaises(Exception):
             model = GradientBoostingClassifier()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

         # Check the expected class during conversion.
         from sklearn.preprocessing import OneHotEncoder
+
         with self.assertRaises(Exception):
             model = OneHotEncoder()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class GradientBoostingMulticlassClassifierScikitTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter.
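The multiclass variant that follows builds integer class labels by binning the continuous target with numpy's histogram edges. A hedged sketch of that construction with toy values:

```python
import numpy as np

# Histogram-bin labelling, as used by the multiclass fixtures below:
# np.histogram returns (counts, bin_edges); np.digitize maps each value
# to a 1-based bin index, and the trailing -1 shifts labels to start at 0.
t = np.array([5.0, 12.0, 21.0, 33.0, 48.0])
target = np.digitize(t, np.histogram(t)[1]) - 1
```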
@@ -100,7 +104,7 @@ def setUpClass(self):
         import numpy as np

         scikit_data = load_boston()
-        scikit_model = GradientBoostingClassifier(random_state = 1)
+        scikit_model = GradientBoostingClassifier(random_state=1)
         t = scikit_data.target
         target = np.digitize(t, np.histogram(t)[1]) - 1
         scikit_model.fit(scikit_data.data, target)
@@ -111,28 +115,34 @@ def setUpClass(self):
         self.scikit_model = scikit_model

     def test_conversion(self):
-
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
-        spec = skl_converter.convert(self.scikit_model, input_names, 'target').get_spec()
+        output_name = "target"
+        spec = skl_converter.convert(
+            self.scikit_model, input_names, "target"
+        ).get_spec()
         self.assertIsNotNone(spec)

         # Test the model class
         self.assertIsNotNone(spec.description)
-        self.assertEqual(spec.description.predictedFeatureName, 'target')
+        self.assertEqual(spec.description.predictedFeatureName, "target")

         # Test the inputs and outputs
         self.assertEqual(len(spec.description.output), 2)
-        self.assertEqual(spec.description.output[0].name, 'target')
-        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), 'int64Type')
+        self.assertEqual(spec.description.output[0].name, "target")
+        self.assertEqual(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEqual(input_type.type.WhichOneof('Type'), 'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         self.assertEqual(len(spec.pipelineClassifier.pipeline.models), 2)
-        tr = spec.pipelineClassifier.pipeline.models[-1].treeEnsembleClassifier.treeEnsemble
+        tr = spec.pipelineClassifier.pipeline.models[
+            -1
+        ].treeEnsembleClassifier.treeEnsemble
         self.assertIsNotNone(tr)
         self.assertEqual(len(tr.nodes), 15056)

@@ -140,17 +150,18 @@ def test_conversion_bad_inputs(self):
         # Error on converting an untrained model
         with self.assertRaises(Exception):
             model = GradientBoostingClassifier()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

         # Check the expected class during conversion.
         from sklearn.preprocessing import OneHotEncoder
+
         with self.assertRaises(Exception):
             model = OneHotEncoder()
-            spec = skl_converter.convert(model, 'data', 'out')
+            spec = skl_converter.convert(model, "data", "out")

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-@unittest.skipIf(not HAS_XGBOOST, 'Skipping, no xgboost')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+@unittest.skipIf(not _HAS_XGBOOST, "Skipping, no xgboost")
 class GradientBoostingBinaryClassifierXGboostTest(unittest.TestCase):
     """
     Unit test class for testing xgboost converter.
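Before the xgboost hunks, a compact sketch of the sklearn-API conversion path they exercise; the random data and feature names here are placeholders (the tests use the Boston housing set), and `mode="classifier"` is taken from the calls below:

```python
import numpy as np
import xgboost
from coremltools.converters import xgboost as xgb_converter

# Placeholder training data standing in for the Boston housing set.
X = np.random.random((50, 3))
y = (X[:, 0] > 0.5).astype(int)
model = xgboost.XGBClassifier().fit(X, y)

# mode="classifier" selects the classifier conversion path.
spec = xgb_converter.convert(
    model, ["f0", "f1", "f2"], "target", mode="classifier"
).get_spec()
```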
@@ -165,17 +176,18 @@ def setUpClass(self):

         scikit_data = load_boston()
         self.xgb_model = xgboost.XGBClassifier()
-        target = scikit_data['target'] > scikit_data['target'].mean()
-        self.xgb_model.fit(scikit_data['data'], target)
+        target = scikit_data["target"] > scikit_data["target"].mean()
+        self.xgb_model.fit(scikit_data["data"], target)

         # Save the data and the model
         self.scikit_data = scikit_data

     def test_conversion(self):
-
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
-        spec = xgb_converter.convert(self.xgb_model, input_names, output_name, mode="classifier").get_spec()
+        output_name = "target"
+        spec = xgb_converter.convert(
+            self.xgb_model, input_names, output_name, mode="classifier"
+        ).get_spec()
         self.assertIsNotNone(spec)

         # Test the model class
@@ -183,19 +195,19 @@ def test_conversion(self):
         self.assertIsNotNone(spec.treeEnsembleClassifier)

         # Test the interface class
-        self.assertEqual(spec.description.predictedFeatureName,
-                         output_name)
+        self.assertEqual(spec.description.predictedFeatureName, output_name)

         # Test the inputs and outputs
         self.assertEqual(len(spec.description.output), 2)
         self.assertEqual(spec.description.output[0].name, output_name)
-        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'),
-                         'int64Type')
+        self.assertEqual(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEqual(input_type.type.WhichOneof('Type'),
-                             'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         # Test the linear regression parameters.
         tr = spec.treeEnsembleClassifier.treeEnsemble
@@ -205,16 +217,16 @@ def test_conversion_bad_inputs(self):
         # Error on converting an untrained model
         with self.assertRaises(Exception):
             model = xgboost.XGBClassifier()
-            spec = xgb_converter.convert(model, 'data', 'out', mode="classifier")
+            spec = xgb_converter.convert(model, "data", "out", mode="classifier")

         # Check the expected class during conversion.
         with self.assertRaises(Exception):
             model = xgboost.XGBRegressor()
-            spec = xgb_converter.convert(model, 'data', 'out', mode="classifier")
+            spec = xgb_converter.convert(model, "data", "out", mode="classifier")

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-@unittest.skipIf(not HAS_XGBOOST, 'Skipping, no xgboost')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+@unittest.skipIf(not _HAS_XGBOOST, "Skipping, no xgboost")
 class GradientBoostingMulticlassClassifierXGboostTest(unittest.TestCase):
     """
     Unit test class for testing xgboost converter.
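The next hunks also cover converting from a dumped model file rather than a live object. A sketch of that round trip, following the `get_dump()`/`json.dump` calls shown in the hunks (data, feature names, and the temp path are illustrative):

```python
import json
import tempfile

import numpy as np
import xgboost

# Train a raw Booster via the DMatrix API, as the fixture below does.
X = np.random.random((50, 3))
y = np.random.randint(0, 3, 50)
dtrain = xgboost.DMatrix(X, label=y, feature_names=["f0", "f1", "f2"])
booster = xgboost.train({}, dtrain)

# Dump the trees to JSON; the converter also accepts this file path,
# as test_conversion_from_file demonstrates below.
path = tempfile.mktemp("xgb_tree_model.json")
with open(path, "w") as f:
    json.dump(booster.get_dump(with_stats=True, dump_format="json"), f)
```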
@@ -231,7 +243,9 @@ def setUpClass(self):
         scikit_data = load_boston()
         t = scikit_data.target
         target = np.digitize(t, np.histogram(t)[1]) - 1
-        dtrain = xgboost.DMatrix(scikit_data.data, label=target, feature_names=scikit_data.feature_names)
+        dtrain = xgboost.DMatrix(
+            scikit_data.data, label=target, feature_names=scikit_data.feature_names
+        )
         self.xgb_model = xgboost.train({}, dtrain)
         self.target = target

@@ -242,7 +256,7 @@ def setUpClass(self):

     def test_conversion(self):
         input_names = self.scikit_data.feature_names
-        output_name = 'target'
+        output_name = "target"
         spec = xgb_converter.convert(
             self.xgb_model,
             input_names,
@@ -259,28 +273,29 @@ def test_conversion(self):
         # Test the inputs and outputs
         self.assertEqual(len(spec.description.output), 2)
         self.assertEqual(spec.description.output[0].name, output_name)
-        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), 'int64Type')
+        self.assertEqual(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEqual(input_type.type.WhichOneof('Type'), 'doubleType')
-        self.assertEqual(sorted(input_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(input_names), sorted(map(lambda x: x.name, spec.description.input))
+        )

         # Test the linear regression parameters.
         tr = spec.treeEnsembleClassifier.treeEnsemble
         self.assertIsNotNone(tr)

-
     def test_conversion_from_file(self):
         import numpy as np

-        output_name = 'target'
+        output_name = "target"
         feature_names = self.scikit_data.feature_names
-
-        xgb_model_json = tempfile.mktemp('xgb_tree_model_classifier.json')
-        xgb_json_out = self.xgb_model.get_dump(with_stats=True, dump_format='json')
-        with open(xgb_model_json, 'w') as f:
+        xgb_model_json = tempfile.mktemp("xgb_tree_model_classifier.json")
+        xgb_json_out = self.xgb_model.get_dump(with_stats=True, dump_format="json")
+        with open(xgb_model_json, "w") as f:
             json.dump(xgb_json_out, f)

         spec = xgb_converter.convert(
             xgb_model_json,
@@ -296,18 +311,20 @@ def test_conversion_from_file(self):
         self.assertIsNotNone(spec.treeEnsembleRegressor)

         # Test the interface class
-        self.assertEqual(spec.description.predictedFeatureName,
-                         output_name)
+        self.assertEqual(spec.description.predictedFeatureName, output_name)

         # Test the inputs and outputs
         self.assertEqual(len(spec.description.output), 2)
         self.assertEqual(spec.description.output[0].name, output_name)
-        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), 'int64Type')
+        self.assertEqual(
+            spec.description.output[0].type.WhichOneof("Type"), "int64Type"
+        )
         for input_type in spec.description.input:
-            self.assertEqual(input_type.type.WhichOneof('Type'),
-                             'doubleType')
-        self.assertEqual(sorted(self.scikit_data.feature_names),
-                         sorted(map(lambda x: x.name, spec.description.input)))
+            self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType")
+        self.assertEqual(
+            sorted(self.scikit_data.feature_names),
+            sorted(map(lambda x: x.name, spec.description.input)),
+        )

         # Test the linear regression parameters.
         tr = spec.treeEnsembleClassifier.treeEnsemble
diff --git a/coremltools/test/xgboost/test_boosted_trees_classifier_numeric.py b/coremltools/test/xgboost/test_boosted_trees_classifier_numeric.py
index 3dfdb4db4..95ad60114 100644
--- a/coremltools/test/xgboost/test_boosted_trees_classifier_numeric.py
+++ b/coremltools/test/xgboost/test_boosted_trees_classifier_numeric.py
@@ -8,23 +8,30 @@
 import pandas as pd
 import unittest

-from coremltools._deps import HAS_SKLEARN, HAS_XGBOOST
-from coremltools.models.utils import evaluate_classifier,\
-    macos_version, is_macos
-if HAS_SKLEARN:
+from coremltools._deps import _HAS_SKLEARN, _HAS_XGBOOST
+from coremltools.models.utils import (
+    evaluate_classifier,
+    evaluate_classifier_with_probabilities,
+    _macos_version,
+    _is_macos,
+)
+
+if _HAS_SKLEARN:
     from sklearn.datasets import load_boston
     from sklearn.ensemble import GradientBoostingClassifier
     from coremltools.converters import sklearn as skl_converter

-if HAS_XGBOOST:
+if _HAS_XGBOOST:
     import xgboost
     from coremltools.converters import xgboost as xgb_converter

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
+
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
 class BoostedTreeClassificationBostonHousingScikitNumericTest(unittest.TestCase):
     """
     Unit test class for testing scikit-learn converter and running both models
     """
+
     @classmethod
     def setUpClass(self):
         """
@@ -35,50 +42,56 @@ def setUpClass(self):
         # Load data and train model
         scikit_data = load_boston()
         self.scikit_data = scikit_data
-        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
-        self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
+        self.X = scikit_data.data.astype("f").astype(
+            "d"
+        )  ## scikit-learn downcasts data
+        self.target = 1 * (scikit_data["target"] > scikit_data["target"].mean())
         self.feature_names = scikit_data.feature_names
-        self.output_name = 'target'
+        self.output_name = "target"

-    def _check_metrics(self, metrics, params = {}):
-        self.assertEquals(metrics['num_errors'], 0, msg = 'Failed case %s. Results %s' % (params, metrics))
+    def _check_metrics(self, metrics, params={}):
+        self.assertEquals(
+            metrics["num_errors"],
+            0,
+            msg="Failed case %s. Results %s" % (params, metrics),
+        )

     def _train_convert_evaluate_assert(self, **scikit_params):
         """
         Train a scikit-learn model, convert it and then evaluate it with CoreML
         """
-        scikit_model = GradientBoostingClassifier(random_state = 1, **scikit_params)
+        scikit_model = GradientBoostingClassifier(random_state=1, **scikit_params)
         scikit_model.fit(self.X, self.target)

         # Convert the model
         spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

-        if is_macos() and macos_version() >= (10, 13):
+        if _is_macos() and _macos_version() >= (10, 13):
             # Get predictions
             df = pd.DataFrame(self.X, columns=self.feature_names)
-            df['prediction'] = scikit_model.predict(self.X)
+            df["prediction"] = scikit_model.predict(self.X)

             # Evaluate it
             metrics = evaluate_classifier(spec, df)
             self._check_metrics(metrics)

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-class BoostedTreeBinaryClassificationBostonHousingScikitNumericTest(
-    BoostedTreeClassificationBostonHousingScikitNumericTest):
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+class BoostedTreeBinaryClassificationBostonHousingScikitNumericTest(
+    BoostedTreeClassificationBostonHousingScikitNumericTest
+):
     def test_simple_binary_classifier(self):
         self._train_convert_evaluate_assert()

     @pytest.mark.slow
     def test_binary_classifier_stress_test(self):
-
         options = dict(
-            max_depth = [1, 10, None],
-            min_samples_split = [2, 0.5],
-            min_samples_leaf = [1, 5],
-            min_weight_fraction_leaf = [0.0, 0.5],
-            max_features = [None, 1],
-            max_leaf_nodes = [None, 20],
+            max_depth=[1, 10, None],
+            min_samples_split=[2, 0.5],
+            min_samples_leaf=[1, 5],
+            min_weight_fraction_leaf=[0.0, 0.5],
+            max_features=[None, 1],
+            max_leaf_nodes=[None, 20],
         )

         # Make a cartesian product of all options
@@ -89,27 +102,31 @@ def test_binary_classifier_stress_test(self):
         for it, arg in enumerate(args):
             self._train_convert_evaluate_assert(**arg)

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-class BoostedTreeMultiClassClassificationBostonHousingScikitNumericTest(
-    BoostedTreeClassificationBostonHousingScikitNumericTest):
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+class BoostedTreeMultiClassClassificationBostonHousingScikitNumericTest(
+    BoostedTreeClassificationBostonHousingScikitNumericTest
+):
     @classmethod
     def setUpClass(self):
         from sklearn.datasets import load_boston

         # Load data and train model
         import numpy as np
+
         scikit_data = load_boston()
         num_classes = 3
-        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
+        self.X = scikit_data.data.astype("f").astype(
+            "d"
+        )  ## scikit-learn downcasts data
         t = scikit_data.target
-        target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1
+        target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1

         # Save the data and the model
         self.scikit_data = scikit_data
         self.target = target
         self.feature_names = scikit_data.feature_names
-        self.output_name = 'target'
+        self.output_name = "target"

     def test_simple_multiclass(self):
         self._train_convert_evaluate_assert()
@@ -117,12 +134,12 @@ def test_simple_multiclass(self):
     @pytest.mark.slow
     def test_multiclass_stress_test(self):
         options = dict(
-            max_depth = [1, 10, None],
-            min_samples_split = [2, 0.5],
-            min_samples_leaf = [1, 5],
-            min_weight_fraction_leaf = [0.0, 0.5],
-            max_features = [None, 1],
-            max_leaf_nodes = [None, 20],
+            max_depth=[1, 10, None],
+            min_samples_split=[2, 0.5],
+            min_samples_leaf=[1, 5],
+            min_weight_fraction_leaf=[0.0, 0.5],
+            max_features=[None, 1],
+            max_leaf_nodes=[None, 20],
         )

         # Make a cartesian product of all options
@@ -134,15 +151,19 @@ def test_multiclass_stress_test(self):
             self._train_convert_evaluate_assert(**arg)

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-@unittest.skipIf(not HAS_XGBOOST, 'Skipping, no xgboost')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+@unittest.skipIf(not _HAS_XGBOOST, "Skipping, no xgboost")
 class BoostedTreeClassificationBostonHousingXGboostNumericTest(unittest.TestCase):
     """
     Unit test class for testing xgboost converter and running both models
     """

-    def _check_metrics(self, metrics, params = {}):
-        self.assertEquals(metrics['num_errors'], 0, msg = 'Failed case %s. Results %s' % (params, metrics))
+    def _check_metrics(self, metrics, params={}):
+        self.assertEquals(
+            metrics["num_errors"],
+            0,
+            msg="Failed case %s. Results %s" % (params, metrics),
+        )

     def _train_convert_evaluate_assert(self, **xgboost_params):
         """
@@ -152,22 +173,26 @@ def _train_convert_evaluate_assert(self, **xgboost_params):
         xgb_model.fit(self.X, self.target)

         # Convert the model
-        spec = xgb_converter.convert(xgb_model, self.feature_names, self.output_name, mode="classifier")
+        spec = xgb_converter.convert(
+            xgb_model, self.feature_names, self.output_name, mode="classifier"
+        )

-        if is_macos() and macos_version() >= (10, 13):
+        if _is_macos() and _macos_version() >= (10, 13):
             # Get predictions
             df = pd.DataFrame(self.X, columns=self.feature_names)
-            df['prediction'] = xgb_model.predict(self.X)
-
-            # Evaluate it
-            metrics = evaluate_classifier(spec, df)
-            self._check_metrics(metrics)
+            probabilities = xgb_model.predict_proba(self.X)
+            df["classProbability"] = [
+                dict(zip(xgb_model.classes_, cur_vals)) for cur_vals in probabilities
+            ]
+            metrics = evaluate_classifier_with_probabilities(
+                spec, df, probabilities="classProbability", verbose=False
+            )
+            self.assertEquals(metrics["num_key_mismatch"], 0)
+            self.assertLess(metrics["max_probability_error"], 1e-3)

     def _classifier_stress_test(self):
         options = dict(
-            max_depth = [1, 10],
-            min_child_weight = [2, 0.5],
-            max_delta_step = [1, 5],
+            max_depth=[1, 10], min_child_weight=[2, 0.5], max_delta_step=[1, 5],
         )

         # Make a cartesian product of all options
         product = itertools.product(*options.values())
@@ -177,11 +202,12 @@ def _classifier_stress_test(self):
         for it, arg in enumerate(args):
             self._train_convert_evaluate_assert(**arg)

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-@unittest.skipIf(not HAS_XGBOOST, 'Skipping, no xgboost')
-class BoostedTreeBinaryClassificationBostonHousingXGboostNumericTest(
-    BoostedTreeClassificationBostonHousingXGboostNumericTest):
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.")
+@unittest.skipIf(not _HAS_XGBOOST, "Skipping, no xgboost")
+class BoostedTreeBinaryClassificationBostonHousingXGboostNumericTest(
+    BoostedTreeClassificationBostonHousingXGboostNumericTest
+):
     @classmethod
     def setUpClass(self):
         """
@@ -192,10 +218,12 @@ def setUpClass(self):
         # Load data and train model
         scikit_data = load_boston()
         self.scikit_data = scikit_data
-        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
-        self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean())
+        self.X = scikit_data.data.astype("f").astype(
+            "d"
+        )  ## scikit-learn downcasts data
+        self.target = 1 * (scikit_data["target"] > scikit_data["target"].mean())
         self.feature_names = scikit_data.feature_names
-        self.output_name = 'target'
+        self.output_name = "target"

     def test_simple_binary_classifier(self):
         self._train_convert_evaluate_assert()
@@ -205,28 +233,31 @@ def test_binary_classifier_stress_test(self):
         self._classifier_stress_test()

-@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.')
-@unittest.skipIf(not HAS_XGBOOST, 'Skipping, no xgboost')
+@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. 
Skipping tests.") +@unittest.skipIf(not _HAS_XGBOOST, "Skipping, no xgboost") class BoostedTreeMultiClassClassificationBostonHousingXGboostNumericTest( - BoostedTreeClassificationBostonHousingXGboostNumericTest): - + BoostedTreeClassificationBostonHousingXGboostNumericTest +): @classmethod def setUpClass(self): from sklearn.datasets import load_boston # Load data and train model import numpy as np + scikit_data = load_boston() num_classes = 3 - self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data + self.X = scikit_data.data.astype("f").astype( + "d" + ) ## scikit-learn downcasts data t = scikit_data.target - target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1 + target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1 # Save the data and the model self.scikit_data = scikit_data self.target = target self.feature_names = scikit_data.feature_names - self.output_name = 'target' + self.output_name = "target" def test_simple_multiclass(self): self._train_convert_evaluate_assert() diff --git a/coremltools/test/xgboost/test_boosted_trees_regression.py b/coremltools/test/xgboost/test_boosted_trees_regression.py index 5cb4ffe85..17587fd91 100644 --- a/coremltools/test/xgboost/test_boosted_trees_regression.py +++ b/coremltools/test/xgboost/test_boosted_trees_regression.py @@ -7,20 +7,20 @@ import tempfile import json -from coremltools._deps import HAS_SKLEARN, HAS_XGBOOST +from coremltools._deps import _HAS_SKLEARN, _HAS_XGBOOST -if HAS_XGBOOST: +if _HAS_XGBOOST: import xgboost from coremltools.converters import xgboost as xgb_converter -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.datasets import load_boston from sklearn.ensemble import GradientBoostingRegressor from sklearn.preprocessing import OneHotEncoder from coremltools.converters import sklearn as skl_converter -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") class GradientBoostingRegressorScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. @@ -31,12 +31,12 @@ def setUpClass(cls): """ Set up the unit test by loading the dataset and training a model. 
""" - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return scikit_data = load_boston() - scikit_model = GradientBoostingRegressor(random_state = 1) - scikit_model.fit(scikit_data['data'], scikit_data['target']) + scikit_model = GradientBoostingRegressor(random_state=1) + scikit_model.fit(scikit_data["data"], scikit_data["target"]) # Save the data and the model cls.scikit_data = scikit_data @@ -44,29 +44,33 @@ def setUpClass(cls): def test_conversion(self): input_names = self.scikit_data.feature_names - output_name = 'target' - spec = skl_converter.convert(self.scikit_model, input_names, 'target').get_spec() + output_name = "target" + spec = skl_converter.convert( + self.scikit_model, input_names, "target" + ).get_spec() self.assertIsNotNone(spec) # Test the model class self.assertIsNotNone(spec.description) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in spec.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(input_names), - sorted(map(lambda x: x.name, spec.description.input))) - - tr = spec.pipelineRegressor.pipeline.models[-1].treeEnsembleRegressor.treeEnsemble + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(input_names), sorted(map(lambda x: x.name, spec.description.input)) + ) + + tr = spec.pipelineRegressor.pipeline.models[ + -1 + ].treeEnsembleRegressor.treeEnsemble self.assertIsNotNone(tr) self.assertEqual(len(tr.nodes), 1426) @@ -75,31 +79,33 @@ def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(Exception): model = GradientBoostingRegressor() - spec = skl_converter.convert(model, 'data', 'out') + spec = skl_converter.convert(model, "data", "out") # Check the expected class during covnersion. with self.assertRaises(Exception): model = OneHotEncoder() - spec = skl_converter.convert(model, 'data', 'out') + spec = skl_converter.convert(model, "data", "out") -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') -@unittest.skipIf(not HAS_XGBOOST, 'Skipping, no xgboost') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") +@unittest.skipIf(not _HAS_XGBOOST, "Skipping, no xgboost") class BoostedTreeRegressorXGboostTest(unittest.TestCase): - @classmethod def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. 
""" - if not HAS_XGBOOST: + if not _HAS_XGBOOST: return - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return scikit_data = load_boston() - dtrain = xgboost.DMatrix(scikit_data.data, label = scikit_data.target, - feature_names = scikit_data.feature_names) + dtrain = xgboost.DMatrix( + scikit_data.data, + label=scikit_data.target, + feature_names=scikit_data.feature_names, + ) xgb_model = xgboost.train({}, dtrain, 1) # Save the data and the model @@ -110,8 +116,8 @@ def setUpClass(self): def test_conversion(self): feature_names = self.scikit_data.feature_names - output_name = 'target' - spec = xgb_converter.convert(self.xgb_model, feature_names, 'target').get_spec() + output_name = "target" + spec = xgb_converter.convert(self.xgb_model, feature_names, "target").get_spec() self.assertIsNotNone(spec) # Test the model class @@ -119,19 +125,20 @@ def test_conversion(self): self.assertIsNotNone(spec.treeEnsembleRegressor) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in spec.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(self.feature_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(self.feature_names), + sorted(map(lambda x: x.name, spec.description.input)), + ) # Test the linear regression parameters. 
tr = spec.treeEnsembleRegressor.treeEnsemble @@ -140,14 +147,14 @@ def test_conversion(self): def test_conversion_from_file(self): - output_name = 'target' + output_name = "target" feature_names = self.feature_names - xgb_model_json = tempfile.mktemp('tree_model.json') - xgb_json_out = self.xgb_model.get_dump(dump_format = 'json') - with open(xgb_model_json, 'w') as f: + xgb_model_json = tempfile.mktemp("tree_model.json") + xgb_json_out = self.xgb_model.get_dump(dump_format="json") + with open(xgb_model_json, "w") as f: json.dump(xgb_json_out, f) - spec = xgb_converter.convert(xgb_model_json, feature_names, 'target').get_spec() + spec = xgb_converter.convert(xgb_model_json, feature_names, "target").get_spec() self.assertIsNotNone(spec) # Test the model class @@ -155,19 +162,20 @@ def test_conversion_from_file(self): self.assertIsNotNone(spec.treeEnsembleRegressor) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in spec.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(self.feature_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(self.feature_names), + sorted(map(lambda x: x.name, spec.description.input)), + ) # Test the linear regression parameters. 
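For reference, the from-file path exercised by `test_conversion_from_file` above reduces to a few lines. A minimal sketch, assuming xgboost and scikit-learn are available; the temp-file suffix mirrors the test:

```python
# Sketch: dump a trained Booster to JSON and convert from the file path,
# as test_conversion_from_file does above.
import json
import tempfile
import xgboost
from sklearn.datasets import load_boston
from coremltools.converters import xgboost as xgb_converter

data = load_boston()
dtrain = xgboost.DMatrix(
    data.data, label=data.target, feature_names=data.feature_names
)
booster = xgboost.train({}, dtrain, 1)

path = tempfile.mktemp("tree_model.json")
with open(path, "w") as f:
    json.dump(booster.get_dump(dump_format="json"), f)

spec = xgb_converter.convert(path, data.feature_names, "target").get_spec()
assert spec.treeEnsembleRegressor is not None  # mirrors the test's assertIsNotNone
```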
tr = spec.treeEnsembleRegressor.treeEnsemble @@ -177,25 +185,25 @@ def test_conversion_from_file(self): def test_unsupported_conversion(self): feature_names = self.scikit_data.feature_names - output_name = 'target' - xgb_model = xgboost.XGBRegressor(objective = 'reg:gamma') + output_name = "target" + xgb_model = xgboost.XGBRegressor(objective="reg:gamma") xgb_model.fit(self.scikit_data.data, self.scikit_data.target) with self.assertRaises(ValueError): - spec = xgb_converter.convert(xgb_model, feature_names, 'target') + spec = xgb_converter.convert(xgb_model, feature_names, "target") - xgb_model = xgboost.XGBRegressor(objective = 'reg:tweedie') + xgb_model = xgboost.XGBRegressor(objective="reg:tweedie") xgb_model.fit(self.scikit_data.data, self.scikit_data.target) with self.assertRaises(ValueError): - spec = xgb_converter.convert(xgb_model, feature_names, 'target') + spec = xgb_converter.convert(xgb_model, feature_names, "target") def test_conversion_bad_inputs(self): # Error on converting an untrained model with self.assertRaises(TypeError): model = GradientBoostingRegressor() - spec = xgb_converter.convert(model, 'data', 'out') + spec = xgb_converter.convert(model, "data", "out") # Check the expected class during conversion with self.assertRaises(TypeError): model = OneHotEncoder() - spec = xgb_converter.convert(model, 'data', 'out') + spec = xgb_converter.convert(model, "data", "out") diff --git a/coremltools/test/xgboost/test_boosted_trees_regression_numeric.py b/coremltools/test/xgboost/test_boosted_trees_regression_numeric.py index c7dd07eb3..c22a7a78c 100644 --- a/coremltools/test/xgboost/test_boosted_trees_regression_numeric.py +++ b/coremltools/test/xgboost/test_boosted_trees_regression_numeric.py @@ -9,53 +9,60 @@ import itertools import pytest -from coremltools._deps import HAS_SKLEARN, HAS_XGBOOST -from coremltools.models.utils import evaluate_regressor, macos_version, is_macos +from coremltools._deps import _HAS_SKLEARN, _HAS_XGBOOST +from coremltools.models.utils import evaluate_regressor, _macos_version, _is_macos -if HAS_XGBOOST: +if _HAS_XGBOOST: import xgboost from coremltools.converters import xgboost as xgb_converter -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.datasets import load_boston from sklearn.ensemble import GradientBoostingRegressor from coremltools.converters import sklearn as skl_converter from sklearn.tree import DecisionTreeRegressor -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class GradientBoostingRegressorBostonHousingScikitNumericTest(unittest.TestCase): - @classmethod def setUpClass(self): # Load data and train model scikit_data = load_boston() self.scikit_data = scikit_data - self.X = scikit_data['data'] - self.target = scikit_data['target'] + self.X = scikit_data["data"] + self.target = scikit_data["target"] self.feature_names = scikit_data.feature_names - self.output_name = 'target' - - def _check_metrics(self, metrics, params = {}): - self.assertAlmostEquals(metrics['rmse'], 0, delta = 1e-5, - msg = 'Failed case %s. Results %s' % (params, metrics)) - self.assertAlmostEquals(metrics['max_error'], 0, delta = 1e-5, - msg = 'Failed case %s. Results %s' % (params, metrics)) + self.output_name = "target" + + def _check_metrics(self, metrics, params={}): + self.assertAlmostEquals( + metrics["rmse"], + 0, + delta=1e-5, + msg="Failed case %s. 
Results %s" % (params, metrics), + ) + self.assertAlmostEquals( + metrics["max_error"], + 0, + delta=1e-5, + msg="Failed case %s. Results %s" % (params, metrics), + ) def _train_convert_evaluate_assert(self, **scikit_params): - scikit_model = GradientBoostingRegressor(random_state = 1, **scikit_params) + scikit_model = GradientBoostingRegressor(random_state=1, **scikit_params) scikit_model.fit(self.X, self.target) # Convert the model spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): # Get predictions df = pd.DataFrame(self.X, columns=self.feature_names) - df['prediction'] = scikit_model.predict(self.X) + df["prediction"] = scikit_model.predict(self.X) # Evaluate it - metrics = evaluate_regressor(spec, df, 'target', verbose = False) + metrics = evaluate_regressor(spec, df, "target", verbose=False) self._check_metrics(metrics, scikit_params) def test_boston_housing_simple_regression(self): @@ -65,12 +72,12 @@ def test_boston_housing_simple_regression(self): def test_boston_housing_parameter_stress_test(self): options = dict( - max_depth = [1, 10, None], - min_samples_split = [2, 0.5], - min_samples_leaf = [1, 5], - min_weight_fraction_leaf = [0.0, 0.5], - max_features = [None, 1], - max_leaf_nodes = [None, 20], + max_depth=[1, 10, None], + min_samples_split=[2, 0.5], + min_samples_leaf=[1, 5], + min_weight_fraction_leaf=[0.0, 0.5], + max_features=[None, 1], + max_leaf_nodes=[None, 20], ) # Make a cartesian product of all options @@ -82,34 +89,45 @@ def test_boston_housing_parameter_stress_test(self): self._train_convert_evaluate_assert(**arg) -@unittest.skipIf(not HAS_XGBOOST, 'Missing xgboost. Skipping') -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_XGBOOST, "Missing xgboost. Skipping") +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") class XgboostBoosterBostonHousingNumericTest(unittest.TestCase): @classmethod def setUpClass(self): - if not HAS_XGBOOST: + if not _HAS_XGBOOST: return - if not HAS_SKLEARN: + if not _HAS_SKLEARN: return # Load data and train model scikit_data = load_boston() - self.X = scikit_data.data.astype('f').astype('d') - self.dtrain = xgboost.DMatrix(scikit_data.data, label = scikit_data.target, - feature_names = scikit_data.feature_names) + self.X = scikit_data.data.astype("f").astype("d") + self.dtrain = xgboost.DMatrix( + scikit_data.data, + label=scikit_data.target, + feature_names=scikit_data.feature_names, + ) self.feature_names = scikit_data.feature_names - self.output_name = 'target' + self.output_name = "target" - def _check_metrics(self, metrics, params = {}): + def _check_metrics(self, metrics, params={}): """ Check the metrics """ - self.assertAlmostEquals(metrics['rmse'], 0, delta = 1e-4, - msg = 'Failed case %s. Results %s' % (params, metrics)) - self.assertAlmostEquals(metrics['max_error'], 0, delta = 1e-4, - msg = 'Failed case %s. Results %s' % (params, metrics)) + self.assertAlmostEquals( + metrics["rmse"], + 0, + delta=1e-4, + msg="Failed case %s. Results %s" % (params, metrics), + ) + self.assertAlmostEquals( + metrics["max_error"], + 0, + delta=1e-4, + msg="Failed case %s. Results %s" % (params, metrics), + ) - def _train_convert_evaluate_assert(self, bt_params = {}, **params): + def _train_convert_evaluate_assert(self, bt_params={}, **params): """ Set up the unit test by loading the dataset and training a model. 
""" @@ -117,53 +135,60 @@ def _train_convert_evaluate_assert(self, bt_params = {}, **params): xgb_model = xgboost.train(bt_params, self.dtrain, **params) # Convert the model - spec = xgb_converter.convert(xgb_model, self.feature_names, self.output_name, force_32bit_float = False) + spec = xgb_converter.convert( + xgb_model, self.feature_names, self.output_name, force_32bit_float=False + ) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): # Get predictions df = pd.DataFrame(self.X, columns=self.feature_names) - df['prediction'] = xgb_model.predict(self.dtrain) + df["prediction"] = xgb_model.predict(self.dtrain) # Evaluate it - metrics = evaluate_regressor(spec, df, target = 'target', verbose = False) + metrics = evaluate_regressor(spec, df, target="target", verbose=False) self._check_metrics(metrics, bt_params) def test_boston_housing_simple_decision_tree_regression(self): - self._train_convert_evaluate_assert(num_boost_round = 1) + self._train_convert_evaluate_assert(num_boost_round=1) def test_boston_housing_simple_boosted_tree_regression(self): - self._train_convert_evaluate_assert(num_boost_round = 10) + self._train_convert_evaluate_assert(num_boost_round=10) - @pytest.mark.xfail + @pytest.mark.skip("fails: ") def test_boston_housing_simple_random_forest_regression(self): - self._train_convert_evaluate_assert({"subsample":0.5}) + self._train_convert_evaluate_assert({"subsample": 0.5}) def test_boston_housing_float_double_corner_case(self): - self._train_convert_evaluate_assert({ - 'colsample_bytree': 1, - 'colsample_bylevel': 1, - 'scale_pos_weight': 1, - 'learning_rate': 0.5, - 'max_delta_step': 0, - 'min_child_weight': 1, - 'n_estimators': 1, - 'subsample': 0.5, - 'objective': 'reg:linear', - 'max_depth': 5}, num_boost_round = 2) - + self._train_convert_evaluate_assert( + { + "colsample_bytree": 1, + "colsample_bylevel": 1, + "scale_pos_weight": 1, + "learning_rate": 0.5, + "max_delta_step": 0, + "min_child_weight": 1, + "n_estimators": 1, + "subsample": 0.5, + "objective": "reg:linear", + "max_depth": 5, + }, + num_boost_round=2, + ) + @pytest.mark.slow def test_boston_housing_parameter_stress_test(self): options = dict( - max_depth = [1, 5], - learning_rate = [0.1, 0.5], - n_estimators = [1, 10], - min_child_weight = [1, 2], - max_delta_step = [0, 0.1], - colsample_bytree = [1, 0.5], - colsample_bylevel = [1, 0.5], - scale_pos_weight = [1], - objective = ["reg:linear"]) + max_depth=[1, 5], + learning_rate=[0.1, 0.5], + n_estimators=[1, 10], + min_child_weight=[1, 2], + max_delta_step=[0, 0.1], + colsample_bytree=[1, 0.5], + colsample_bylevel=[1, 0.5], + scale_pos_weight=[1], + objective=["reg:linear"], + ) # Make a cartesian product of all options product = itertools.product(*options.values()) @@ -173,8 +198,9 @@ def test_boston_housing_parameter_stress_test(self): for it, arg in enumerate(args): self._train_convert_evaluate_assert(arg) -@unittest.skipIf(not HAS_XGBOOST, 'Missing xgboost. Skipping') -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') + +@unittest.skipIf(not _HAS_XGBOOST, "Missing xgboost. Skipping") +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. 
Skipping tests.") class XGboostRegressorBostonHousingNumericTest(unittest.TestCase): @classmethod def setUpClass(self): @@ -189,15 +215,23 @@ def setUpClass(self): self.scikit_data = self.X self.target = scikit_data.target self.feature_names = scikit_data.feature_names - self.output_name = 'target' - - def _check_metrics(self, metrics, params = {}): - self.assertAlmostEquals(metrics['rmse'], 0, delta = 1e-4, - msg = 'Failed case %s. Results %s' % (params, metrics)) - self.assertAlmostEquals(metrics['max_error'], 0, delta = 1e-4, - msg = 'Failed case %s. Results %s' % (params, metrics)) + self.output_name = "target" + + def _check_metrics(self, metrics, params={}, allowed_error={}): + self.assertAlmostEquals( + metrics["rmse"], + allowed_error["rmse"] if "rmse" in allowed_error else 0, + delta=1e-2, + msg="Failed case %s. Results %s" % (params, metrics), + ) + self.assertAlmostEquals( + metrics["max_error"], + allowed_error["max_error"] if "max_error" in allowed_error else 0, + delta=1e-2, + msg="Failed case %s. Results %s" % (params, metrics), + ) - def _train_convert_evaluate_assert(self, bt_params = {}, **params): + def _train_convert_evaluate_assert(self, bt_params={}, allowed_error={}, **params): """ Set up the unit test by loading the dataset and training a model. """ @@ -206,49 +240,61 @@ def _train_convert_evaluate_assert(self, bt_params = {}, **params): xgb_model.fit(self.X, self.target) # Convert the model (feature_names can't be given because of XGboost) - spec = xgb_converter.convert(xgb_model, self.feature_names, self.output_name, force_32bit_float = False) + spec = xgb_converter.convert( + xgb_model, self.feature_names, self.output_name, force_32bit_float=False + ) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): # Get predictions df = pd.DataFrame(self.X, columns=self.feature_names) - df['prediction'] = xgb_model.predict(self.X) + df["prediction"] = xgb_model.predict(self.X) # Evaluate it - metrics = evaluate_regressor(spec, df, target = 'target', verbose = False) - self._check_metrics(metrics, bt_params) + metrics = evaluate_regressor(spec, df, target="target", verbose=False) + self._check_metrics(metrics, bt_params, allowed_error) def test_boston_housing_simple_boosted_tree_regression(self): self._train_convert_evaluate_assert() - @pytest.mark.xfail(reason="XGBoost converter code needs to be updated") + @pytest.mark.skip("fails: ") def test_boston_housing_simple_random_forest_regression(self): - self._train_convert_evaluate_assert(subsample = 0.5) + self._train_convert_evaluate_assert( + allowed_error={"rmse": 0.0162, "max_error": 0.2886}, subsample=0.5 + ) def test_boston_housing_simple_decision_tree_regression(self): - self._train_convert_evaluate_assert(n_estimators = 1) + self._train_convert_evaluate_assert(n_estimators=1) def test_boston_housing_float_double_corner_case(self): - self._train_convert_evaluate_assert({ - 'colsample_bytree': 1, 'colsample_bylevel': 1, - 'scale_pos_weight': 1, 'learning_rate': 0.1, - 'max_delta_step': 0, 'min_child_weight': 1, - 'n_estimators': 10, 'subsample': 0.3, 'objective': - 'reg:linear', 'max_depth': 1}) + self._train_convert_evaluate_assert( + { + "colsample_bytree": 1, + "colsample_bylevel": 1, + "scale_pos_weight": 1, + "learning_rate": 0.1, + "max_delta_step": 0, + "min_child_weight": 1, + "n_estimators": 10, + "subsample": 0.3, + "objective": "reg:linear", + "max_depth": 1, + } + ) @pytest.mark.slow def test_boston_housing_parameter_stress_test(self): options = dict( - max_depth 
= [1, 5], - learning_rate = [0.1, 0.5], - n_estimators = [1, 10], - objective = ["reg:linear"], - min_child_weight = [1, 2], - max_delta_step = [0, 0.1], - subsample = [1, 0.5, 0.3], - colsample_bytree = [1, 0.5], - colsample_bylevel = [1, 0.5], - scale_pos_weight = [1] + max_depth=[1, 5], + learning_rate=[0.1, 0.5], + n_estimators=[1, 10], + objective=["reg:linear"], + min_child_weight=[1, 2], + max_delta_step=[0, 0.1], + subsample=[1, 0.5, 0.3], + colsample_bytree=[1, 0.5], + colsample_bylevel=[1, 0.5], + scale_pos_weight=[1], ) # Make a cartesian product of all options diff --git a/coremltools/test/xgboost/test_decision_tree_classifier.py b/coremltools/test/xgboost/test_decision_tree_classifier.py index 4ef3484e4..ccbb2cc09 100644 --- a/coremltools/test/xgboost/test_decision_tree_classifier.py +++ b/coremltools/test/xgboost/test_decision_tree_classifier.py @@ -4,20 +4,20 @@ # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause import unittest -from coremltools._deps import HAS_XGBOOST -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_XGBOOST +from coremltools._deps import _HAS_SKLEARN from coremltools.proto import Model_pb2 from coremltools.proto import FeatureTypes_pb2 -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.tree import DecisionTreeClassifier from coremltools.converters.sklearn import convert as skl_converter -if HAS_XGBOOST: +if _HAS_XGBOOST: from coremltools.converters import xgboost as xgb_converter -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") class DecisionTreeBinaryClassifierScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. @@ -32,18 +32,17 @@ def setUpClass(self): from sklearn.tree import DecisionTreeClassifier scikit_data = load_boston() - scikit_model = DecisionTreeClassifier(random_state = 1) - target = scikit_data['target'] > scikit_data['target'].mean() - scikit_model.fit(scikit_data['data'], target) + scikit_model = DecisionTreeClassifier(random_state=1) + target = scikit_data["target"] > scikit_data["target"].mean() + scikit_model.fit(scikit_data["data"], target) # Save the data and the model self.scikit_data = scikit_data self.scikit_model = scikit_model def test_conversion(self): - - output_name = 'target' - spec = skl_converter(self.scikit_model, 'data', 'target').get_spec() + output_name = "target" + spec = skl_converter(self.scikit_model, "data", "target").get_spec() self.assertIsNotNone(spec) # Test the model class @@ -51,20 +50,20 @@ def test_conversion(self): self.assertIsNotNone(spec.treeEnsembleClassifier) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 2) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'int64Type') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "int64Type" + ) self.assertEqual(len(spec.description.input), 1) input_type = spec.description.input[0] - self.assertEqual(input_type.type.WhichOneof('Type'), 'multiArrayType') - self.assertEqual(input_type.name, 'data') + self.assertEqual(input_type.type.WhichOneof("Type"), "multiArrayType") + self.assertEqual(input_type.name, "data") 
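The interface assertions in the decision-tree classifier test above reduce to a short inspection script. A sketch under the same fixtures (Boston housing, a single `'data'` input name):

```python
# Sketch: convert a DecisionTreeClassifier and inspect the spec interface,
# matching the assertions above. A single input name yields one multi-array
# input; the first classifier output is the int64 class label.
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeClassifier
from coremltools.converters.sklearn import convert as skl_converter

data = load_boston()
model = DecisionTreeClassifier(random_state=1)
model.fit(data["data"], data["target"] > data["target"].mean())

spec = skl_converter(model, "data", "target").get_spec()
print(spec.description.predictedFeatureName)               # target
print(spec.description.input[0].name)                      # data
print(spec.description.input[0].type.WhichOneof("Type"))   # multiArrayType
print(spec.description.output[0].type.WhichOneof("Type"))  # int64Type
```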
# Test the linear regression parameters. tr = spec.treeEnsembleClassifier.treeEnsemble @@ -72,20 +71,20 @@ def test_conversion(self): self.assertEqual(len(tr.nodes), 111) def test_conversion_bad_inputs(self): - # Error on converting an untrained model with self.assertRaises(Exception): model = DecisionTreeClassifier() - spec = skl_converter(model, 'data', 'out') + spec = skl_converter(model, "data", "out") # Check the expected class during covnersion. from sklearn.preprocessing import OneHotEncoder + with self.assertRaises(Exception): model = OneHotEncoder() - spec = skl_converter(model, 'data', 'out') + spec = skl_converter(model, "data", "out") -@unittest.skipIf(not HAS_SKLEARN, 'Missing scikit-learn. Skipping tests.') +@unittest.skipIf(not _HAS_SKLEARN, "Missing scikit-learn. Skipping tests.") class DecisionTreeMultiClassClassifierScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. @@ -102,7 +101,7 @@ def setUpClass(self): import numpy as np scikit_data = load_boston() - scikit_model = DecisionTreeClassifier(random_state = 1) + scikit_model = DecisionTreeClassifier(random_state=1) t = scikit_data.target target = np.digitize(t, np.histogram(t)[1]) - 1 scikit_model.fit(scikit_data.data, target) @@ -113,9 +112,8 @@ def setUpClass(self): self.scikit_model = scikit_model def test_conversion(self): - - output_name = 'target' - spec = skl_converter(self.scikit_model, 'data', 'target').get_spec() + output_name = "target" + spec = skl_converter(self.scikit_model, "data", "target").get_spec() self.assertIsNotNone(spec) # Test the model class @@ -123,28 +121,32 @@ def test_conversion(self): self.assertIsNotNone(spec.treeEnsembleClassifier) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 2) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), 'int64Type') - self.assertEqual(spec.description.input[0].name, 'data') - self.assertEqual(spec.description.input[0].type.WhichOneof('Type'), 'multiArrayType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "int64Type" + ) + self.assertEqual(spec.description.input[0].name, "data") + self.assertEqual( + spec.description.input[0].type.WhichOneof("Type"), "multiArrayType" + ) tr = spec.treeEnsembleClassifier.treeEnsemble self.assertIsNotNone(tr) self.assertEqual(len(tr.nodes), 315) def test_conversion_bad_inputs(self): - # Error on converting an untrained model with self.assertRaises(Exception): model = DecisionTreeClassifier() - spec = skl_converter(model, 'data', 'out') + spec = skl_converter(model, "data", "out") # Check the expected class during covnersion. 
from sklearn.preprocessing import OneHotEncoder + with self.assertRaises(Exception): model = OneHotEncoder() - spec = skl_converter(model, 'data', 'out') + spec = skl_converter(model, "data", "out") diff --git a/coremltools/test/xgboost/test_decision_tree_classifier_numeric.py b/coremltools/test/xgboost/test_decision_tree_classifier_numeric.py index 35a785ee1..02df86c24 100644 --- a/coremltools/test/xgboost/test_decision_tree_classifier_numeric.py +++ b/coremltools/test/xgboost/test_decision_tree_classifier_numeric.py @@ -7,40 +7,45 @@ import itertools import pandas as pd import unittest -from coremltools._deps import HAS_SKLEARN -from coremltools.models.utils import evaluate_classifier,\ - macos_version, is_macos +from coremltools._deps import _HAS_SKLEARN +from coremltools.models.utils import evaluate_classifier, _macos_version, _is_macos import pytest -if HAS_SKLEARN: +if _HAS_SKLEARN: from coremltools.converters import sklearn as skl_converter from sklearn.tree import DecisionTreeClassifier -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') + +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class DecisionTreeClassificationBostonHousingScikitNumericTest(unittest.TestCase): - def _check_metrics(self, metrics, params = {}): - self.assertEquals(metrics['num_errors'], 0, msg = 'Failed case %s. Results %s' % (params, metrics)) + def _check_metrics(self, metrics, params={}): + self.assertEquals( + metrics["num_errors"], + 0, + msg="Failed case %s. Results %s" % (params, metrics), + ) def _train_convert_evaluate_assert(self, **scikit_params): - scikit_model = DecisionTreeClassifier(random_state = 1, **scikit_params) + scikit_model = DecisionTreeClassifier(random_state=1, **scikit_params) scikit_model.fit(self.X, self.target) # Convert the model spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name) - - if is_macos() and macos_version() >= (10, 13): + + if _is_macos() and _macos_version() >= (10, 13): # Get predictions df = pd.DataFrame(self.X, columns=self.feature_names) - df['prediction'] = scikit_model.predict(self.X) - + df["prediction"] = scikit_model.predict(self.X) + # Evaluate it metrics = evaluate_classifier(spec, df) self._check_metrics(metrics, scikit_params) -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') -class DecisionTreeBinaryClassificationBostonHousingScikitNumericTest( - DecisionTreeClassificationBostonHousingScikitNumericTest): +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. 
Skipping tests.") +class DecisionTreeBinaryClassificationBostonHousingScikitNumericTest( + DecisionTreeClassificationBostonHousingScikitNumericTest +): @classmethod def setUpClass(self): from sklearn.datasets import load_boston @@ -49,29 +54,32 @@ def setUpClass(self): # Load data and train model scikit_data = load_boston() self.scikit_data = scikit_data - self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data - self.target = 1 * (scikit_data['target'] > scikit_data['target'].mean()) + self.X = scikit_data.data.astype("f").astype( + "d" + ) ## scikit-learn downcasts data + self.target = 1 * (scikit_data["target"] > scikit_data["target"].mean()) self.feature_names = scikit_data.feature_names - self.output_name = 'target' + self.output_name = "target" def test_simple_binary_classifier(self): self._train_convert_evaluate_assert() - + @pytest.mark.slow def test_binary_classifier_stress_test(self): options = dict( - splitter = ['best'], - max_depth = [1, 10, None], - min_samples_split = [2, 10, 0.5], - min_samples_leaf = [1, 5], - min_weight_fraction_leaf = [0.0, 0.5], - max_features = [None, 1, 5], - max_leaf_nodes = [None, 20], - presort = [False, True], + splitter=["best"], + max_depth=[1, 10, None], + min_samples_split=[2, 10, 0.5], + min_samples_leaf=[1, 5], + min_weight_fraction_leaf=[0.0, 0.5], + max_features=[None, 1, 5], + max_leaf_nodes=[None, 20], + presort=[False, True], ) # Make a cartesian product of all options import itertools + product = itertools.product(*options.values()) args = [dict(zip(options.keys(), p)) for p in product] @@ -79,10 +87,11 @@ def test_binary_classifier_stress_test(self): for it, arg in enumerate(args): self._train_convert_evaluate_assert(**arg) -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') -class DecisionTreeMultiClassClassificationBostonHousingScikitNumericTest( - DecisionTreeClassificationBostonHousingScikitNumericTest): +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. 
Skipping tests.") +class DecisionTreeMultiClassClassificationBostonHousingScikitNumericTest( + DecisionTreeClassificationBostonHousingScikitNumericTest +): @classmethod def setUpClass(self): from sklearn.datasets import load_boston @@ -91,15 +100,17 @@ def setUpClass(self): # Load data and train model scikit_data = load_boston() num_classes = 3 - self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data + self.X = scikit_data.data.astype("f").astype( + "d" + ) ## scikit-learn downcasts data t = scikit_data.target - target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1 + target = np.digitize(t, np.histogram(t, bins=num_classes - 1)[1]) - 1 # Save the data and the model self.scikit_data = scikit_data self.target = target self.feature_names = scikit_data.feature_names - self.output_name = 'target' + self.output_name = "target" def test_simple_multiclass(self): self._train_convert_evaluate_assert() @@ -107,14 +118,14 @@ def test_simple_multiclass(self): @pytest.mark.slow def test_multiclass_stress_test(self): options = dict( - splitter = ['best'], - max_depth = [1, 10, None], - min_samples_split = [2, 10, 0.5], - min_samples_leaf = [1, 5], - min_weight_fraction_leaf = [0.0, 0.5], - max_features = [None, 1, 5], - max_leaf_nodes = [None, 20], - presort = [False, True], + splitter=["best"], + max_depth=[1, 10, None], + min_samples_split=[2, 10, 0.5], + min_samples_leaf=[1, 5], + min_weight_fraction_leaf=[0.0, 0.5], + max_features=[None, 1, 5], + max_leaf_nodes=[None, 20], + presort=[False, True], ) # Make a cartesian product of all options diff --git a/coremltools/test/xgboost/test_decision_tree_regression.py b/coremltools/test/xgboost/test_decision_tree_regression.py index 7b599dd99..544311f38 100644 --- a/coremltools/test/xgboost/test_decision_tree_regression.py +++ b/coremltools/test/xgboost/test_decision_tree_regression.py @@ -9,18 +9,19 @@ import json from coremltools.proto import Model_pb2 from coremltools.proto import FeatureTypes_pb2 -from coremltools._deps import HAS_XGBOOST -from coremltools._deps import HAS_SKLEARN +from coremltools._deps import _HAS_XGBOOST +from coremltools._deps import _HAS_SKLEARN -if HAS_XGBOOST: +if _HAS_XGBOOST: import xgboost from coremltools.converters import xgboost as xgb_converter -if HAS_SKLEARN: +if _HAS_SKLEARN: from coremltools.converters import sklearn as skl_converter from sklearn.tree import DecisionTreeRegressor -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. Skipping tests.') + +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class DecisionTreeRegressorScikitTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter. 
@@ -35,18 +36,19 @@ def setUpClass(self): from sklearn.tree import DecisionTreeRegressor scikit_data = load_boston() - scikit_model = DecisionTreeRegressor(random_state = 1) - scikit_model.fit(scikit_data['data'], scikit_data['target']) + scikit_model = DecisionTreeRegressor(random_state=1) + scikit_model.fit(scikit_data["data"], scikit_data["target"]) # Save the data and the model self.scikit_data = scikit_data self.scikit_model = scikit_model def test_conversion(self): - feature_names = self.scikit_data.feature_names - output_name = 'target' - spec = skl_converter.convert(self.scikit_model, feature_names, 'target').get_spec() + output_name = "target" + spec = skl_converter.convert( + self.scikit_model, feature_names, "target" + ).get_spec() self.assertIsNotNone(spec) # Test the model class @@ -54,34 +56,36 @@ def test_conversion(self): self.assertIsNotNone(spec.treeEnsembleRegressor) # Test the interface class - self.assertEqual(spec.description.predictedFeatureName, - 'target') + self.assertEqual(spec.description.predictedFeatureName, "target") # Test the inputs and outputs self.assertEqual(len(spec.description.output), 1) - self.assertEqual(spec.description.output[0].name, 'target') - self.assertEqual(spec.description.output[0].type.WhichOneof('Type'), - 'doubleType') + self.assertEqual(spec.description.output[0].name, "target") + self.assertEqual( + spec.description.output[0].type.WhichOneof("Type"), "doubleType" + ) for input_type in spec.description.input: - self.assertEqual(input_type.type.WhichOneof('Type'), - 'doubleType') - self.assertEqual(sorted(feature_names), - sorted(map(lambda x: x.name, spec.description.input))) + self.assertEqual(input_type.type.WhichOneof("Type"), "doubleType") + self.assertEqual( + sorted(feature_names), sorted(map(lambda x: x.name, spec.description.input)) + ) # Test the linear regression parameters. - tr = spec.pipelineRegressor.pipeline.models[1].treeEnsembleRegressor.treeEnsemble + tr = spec.pipelineRegressor.pipeline.models[ + 1 + ].treeEnsembleRegressor.treeEnsemble self.assertIsNotNone(tr) self.assertEqual(len(tr.nodes), 935) def test_conversion_bad_inputs(self): - # Error on converting an untrained model with self.assertRaises(Exception): model = DecisionTreeRegressor() - spec = skl_converter.convert(model, 'data', 'out') + spec = skl_converter.convert(model, "data", "out") # Check the expected class during covnersion. from sklearn.preprocessing import OneHotEncoder + with self.assertRaises(Exception): model = OneHotEncoder() - spec = skl_converter.convert(model, 'data', 'out') + spec = skl_converter.convert(model, "data", "out") diff --git a/coremltools/test/xgboost/test_decision_tree_regression_numeric.py b/coremltools/test/xgboost/test_decision_tree_regression_numeric.py index 7d0fcbf03..d4cd98fdb 100644 --- a/coremltools/test/xgboost/test_decision_tree_regression_numeric.py +++ b/coremltools/test/xgboost/test_decision_tree_regression_numeric.py @@ -8,19 +8,21 @@ from coremltools.models.utils import evaluate_regressor import pandas as pd import os -from coremltools.models.utils import evaluate_regressor, macos_version, is_macos -from coremltools._deps import HAS_SKLEARN +from coremltools.models.utils import evaluate_regressor, _macos_version, _is_macos +from coremltools._deps import _HAS_SKLEARN import pytest -if HAS_SKLEARN: +if _HAS_SKLEARN: from sklearn.tree import DecisionTreeRegressor from coremltools.converters.sklearn import _decision_tree_regressor as skl_converter -@unittest.skipIf(not HAS_SKLEARN, 'Missing sklearn. 
Skipping tests.') + +@unittest.skipIf(not _HAS_SKLEARN, "Missing sklearn. Skipping tests.") class DecisionTreeRegressorBostonHousingScikitNumericTest(unittest.TestCase): """ Unit test class for testing scikit-learn converter and running both models """ + @classmethod def setUpClass(self): """ @@ -32,61 +34,70 @@ def setUpClass(self): # Load data and train model scikit_data = load_boston() self.scikit_data = scikit_data - self.X = scikit_data['data'] - self.target = scikit_data['target'] + self.X = scikit_data["data"] + self.target = scikit_data["target"] self.feature_names = scikit_data.feature_names - self.output_name = 'target' + self.output_name = "target" - def _check_metrics(self, metrics, params = {}): + def _check_metrics(self, metrics, params={}): """ Check the metrics """ - self.assertAlmostEquals(metrics['rmse'], 0, delta = 1e-5, - msg = 'Failed case %s. Results %s' % (params, metrics)) - self.assertAlmostEquals(metrics['max_error'], 0, delta = 1e-5, - msg = 'Failed case %s. Results %s' % (params, metrics)) + self.assertAlmostEquals( + metrics["rmse"], + 0, + delta=1e-5, + msg="Failed case %s. Results %s" % (params, metrics), + ) + self.assertAlmostEquals( + metrics["max_error"], + 0, + delta=1e-5, + msg="Failed case %s. Results %s" % (params, metrics), + ) def _train_convert_evaluate_assert(self, **scikit_params): """ Train a scikit-learn model, convert it and then evaluate it with CoreML """ - scikit_model = DecisionTreeRegressor(random_state = 1, **scikit_params) + scikit_model = DecisionTreeRegressor(random_state=1, **scikit_params) scikit_model.fit(self.X, self.target) # Convert the model spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name) - if is_macos() and macos_version() >= (10, 13): + if _is_macos() and _macos_version() >= (10, 13): # Get predictions df = pd.DataFrame(self.X, columns=self.feature_names) - df['prediction'] = scikit_model.predict(self.X) + df["prediction"] = scikit_model.predict(self.X) # Evaluate it - metrics = evaluate_regressor(spec, df, target = 'target', verbose = False) + metrics = evaluate_regressor(spec, df, target="target", verbose=False) self._check_metrics(metrics, scikit_params) def test_boston_housing_simple_regression(self): - self._train_convert_evaluate_assert(max_depth = 20) + self._train_convert_evaluate_assert(max_depth=20) @pytest.mark.slow def test_boston_housing_parameter_stress_test(self): ## These are all the options in decision tree regression of scikit-learn options = dict( - criterion = ['mse'], - splitter = ['best'], - max_depth = [1, 10, None], - min_samples_split = [2, 10, 0.5], - min_samples_leaf = [1, 5], - min_weight_fraction_leaf = [0.0, 0.5], - max_features = [None, 1, 5], - max_leaf_nodes = [None, 20], - min_impurity_decrease = [0.0, 1e-07, 0.1], - presort = [False, True], + criterion=["mse"], + splitter=["best"], + max_depth=[1, 10, None], + min_samples_split=[2, 10, 0.5], + min_samples_leaf=[1, 5], + min_weight_fraction_leaf=[0.0, 0.5], + max_features=[None, 1, 5], + max_leaf_nodes=[None, 20], + min_impurity_decrease=[0.0, 1e-07, 0.1], + presort=[False, True], ) # Make a cartesian product of all options import itertools + product = itertools.product(*options.values()) args = [dict(zip(options.keys(), p)) for p in product] diff --git a/coremltools/converters/nnssa/frontend/__init__.py b/coremltools/version.py similarity index 81% rename from coremltools/converters/nnssa/frontend/__init__.py rename to coremltools/version.py index 4fcd22579..fad73dbe0 100644 --- 
a/coremltools/converters/nnssa/frontend/__init__.py +++ b/coremltools/version.py @@ -3,5 +3,5 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause -from . import tensorflow -from . import graph_pass \ No newline at end of file + +__version__ = "4.0b1" # VERSION_STRING diff --git a/PythonIncludes/Python.h b/deps/PythonIncludes/Python.h similarity index 100% rename from PythonIncludes/Python.h rename to deps/PythonIncludes/Python.h diff --git a/PythonIncludes/frameobject.h b/deps/PythonIncludes/frameobject.h similarity index 100% rename from PythonIncludes/frameobject.h rename to deps/PythonIncludes/frameobject.h diff --git a/PythonIncludes/pythread.h b/deps/PythonIncludes/pythread.h similarity index 100% rename from PythonIncludes/pythread.h rename to deps/PythonIncludes/pythread.h diff --git a/deps/protobuf/cmake/libprotobuf.cmake b/deps/protobuf/cmake/libprotobuf.cmake index 5313d39ef..f51ef4231 100644 --- a/deps/protobuf/cmake/libprotobuf.cmake +++ b/deps/protobuf/cmake/libprotobuf.cmake @@ -57,6 +57,7 @@ set(libprotobuf_files add_library(libprotobuf ${protobuf_SHARED_OR_STATIC} ${libprotobuf_lite_files} ${libprotobuf_files}) +target_compile_options(libprotobuf PUBLIC "-Wno-enum-compare-switch") target_link_libraries(libprotobuf ${CMAKE_THREAD_LIBS_INIT}) if(protobuf_WITH_ZLIB) target_link_libraries(libprotobuf ${ZLIB_LIBRARIES}) diff --git a/deps/protobuf/cmake/libprotoc.cmake b/deps/protobuf/cmake/libprotoc.cmake index b663e354b..a101c2cfa 100644 --- a/deps/protobuf/cmake/libprotoc.cmake +++ b/deps/protobuf/cmake/libprotoc.cmake @@ -109,6 +109,7 @@ add_custom_command( add_library(libprotoc ${protobuf_SHARED_OR_STATIC} ${libprotoc_files}) +target_compile_options(libprotoc PUBLIC "-Wno-enum-compare-switch") target_link_libraries(libprotoc libprotobuf) if(MSVC AND protobuf_BUILD_SHARED_LIBS) target_compile_definitions(libprotoc diff --git a/docker/Dockerfile-coremltools-linux b/docker/Dockerfile-coremltools-linux new file mode 100644 index 000000000..7f8e88e37 --- /dev/null +++ b/docker/Dockerfile-coremltools-linux @@ -0,0 +1,36 @@ +# An Ubuntu based image that is used for gitlab based ci infrastructure +FROM ubuntu:20.04 + +# Install dependencies, particularly libraries that python or CMake need +RUN apt-get -y update \ + && apt-get -y install build-essential libstdc++6 \ + python-setuptools curl git libssl-dev \ + make vim-common zlib1g-dev libffi-dev \ + libbz2-dev libopenblas-dev liblapack-dev \ + zsh +RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash +RUN apt-get -y install git-lfs + +WORKDIR / +RUN mkdir src +ENV PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64: + +# Install CMake +WORKDIR /opt +RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o cmake-3.13.4-Linux-x86_64.tar.gz \ + && tar xf cmake-3.13.4-Linux-x86_64.tar.gz +ENV PATH=/opt/cmake-3.13.4-Linux-x86_64/bin:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + +# Install Anaconda and initialize it for use in ZSH +WORKDIR /opt +RUN curl https://repo.anaconda.com/archive/Anaconda3-2020.02-Linux-x86_64.sh -o anaconda.sh +RUN zsh anaconda.sh -b -p /opt/anaconda && eval "$(/opt/anaconda/bin/conda shell.zsh hook)" && conda init zsh + +# Give Cmake hints about compilers to use. 
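The new `coremltools/version.py` gives the build a single place to read the version string from. How the packaging scripts consume it is not shown in this hunk; one plausible sketch, in which the `_read_version` helper is hypothetical, parses the file without importing the package:

```python
# Hypothetical helper: read __version__ out of coremltools/version.py without
# importing coremltools (useful before the package is built). The file added
# above contains: __version__ = "4.0b1"  # VERSION_STRING
import re

def _read_version(path="coremltools/version.py"):
    with open(path) as f:
        match = re.search(r'__version__\s*=\s*"([^"]+)"', f.read())
    return match.group(1)

print(_read_version())  # -> 4.0b1
```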
+ENV CC="/usr/bin/gcc" +ENV CXX="/usr/bin/g++" +CMD ["/bin/bash"] + +# Start at /root +WORKDIR /root diff --git a/docker/Dockerfile-python2.7-build b/docker/Dockerfile-python2.7-build deleted file mode 100644 index 798bebb9f..000000000 --- a/docker/Dockerfile-python2.7-build +++ /dev/null @@ -1,86 +0,0 @@ -# vim: set ft=dockerfile: - -# Based on Centos 6 for compatibility with older glibc versions -FROM centos:centos6 - -# Update yum and install some standard dev tools and dependencies -RUN yum -y update -RUN yum -y install git.x86_64 openssl-devel zlib-devel* bzip2-devel vim-common lapack-devel blas-devel sqlite-devel -RUN yum -y groupinstall 'development tools' - -# Install visualization dependencies -RUN yum -y install libX11-devel - -# Create a directory for building deps from source code and set up env vars -WORKDIR / -RUN mkdir src -ENV PATH="/usr/local/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" - -# Install gcc dependencies -WORKDIR /src -RUN curl -O https://gmplib.org/download/gmp/gmp-6.1.2.tar.bz2 -RUN tar xvf gmp-6.1.2.tar.bz2 -WORKDIR /src/gmp-6.1.2 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O http://www.mpfr.org/mpfr-4.0.1/mpfr-4.0.1.tar.gz -RUN tar xvf mpfr-4.0.1.tar.gz -WORKDIR /src/mpfr-4.0.1 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/mpc/mpc-1.1.0.tar.gz -RUN tar xvf mpc-1.1.0.tar.gz -WORKDIR /src/mpc-1.1.0 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -# Install gcc 5.5.0 from source -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/gcc/gcc-5.5.0/gcc-5.5.0.tar.gz -RUN tar xvf gcc-5.5.0.tar.gz -WORKDIR /src/gcc-5.5.0 -RUN ./configure --prefix=/usr/local --with-system-zlib --disable-multilib -RUN make -j16 -RUN make install - -# Install Python 2.7 from source -WORKDIR /src -RUN curl -O https://www.python.org/ftp/python/2.7.13/Python-2.7.13.tgz -RUN tar xvf Python-2.7.13.tgz -WORKDIR /src/Python-2.7.13 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions -RUN make -j16 -RUN make install - -# Install cmake from binary release -WORKDIR /opt -RUN curl -O https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz -RUN tar xvf cmake-3.10.2-Linux-x86_64.tar.gz -ENV PATH="/opt/cmake-3.10.2-Linux-x86_64/bin:${PATH}" - -# Set compiler binary paths for CMake to pick up -ENV CC="/usr/local/bin/gcc" -ENV CXX="/usr/local/bin/g++" - -# Install pip and virtualenv -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python get-pip.py -RUN pip install virtualenv - -# Install node.js from binary -WORKDIR /opt -RUN curl -O https://nodejs.org/dist/v8.9.4/node-v8.9.4-linux-x64.tar.xz -RUN tar xvf node-v8.9.4-linux-x64.tar.xz -ENV PATH="/opt/node-v8.9.4-linux-x64/bin:${PATH}" - -# Start at home directory -WORKDIR /root diff --git a/docker/Dockerfile-python2.7-test b/docker/Dockerfile-python2.7-test deleted file mode 100644 index 9c572a3ed..000000000 --- a/docker/Dockerfile-python2.7-test +++ /dev/null @@ -1,85 +0,0 @@ -# vim: set ft=dockerfile: - -FROM centos:centos7 - -# Update yum and install some standard dev tools and dependencies -RUN yum -y update -RUN yum -y install git.x86_64 openssl-devel zlib-devel* bzip2-devel vim-common which lapack-devel blas-devel sqlite-devel -RUN yum -y groupinstall 'development tools' - -# Install visualization dependencies -RUN yum -y install libX11-devel - -# Create a directory for 
building deps from source code and set up env vars -WORKDIR / -RUN mkdir src -ENV PATH="/usr/local/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" - -# Install gcc dependencies -WORKDIR /src -RUN curl -O https://gmplib.org/download/gmp/gmp-6.1.2.tar.bz2 -RUN tar xvf gmp-6.1.2.tar.bz2 -WORKDIR /src/gmp-6.1.2 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O http://www.mpfr.org/mpfr-4.0.1/mpfr-4.0.1.tar.gz -RUN tar xvf mpfr-4.0.1.tar.gz -WORKDIR /src/mpfr-4.0.1 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/mpc/mpc-1.1.0.tar.gz -RUN tar xvf mpc-1.1.0.tar.gz -WORKDIR /src/mpc-1.1.0 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -# Install gcc 5.5.0 from source -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/gcc/gcc-5.5.0/gcc-5.5.0.tar.gz -RUN tar xvf gcc-5.5.0.tar.gz -WORKDIR /src/gcc-5.5.0 -RUN ./configure --prefix=/usr/local --with-system-zlib --disable-multilib -RUN make -j16 -RUN make install - -# Install Python 2.7 from source -WORKDIR /src -RUN curl -O https://www.python.org/ftp/python/2.7.13/Python-2.7.13.tgz -RUN tar xvf Python-2.7.13.tgz -WORKDIR /src/Python-2.7.13 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions -RUN make -j16 -RUN make install - -# Install cmake from binary release -WORKDIR /opt -RUN curl -O https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz -RUN tar xvf cmake-3.10.2-Linux-x86_64.tar.gz -ENV PATH="/opt/cmake-3.10.2-Linux-x86_64/bin:${PATH}" - -# Set compiler binary paths for CMake to pick up -ENV CC="/usr/local/bin/gcc" -ENV CXX="/usr/local/bin/g++" - -# Install pip and virtualenv -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python get-pip.py -RUN pip install virtualenv - -# Install node.js from binary -WORKDIR /opt -RUN curl -O https://nodejs.org/dist/v8.9.4/node-v8.9.4-linux-x64.tar.xz -RUN tar xvf node-v8.9.4-linux-x64.tar.xz -ENV PATH="/opt/node-v8.9.4-linux-x64/bin:${PATH}" - -# Start at home directory -WORKDIR /root diff --git a/docker/Dockerfile-python3.5-build b/docker/Dockerfile-python3.5-build deleted file mode 100644 index 2abf36d83..000000000 --- a/docker/Dockerfile-python3.5-build +++ /dev/null @@ -1,86 +0,0 @@ -# vim: set ft=dockerfile: - -# Based on Centos 6 for compatibility with older glibc versions -FROM centos:centos6 - -# Update yum and install some standard dev tools and dependencies -RUN yum -y update -RUN yum -y install git.x86_64 openssl-devel zlib-devel* bzip2-devel vim-common lapack-devel blas-devel sqlite-devel -RUN yum -y groupinstall 'development tools' - -# Install visualization dependencies -RUN yum -y install libX11-devel - -# Create a directory for building deps from source code and set up env vars -WORKDIR / -RUN mkdir src -ENV PATH="/usr/local/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" - -# Install gcc dependencies -WORKDIR /src -RUN curl -O https://gmplib.org/download/gmp/gmp-6.1.2.tar.bz2 -RUN tar xvf gmp-6.1.2.tar.bz2 -WORKDIR /src/gmp-6.1.2 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O http://www.mpfr.org/mpfr-4.0.1/mpfr-4.0.1.tar.gz -RUN tar xvf mpfr-4.0.1.tar.gz -WORKDIR /src/mpfr-4.0.1 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/mpc/mpc-1.1.0.tar.gz -RUN tar xvf 
mpc-1.1.0.tar.gz -WORKDIR /src/mpc-1.1.0 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -# Install gcc 5.5.0 from source -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/gcc/gcc-5.5.0/gcc-5.5.0.tar.gz -RUN tar xvf gcc-5.5.0.tar.gz -WORKDIR /src/gcc-5.5.0 -RUN ./configure --prefix=/usr/local --with-system-zlib --disable-multilib -RUN make -j16 -RUN make install - -# Install Python 3.5 from source -WORKDIR /src -RUN curl -O https://www.python.org/ftp/python/3.5.4/Python-3.5.4.tgz -RUN tar xvf Python-3.5.4.tgz -WORKDIR /src/Python-3.5.4 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions -RUN make -j16 -RUN make install - -# Install cmake from binary release -WORKDIR /opt -RUN curl -O https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz -RUN tar xvf cmake-3.10.2-Linux-x86_64.tar.gz -ENV PATH="/opt/cmake-3.10.2-Linux-x86_64/bin:${PATH}" - -# Set compiler binary paths for CMake to pick up -ENV CC="/usr/local/bin/gcc" -ENV CXX="/usr/local/bin/g++" - -# Install pip and virtualenv -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python3 get-pip.py -RUN pip3 install virtualenv - -# Install node.js from binary -WORKDIR /opt -RUN curl -O https://nodejs.org/dist/v8.9.4/node-v8.9.4-linux-x64.tar.xz -RUN tar xvf node-v8.9.4-linux-x64.tar.xz -ENV PATH="/opt/node-v8.9.4-linux-x64/bin:${PATH}" - -# Start at home directory -WORKDIR /root diff --git a/docker/Dockerfile-python3.5-test b/docker/Dockerfile-python3.5-test deleted file mode 100644 index afb60c152..000000000 --- a/docker/Dockerfile-python3.5-test +++ /dev/null @@ -1,85 +0,0 @@ -# vim: set ft=dockerfile: - -FROM centos:centos7 - -# Update yum and install some standard dev tools and dependencies -RUN yum -y update -RUN yum -y install git.x86_64 openssl-devel zlib-devel* bzip2-devel vim-common which lapack-devel blas-devel sqlite-devel -RUN yum -y groupinstall 'development tools' - -# Install visualization dependencies -RUN yum -y install libX11-devel - -# Create a directory for building deps from source code and set up env vars -WORKDIR / -RUN mkdir src -ENV PATH="/usr/local/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" - -# Install gcc dependencies -WORKDIR /src -RUN curl -O https://gmplib.org/download/gmp/gmp-6.1.2.tar.bz2 -RUN tar xvf gmp-6.1.2.tar.bz2 -WORKDIR /src/gmp-6.1.2 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O http://www.mpfr.org/mpfr-4.0.1/mpfr-4.0.1.tar.gz -RUN tar xvf mpfr-4.0.1.tar.gz -WORKDIR /src/mpfr-4.0.1 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/mpc/mpc-1.1.0.tar.gz -RUN tar xvf mpc-1.1.0.tar.gz -WORKDIR /src/mpc-1.1.0 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -# Install gcc 5.5.0 from source -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/gcc/gcc-5.5.0/gcc-5.5.0.tar.gz -RUN tar xvf gcc-5.5.0.tar.gz -WORKDIR /src/gcc-5.5.0 -RUN ./configure --prefix=/usr/local --with-system-zlib --disable-multilib -RUN make -j16 -RUN make install - -# Install Python 3.5 from source -WORKDIR /src -RUN curl -O https://www.python.org/ftp/python/3.5.4/Python-3.5.4.tgz -RUN tar xvf Python-3.5.4.tgz -WORKDIR /src/Python-3.5.4 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions -RUN make -j16 -RUN make install - -# Install cmake from binary release -WORKDIR /opt -RUN curl -O 
https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz -RUN tar xvf cmake-3.10.2-Linux-x86_64.tar.gz -ENV PATH="/opt/cmake-3.10.2-Linux-x86_64/bin:${PATH}" - -# Set compiler binary paths for CMake to pick up -ENV CC="/usr/local/bin/gcc" -ENV CXX="/usr/local/bin/g++" - -# Install pip and virtualenv -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python3 get-pip.py -RUN pip3 install virtualenv - -# Install node.js from binary -WORKDIR /opt -RUN curl -O https://nodejs.org/dist/v8.9.4/node-v8.9.4-linux-x64.tar.xz -RUN tar xvf node-v8.9.4-linux-x64.tar.xz -ENV PATH="/opt/node-v8.9.4-linux-x64/bin:${PATH}" - -# Start at home directory -WORKDIR /root diff --git a/docker/Dockerfile-python3.6-build b/docker/Dockerfile-python3.6-build deleted file mode 100644 index e54016ad1..000000000 --- a/docker/Dockerfile-python3.6-build +++ /dev/null @@ -1,86 +0,0 @@ -# vim: set ft=dockerfile: - -# Based on Centos 6 for compatibility with older glibc versions -FROM centos:centos6 - -# Update yum and install some standard dev tools and dependencies -RUN yum -y update -RUN yum -y install git.x86_64 openssl-devel zlib-devel* bzip2-devel vim-common lapack-devel blas-devel sqlite-devel -RUN yum -y groupinstall 'development tools' - -# Install visualization dependencies -RUN yum -y install libX11-devel - -# Create a directory for building deps from source code and set up env vars -WORKDIR / -RUN mkdir src -ENV PATH="/usr/local/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" - -# Install gcc dependencies -WORKDIR /src -RUN curl -O https://gmplib.org/download/gmp/gmp-6.1.2.tar.bz2 -RUN tar xvf gmp-6.1.2.tar.bz2 -WORKDIR /src/gmp-6.1.2 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O http://www.mpfr.org/mpfr-4.0.1/mpfr-4.0.1.tar.gz -RUN tar xvf mpfr-4.0.1.tar.gz -WORKDIR /src/mpfr-4.0.1 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/mpc/mpc-1.1.0.tar.gz -RUN tar xvf mpc-1.1.0.tar.gz -WORKDIR /src/mpc-1.1.0 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -# Install gcc 5.5.0 from source -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/gcc/gcc-5.5.0/gcc-5.5.0.tar.gz -RUN tar xvf gcc-5.5.0.tar.gz -WORKDIR /src/gcc-5.5.0 -RUN ./configure --prefix=/usr/local --with-system-zlib --disable-multilib -RUN make -j16 -RUN make install - -# Install Python 3.6 from source -WORKDIR /src -RUN curl -O https://www.python.org/ftp/python/3.6.4/Python-3.6.4.tgz -RUN tar xvf Python-3.6.4.tgz -WORKDIR /src/Python-3.6.4 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions -RUN make -j16 -RUN make install - -# Install cmake from binary release -WORKDIR /opt -RUN curl -O https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz -RUN tar xvf cmake-3.10.2-Linux-x86_64.tar.gz -ENV PATH="/opt/cmake-3.10.2-Linux-x86_64/bin:${PATH}" - -# Set compiler binary paths for CMake to pick up -ENV CC="/usr/local/bin/gcc" -ENV CXX="/usr/local/bin/g++" - -# Install pip and virtualenv -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python3 get-pip.py -RUN pip3 install virtualenv - -# Install node.js from binary -WORKDIR /opt -RUN curl -O https://nodejs.org/dist/v8.9.4/node-v8.9.4-linux-x64.tar.xz -RUN tar xvf node-v8.9.4-linux-x64.tar.xz -ENV PATH="/opt/node-v8.9.4-linux-x64/bin:${PATH}" - -# Start at home directory -WORKDIR /root diff --git 
a/docker/Dockerfile-python3.6-test b/docker/Dockerfile-python3.6-test deleted file mode 100644 index 7a2f52aea..000000000 --- a/docker/Dockerfile-python3.6-test +++ /dev/null @@ -1,85 +0,0 @@ -# vim: set ft=dockerfile: - -FROM centos:centos7 - -# Update yum and install some standard dev tools and dependencies -RUN yum -y update -RUN yum -y install git.x86_64 openssl-devel zlib-devel* bzip2-devel vim-common which lapack-devel blas-devel sqlite-devel -RUN yum -y groupinstall 'development tools' - -# Install visualization dependencies -RUN yum -y install libX11-devel - -# Create a directory for building deps from source code and set up env vars -WORKDIR / -RUN mkdir src -ENV PATH="/usr/local/bin:${PATH}" -ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:${LD_LIBRARY_PATH}" - -# Install gcc dependencies -WORKDIR /src -RUN curl -O https://gmplib.org/download/gmp/gmp-6.1.2.tar.bz2 -RUN tar xvf gmp-6.1.2.tar.bz2 -WORKDIR /src/gmp-6.1.2 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O http://www.mpfr.org/mpfr-4.0.1/mpfr-4.0.1.tar.gz -RUN tar xvf mpfr-4.0.1.tar.gz -WORKDIR /src/mpfr-4.0.1 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/mpc/mpc-1.1.0.tar.gz -RUN tar xvf mpc-1.1.0.tar.gz -WORKDIR /src/mpc-1.1.0 -RUN ./configure --prefix=/usr/local -RUN make -j16 -RUN make install - -# Install gcc 5.5.0 from source -WORKDIR /src -RUN curl -O https://ftp.gnu.org/gnu/gcc/gcc-5.5.0/gcc-5.5.0.tar.gz -RUN tar xvf gcc-5.5.0.tar.gz -WORKDIR /src/gcc-5.5.0 -RUN ./configure --prefix=/usr/local --with-system-zlib --disable-multilib -RUN make -j16 -RUN make install - -# Install Python 3.6 from source -WORKDIR /src -RUN curl -O https://www.python.org/ftp/python/3.6.4/Python-3.6.4.tgz -RUN tar xvf Python-3.6.4.tgz -WORKDIR /src/Python-3.6.4 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions -RUN make -j16 -RUN make install - -# Install cmake from binary release -WORKDIR /opt -RUN curl -O https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz -RUN tar xvf cmake-3.10.2-Linux-x86_64.tar.gz -ENV PATH="/opt/cmake-3.10.2-Linux-x86_64/bin:${PATH}" - -# Set compiler binary paths for CMake to pick up -ENV CC="/usr/local/bin/gcc" -ENV CXX="/usr/local/bin/g++" - -# Install pip and virtualenv -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python3 get-pip.py -RUN pip3 install virtualenv - -# Install node.js from binary -WORKDIR /opt -RUN curl -O https://nodejs.org/dist/v8.9.4/node-v8.9.4-linux-x64.tar.xz -RUN tar xvf node-v8.9.4-linux-x64.tar.xz -ENV PATH="/opt/node-v8.9.4-linux-x64/bin:${PATH}" - -# Start at home directory -WORKDIR /root diff --git a/docker/Dockerfile-python3.7-build b/docker/Dockerfile-python3.7-build deleted file mode 100644 index f265735ab..000000000 --- a/docker/Dockerfile-python3.7-build +++ /dev/null @@ -1,32 +0,0 @@ -# An Ubuntu based image that is used for gitlab based ci infrastructure -FROM ubuntu:16.04 -# Install dependencies, particularly libraries that python or CMake need -RUN apt-get -y update \ - && apt-get -y install gcc-5 g++-5 libstdc++6 \ - python-setuptools curl git libssl-dev \ - make vim-common zlib1g-dev libffi-dev \ - libbz2-dev libopenblas-dev liblapack-dev \ - && ln -s /usr/bin/g++-5 /usr/bin/g++ -WORKDIR / -RUN mkdir src -ENV PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64: -WORKDIR 
/src -RUN curl -O https://www.python.org/ftp/python/3.7.6/Python-3.7.6.tgz \ - && tar xvf Python-3.7.6.tgz -WORKDIR /src/Python-3.7.6 -RUN ./configure --prefix=/usr/local --enable-unicode=ucs4 --enable-shared --enable-loadable-sqlite-extensions \ - && make -j16 && make install -WORKDIR /opt -RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o cmake-3.13.4-Linux-x86_64.tar.gz \ - && tar xf cmake-3.13.4-Linux-x86_64.tar.gz -ENV PATH=/opt/cmake-3.13.4-Linux-x86_64/bin:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -WORKDIR /src -RUN curl -O https://bootstrap.pypa.io/get-pip.py -RUN python3 get-pip.py -RUN pip3 install virtualenv -WORKDIR /root -# Give Cmake hints about compilers to use. -ENV CC="/usr/bin/gcc" -ENV CXX="/usr/bin/g++" -CMD ["/bin/bash"] diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..cb407ba34 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,2 @@ +*/Generated +_build diff --git a/docs/MIL/coremltools.converters.mil.ops.rst b/docs/MIL/coremltools.converters.mil.ops.rst new file mode 100644 index 000000000..30bba1fbc --- /dev/null +++ b/docs/MIL/coremltools.converters.mil.ops.rst @@ -0,0 +1,192 @@ +********** +MIL Ops +********** + +The list of operators supported by MIL. + + +.. automodule:: coremltools.converters.mil.mil.ops.defs.activation + + .. autoclass:: clamped_relu + .. autoclass:: elu + .. autoclass:: gelu + .. autoclass:: leaky_relu + .. autoclass:: linear_activation + .. autoclass:: prelu + .. autoclass:: relu + .. autoclass:: relu6 + .. autoclass:: sigmoid + .. autoclass:: sigmoid_hard + .. autoclass:: softplus + .. autoclass:: softplus_parametric + .. autoclass:: softsign + .. autoclass:: thresholded_relu + +.. automodule:: coremltools.converters.mil.mil.ops.defs.control_flow + + .. autoclass:: cond + .. autoclass:: const + .. autoclass:: select + .. autoclass:: while_loop + .. autoclass:: identity + .. autoclass:: make_list + .. autoclass:: list_length + .. autoclass:: list_write + .. autoclass:: list_read + .. autoclass:: list_gather + .. autoclass:: list_scatter + +.. automodule:: coremltools.converters.mil.mil.ops.defs.conv + + .. autoclass:: conv + .. autoclass:: conv_transpose + +.. automodule:: coremltools.converters.mil.mil.ops.defs.elementwise_binary + + .. autoclass:: add + .. autoclass:: equal + .. autoclass:: floor_div + .. autoclass:: greater + .. autoclass:: greater_equal + .. autoclass:: less + .. autoclass:: less_equal + .. autoclass:: logical_and + .. autoclass:: logical_or + .. autoclass:: logical_xor + .. autoclass:: maximum + .. autoclass:: minimum + .. autoclass:: mul + .. autoclass:: not_equal + .. autoclass:: real_div + .. autoclass:: pow + .. autoclass:: sub + +.. automodule:: coremltools.converters.mil.mil.ops.defs.elementwise_unary + + .. autoclass:: abs + .. autoclass:: acos + .. autoclass:: asin + .. autoclass:: atan + .. autoclass:: atanh + .. autoclass:: ceil + .. autoclass:: clip + .. autoclass:: cos + .. autoclass:: cosh + .. autoclass:: erf + .. autoclass:: exp + .. autoclass:: exp2 + .. autoclass:: floor + .. autoclass:: inverse + .. autoclass:: log + .. autoclass:: logical_not + .. autoclass:: round + .. autoclass:: rsqrt + .. autoclass:: sign + .. autoclass:: sin + .. autoclass:: sinh + .. autoclass:: sqrt + .. autoclass:: square + .. autoclass:: tan + .. autoclass:: tanh + .. autoclass:: threshold + .. autoclass:: cast + +.. automodule:: coremltools.converters.mil.mil.ops.defs.image_resizing + + .. 
autoclass:: upsample_nearest_neighbor + .. autoclass:: upsample_bilinear + .. autoclass:: resize_bilinear + .. autoclass:: crop_resize + .. autoclass:: crop + +.. automodule:: coremltools.converters.mil.mil.ops.defs.linear + + .. autoclass:: linear + .. autoclass:: matmul + +.. automodule:: coremltools.converters.mil.mil.ops.defs.normalization + + .. autoclass:: batch_norm + .. autoclass:: instance_norm + .. autoclass:: l2_norm + .. autoclass:: local_response_norm + +.. automodule:: coremltools.converters.mil.mil.ops.defs.pool + + .. autoclass:: avg_pool + .. autoclass:: l2_pool + .. autoclass:: max_pool + +.. automodule:: coremltools.converters.mil.mil.ops.defs.random + + .. autoclass:: random_bernoulli + .. autoclass:: random_categorical + .. autoclass:: random_normal + .. autoclass:: random_uniform + +.. automodule:: coremltools.converters.mil.mil.ops.defs.recurrent + + .. autoclass:: gru + .. autoclass:: lstm + .. autoclass:: rnn + +.. automodule:: coremltools.converters.mil.mil.ops.defs.reduction + + .. autoclass:: reduce_arg + .. autoclass:: reduce_argmax + .. autoclass:: reduce_argmin + .. autoclass:: reduce_l2_norm + .. autoclass:: reduce_log_sum + .. autoclass:: reduce_max + .. autoclass:: reduce_mean + .. autoclass:: reduce_min + .. autoclass:: reduce_prod + .. autoclass:: reduce_sum + .. autoclass:: reduce_sum_square + +.. automodule:: coremltools.converters.mil.mil.ops.defs.scatter_gather + + .. autoclass:: gather + .. autoclass:: scatter + .. autoclass:: gather_along_axis + .. autoclass:: scatter_along_axis + .. autoclass:: gather_nd + .. autoclass:: scatter_nd + +.. automodule:: coremltools.converters.mil.mil.ops.defs.slicend + + .. autoclass:: slice_by_index + +.. automodule:: coremltools.converters.mil.mil.ops.defs.tensor_operation + + .. autoclass:: band_part + .. autoclass:: cumsum + .. autoclass:: fill + .. autoclass:: non_maximum_suppression + .. autoclass:: non_zero + .. autoclass:: one_hot + .. autoclass:: pad + .. autoclass:: range_1d + .. autoclass:: tile + .. autoclass:: argsort + .. autoclass:: topk + .. autoclass:: flatten + .. autoclass:: shape + .. autoclass:: concat + .. autoclass:: split + .. autoclass:: stack + .. autoclass:: addn + +.. automodule:: coremltools.converters.mil.mil.ops.defs.tensor_transformation + + .. autoclass:: depth_to_space + .. autoclass:: expand_dims + .. autoclass:: reshape + .. autoclass:: reverse + .. autoclass:: reverse_sequence + .. autoclass:: slice_by_size + .. autoclass:: space_to_depth + .. autoclass:: squeeze + .. autoclass:: transpose + .. autoclass:: pixel_shuffle + .. autoclass:: sliding_windows diff --git a/docs/Makefile b/docs/Makefile index 2e3567a70..fd4610a9f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -18,3 +18,9 @@ help: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +clean: + rm -rf _build + rm -rf _static + rm -rf _templates + rm -rf **/generated diff --git a/docs/Models/coremltools.models.MLModel.rst b/docs/Models/coremltools.models.MLModel.rst new file mode 100644 index 000000000..152d1c47d --- /dev/null +++ b/docs/Models/coremltools.models.MLModel.rst @@ -0,0 +1,8 @@ +********** +MLModel +********** + +.. automodule:: coremltools.models + + .. 
autoclass:: MLModel + :members: diff --git a/docs/Models/coremltools.models.nearest_neighbors.rst b/docs/Models/coremltools.models.nearest_neighbors.rst new file mode 100644 index 000000000..87c6db816 --- /dev/null +++ b/docs/Models/coremltools.models.nearest_neighbors.rst @@ -0,0 +1,8 @@ +***************** +Nearest Neighbors +***************** + +.. automodule:: coremltools.models.nearest_neighbors + + .. autoclass:: KNearestNeighborsClassifierBuilder + :members: diff --git a/docs/Models/coremltools.models.neural_network.rst b/docs/Models/coremltools.models.neural_network.rst new file mode 100644 index 000000000..e1e5bf367 --- /dev/null +++ b/docs/Models/coremltools.models.neural_network.rst @@ -0,0 +1,13 @@ +************** +Neural Network +************** + +.. automodule:: coremltools.models.neural_network + + .. autoclass:: NeuralNetworkBuilder + .. automodule:: coremltools.models.neural_network.flexible_shape_utils + :members: + .. automodule:: coremltools.models.neural_network.quantization_utils + :members: + .. automodule:: coremltools.models.neural_network.update_optimizer_utils + :members: diff --git a/docs/Models/coremltools.models.pipeline.rst b/docs/Models/coremltools.models.pipeline.rst new file mode 100644 index 000000000..64269ed2a --- /dev/null +++ b/docs/Models/coremltools.models.pipeline.rst @@ -0,0 +1,9 @@ +********** +Pipeline +********** + +.. automodule:: coremltools.models.pipeline + + .. autoclass:: Pipeline + .. autoclass:: PipelineClassifier + .. autoclass:: PipelineRegressor diff --git a/docs/Models/coremltools.models.tree_ensemble.rst b/docs/Models/coremltools.models.tree_ensemble.rst new file mode 100644 index 000000000..12bfff1e6 --- /dev/null +++ b/docs/Models/coremltools.models.tree_ensemble.rst @@ -0,0 +1,12 @@ +************* +Tree Ensemble +************* + +.. automodule:: coremltools.models.tree_ensemble + + .. autoclass:: TreeEnsembleBase + :members: + .. autoclass:: TreeEnsembleRegressor + :members: + .. autoclass:: TreeEnsembleClassifier + :members: diff --git a/docs/Models/coremltools.models.utils.rst b/docs/Models/coremltools.models.utils.rst new file mode 100644 index 000000000..f195bb956 --- /dev/null +++ b/docs/Models/coremltools.models.utils.rst @@ -0,0 +1,6 @@ +*********** +Model Utils +*********** + +.. automodule:: coremltools.models.utils + :members: diff --git a/docs/_static/style.css b/docs/_static/style.css deleted file mode 100644 index b7ddfd9b7..000000000 --- a/docs/_static/style.css +++ /dev/null @@ -1,3 +0,0 @@ -.wy-nav-content { - max-width: 1000px !important; -} diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html deleted file mode 100644 index b0a448060..000000000 --- a/docs/_templates/layout.html +++ /dev/null @@ -1,4 +0,0 @@ -{% extends "!layout.html" %} -{% block extrahead %} - -{% endblock %} \ No newline at end of file diff --git a/docs/_themes/stripped/layout.html b/docs/_themes/stripped/layout.html new file mode 100644 index 000000000..185b3aca1 --- /dev/null +++ b/docs/_themes/stripped/layout.html @@ -0,0 +1,7 @@ +
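+{# A stripped-down Sphinx layout: inherit the basic theme but emit only the document body, with no navigation or page chrome. #}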
+{%- block content %} + {%- block document %} + {% block body %}{% endblock %} + {%- endblock %} +{%- endblock %} +
\ No newline at end of file diff --git a/docs/_themes/stripped/style.css b/docs/_themes/stripped/style.css new file mode 100644 index 000000000..808b3a6a5 --- /dev/null +++ b/docs/_themes/stripped/style.css @@ -0,0 +1,3 @@ +a.headerlink { + visibility: hidden; +} diff --git a/docs/_themes/stripped/theme.conf b/docs/_themes/stripped/theme.conf new file mode 100644 index 000000000..4f249edd4 --- /dev/null +++ b/docs/_themes/stripped/theme.conf @@ -0,0 +1,5 @@ + +[theme] +inherit = basic +stylesheet = style.css +pygments_style = default diff --git a/docs/conf.py b/docs/conf.py index b84ddd1a0..60a9bc1b4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,16 +5,16 @@ import coremltools import sys import os +import re for m in [ - 'converters', - 'utils', - ]: - module_name = 'coremltools.' + m + "converters", + "utils", +]: + module_name = "coremltools." + m sys.modules[module_name] = eval(module_name) -sys.path.insert(0, os.path.abspath('.')) - +sys.path.insert(0, os.path.abspath(".")) # -- General configuration ------------------------------------------------ @@ -26,29 +26,32 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', 'numpydoc', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', - 'sphinx.ext.inheritance_diagram', 'sphinx.ext.autosummary', 'sphinx_rtd_theme', - 'sphinxtogithub' + "sphinx.ext.autodoc", + "numpydoc", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.inheritance_diagram", + "sphinx.ext.autosummary", + "sphinx_rtd_theme", ] - autosummary_generate = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'coremltools' -copyright = u'2017-2019, Apple Inc' -author = u'Apple Inc.' +project = u"coremltools" +copyright = u"2017-2020, Apple Inc" +author = u"Apple Inc." # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -56,15 +59,13 @@ # import pkg_resources -try: - version = pkg_resources.require("coremltools")[0].version -except: - version = '3.2' + +version = pkg_resources.require("coremltools")[0].version # The short X.Y version. version = version # The full version, including alpha/beta/rc tags. -release = version +release = re.split("[a-z]+", version)[0] # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -76,37 +77,34 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] numpydoc_show_class_members = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = False -html_theme = "sphinx_rtd_theme" -html_theme_path = ["_themes", ] +html_theme = "stripped" +html_theme_path = [ + "_themes", +] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = { - 'navigation_depth': 2, - 'collapse_navigation': False, -} +html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - +html_static_path = [] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'coremltoolsdoc' - +htmlhelp_basename = "coremltoolsdoc" # -- Options for LaTeX output --------------------------------------------- @@ -114,15 +112,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -132,20 +127,20 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'coremltools.tex', u'coremltools Documentation', - u'Apple Inc.', 'manual'), + ( + master_doc, + "coremltools.tex", + u"coremltools Documentation", + u"Apple Inc.", + "manual", + ), ] - # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'coremltools', u'coremltools Documentation', - [author], 1) -] - +man_pages = [(master_doc, "coremltools", u"coremltools Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -153,29 +148,41 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'coremltools', u'coremltools Documentation', - author, 'coremltools', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "coremltools", + u"coremltools Documentation", + author, + "coremltools", + "One line description of project.", + "Miscellaneous", + ), ] - # -- Customizations ------------------- -autodoc_default_flags = ['members'] - #'private-members', - #'special-members', - #'show-inheritance'] +autodoc_default_flags = ["members"] + + +# 'private-members', +# 'special-members', +# 'show-inheritance'] + def autodoc_skip_member(app, what, name, obj, skip, options): # Always do __init__ if name == "__init__": return False - exclusions = ('__weakref__', # special-members - '__doc__', '__module__', '__dict__', # undoc-members - ) + exclusions = ( + "__weakref__", # special-members + "__doc__", + "__module__", + "__dict__", # undoc-members + ) exclude = name in exclusions return skip or exclude + def setup(app): - app.connect('autodoc-skip-member', autodoc_skip_member) + app.connect("autodoc-skip-member", autodoc_skip_member) diff --git a/docs/coremltools.converters.rst b/docs/coremltools.converters.rst deleted file mode 100644 index 998111317..000000000 --- a/docs/coremltools.converters.rst +++ /dev/null @@ -1,21 +0,0 @@ -********** -Converters -********** - -Automatically convert models from popular machine learning libraries such as -TensorFlow, Keras, Caffe, scikit-learn, LIBSVM, and XGBoost to the Core ML -format. - -.. automodule:: coremltools.converters -.. currentmodule:: coremltools.converters - -.. autosummary:: - :nosignatures: - :toctree: generated/ - - tensorflow.convert - keras.convert - caffe.convert - libsvm.convert - sklearn.convert - xgboost.convert diff --git a/docs/coremltools.models.rst b/docs/coremltools.models.rst deleted file mode 100644 index 9126f0bf5..000000000 --- a/docs/coremltools.models.rst +++ /dev/null @@ -1,28 +0,0 @@ -****** -Models -****** - -.. automodule:: coremltools.models -.. automodule:: coremltools.models.nearest_neighbors -.. automodule:: coremltools.models.neural_network - :noindex: -.. currentmodule:: coremltools.models - -.. autosummary:: - :nosignatures: - :toctree: generated/ - - coremltools.models.MLModel - coremltools.models.pipeline - coremltools.models.tree_ensemble - - -.. autosummary:: - :nosignatures: - :toctree: generated/ - - coremltools.models.nearest_neighbors.builder - coremltools.models.neural_network.builder - coremltools.models.neural_network.flexible_shape_utils - coremltools.models.neural_network.quantization_utils - coremltools.models.neural_network.update_optimizer_utils diff --git a/docs/coremltools.utils.rst b/docs/coremltools.utils.rst deleted file mode 100644 index cf60416fa..000000000 --- a/docs/coremltools.utils.rst +++ /dev/null @@ -1,10 +0,0 @@ -********* -Utilities -********* - -.. automodule:: coremltools.models.utils -.. currentmodule:: coremltools.models.utils - -.. 
autosummary:: - :nosignatures: - :toctree: generated/ diff --git a/docs/index.rst b/docs/index.rst index f924a90b6..58c746264 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,212 +1,20 @@ -########### -coremltools -########### - -.. _about: - -**Core ML** is an Apple framework that allows developers to easily integrate -machine learning (ML) models into apps. Core ML is available on iOS, iPadOS, -watchOS, macOS, and tvOS. Core ML introduces a public file format (.mlmodel) -for a broad set of ML methods including deep neural networks (convolutional -and recurrent), tree ensembles (boosted trees, random forest, decision trees), -and generalized linear models. Core ML models can be directly integrated into -apps within Xcode. - -**coremltools** is a Python package that can be used to: - -- Convert trained models from popular machine learning tools into Core ML format - (.mlmodel). -- Write models to Core ML format with a simple API. -- Making predictions using the Core ML framework (on select platforms) to - verify conversion. - -.. currentmodule:: coremltools - -Installation ------------- - -**coremltools** has the following dependencies: - -- numpy (1.12.1+) - -- protobuf (3.1.0+) - -In addition, it has the following soft dependencies that are only needed when -you are converting models of these formats: - -- Keras (1.2.2, 2.0.4+) with TensorFlow (1.0+) - -- XGBoost (0.6+) - -- scikit-learn (0.15+) - -- LIBSVM - -The method for installing **coremltools** follows the -`standard python package installation steps `_. -Once you have set up a python environment, run:: - - pip install --upgrade coremltools - -to install **coremltools**. - -Model Conversion -================ - -**coremltools** easily converts trained models from existing libraries. The -following example shows how to convert a Caffe model (`AlexNet -`_) to Core ML -format (.mlmodel). - -Supporting files: `bvlc_alexnet.caffemodel `_, `deploy.prototxt `_, `class_labels.txt `_. - -.. code-block:: python - - import coremltools - - # Convert a Caffe model to a classifier in Core ML - coreml_model = coremltools.converters.caffe.convert( - ('bvlc_alexnet.caffemodel', 'deploy.prototxt'), predicted_feature_name='class_labels.txt' - ) - - # Now save the model - coreml_model.save('BVLCObjectClassifier.mlmodel') - -Here is another example with scikit-learn: - -.. code-block:: python - - from sklearn.linear_model import LinearRegression - import pandas as pd - - # Load data - data = pd.read_csv('houses.csv') - - # Train a model - model = LinearRegression() - model.fit(data[["bedroom", "bath", "size"]], data["price"]) - - # Convert and save the scikit-learn model - import coremltools - - coreml_model = coremltools.converters.sklearn.convert(model, ["bedroom", "bath", "size"], "price") - -Model Interface -=============== - -After conversion, you might want to edit the model metadata to make your model -easier to consume in Xcode. The license, author, and other metadata get -surfaced in the Xcode UI while the input and output descriptions are surfaced -as comments in the code generated by Xcode for the model consumer. - -.. code-block:: python - - # Set model metadata - model.author = 'John Smith' - model.license = 'BSD' - model.short_description = 'Predicts the price of a house in the Seattle area.' 
- - # Set feature descriptions manually - model.input_description['bedroom'] = 'Number of bedrooms' - model.input_description['bathrooms'] = 'Number of bathrooms' - model.input_description['size'] = 'Size (in square feet)' - - # Set the output descriptions - model.output_description['price'] = 'Price of the house' - - # Save the model - model.save('HousePricer.mlmodel') - -Model Evaluation -================ - -After conversion, you might want to verify that the predictions made by Core ML -match up with the source framework. To facilitate programmatic verification of -the conversion, we provide a way for you to easily evaluate Core ML models. - -Here is an example using making predictions using the :code:`HousePricer.mlmodel` -that we converted in the previous example: - -.. code-block:: python - - import coremltools - - # Load the model - model = coremltools.models.MLModel('HousePricer.mlmodel') - - # Make predictions - predictions = model.predict({'bedroom': 1.0, 'bath': 1.0, 'size': 1240}) - -Conversion Support -================== - -Core ML supports conversion of trained models from a variety of training tools -for integration into apps. The following table lists supported tool packages -by model type: - -+---------------------------+--------------------------------------+ -| Model Family | Supported Packages | -+===========================+======================================+ -| Neural Networks | Keras (1.2.2, 2.0.4+), Caffe (1.0) | -+---------------------------+--------------------------------------+ -| Tree Ensembles | XGBoost (0.6), scikit-learn (0.18.1) | -+---------------------------+--------------------------------------+ -| Generalized Linear Models | scikit-learn (0.18.1) | -+---------------------------+--------------------------------------+ -| Support Vector Machines | LIBSVM (3.22), scikit-learn (0.18.1) | -+---------------------------+--------------------------------------+ -| Feature Engineering | scikit-learn (0.18.1) | -+---------------------------+--------------------------------------+ -| Pipelines | scikit-learn (0.18.1) | -+---------------------------+--------------------------------------+ - -Model Visualization -=================== - -Core ML supports visualizing a converted model. This can be used to see all -the building blocks of the model. - -Here is an example of visualizing the :code:`HousePricer.mlmodel`: - -.. code-block:: python - - import coremltools - - # Load the model - model = coremltools.models.MLModel('HousePricer.mlmodel') - - # Visualize the model - model.visualize_spec() - -Model Specification -=================== - -A key component of Core ML is the public specification for representing machine -learning models. This specification is defined in `protobuf `_ and can be created -using any language supported by protobuf (e.g., Python, C++, Java, C#, Perl, etc.). - -At a high level, the protobuf specification consists of: - -- Model description: Encodes names and type information of the inputs and outputs to the model. -- Model parameters: The set of parameters required to represent a specific instance of the model. -- Metadata: Information about the origin, license, and author of the model. - -For more information, please take a look at the `Core ML model specification `_. - -External Tools -============== -In addition to the conversion tools in this package, TensorFlow, MXNet and ONNX have their own conversion tools: - -- `TensorFlow `_ -- `MXNet `_ -- `ONNX `_ - -Contents -======== - -.. 
toctree:: - :maxdepth: 1 - - coremltools.converters.rst - coremltools.models.rst - coremltools.utils.rst +.. currentmodule:: coremltools.converters +.. autosummary:: + :nosignatures: + :toctree: Converters/generated + + convert + keras.convert + caffe.convert + libsvm.convert + sklearn.convert + xgboost.convert + +.. currentmodule:: coremltools.converters.mil +.. autosummary:: + :toctree: MIL/generated + +.. + builder +.. + program diff --git a/docs/readme_session.py b/docs/readme_session.py new file mode 100644 index 000000000..3bea541b0 --- /dev/null +++ b/docs/readme_session.py @@ -0,0 +1,320 @@ +import json +import os +from requests.auth import HTTPBasicAuth +import requests + +_readme_api_url = "https://dash.readme.io/api/v1/" + + +class ReadMeSession: + # Passed for every API call + __headers = {"Accept": "application/json", "content-type": "application/json"} + + # Map: version -> version info + __versions = None + + def __init__(self, auth_token, api_version=None): + self.auth_token = auth_token + self.__refresh_versions() + if api_version: + self.set_api_version(api_version) + + # Set the version used for GET requests + def set_api_version(self, version): + self.__verify_version_exists(version) + self.api_version = version + self.__headers["x-readme-version"] = "v" + version + if "categories" not in self.get_version(): + self.__refresh_categories() + + # Call the ReadMe API. api_func should be a requests-based function. + def __api_call(self, api_func, endpoint, print_info, data=None): + print(print_info + "... ", end="") + response = api_func( + _readme_api_url + endpoint, + headers=self.__headers, + auth=HTTPBasicAuth(self.auth_token, ""), + data=data, + ) + if response.status_code not in [200, 201, 204]: + print("Error (code " + str(response.status_code) + "): " + response.text) + return None + else: + print() + return None if api_func == requests.delete else json.loads(response.text) + + # API GET call. + # If paginated, gather and concat the output for each page in the endpoint. + def __api_GET(self, endpoint, print_info=None, paginated=False): + if not print_info: + print_info = "API::GET(" + endpoint + ")" + if paginated: + i = 1 + out = [] + while True: + response = self.__api_call( + requests.get, + endpoint + "?page=" + str(i), + print_info + " (page " + str(i) + ")", + ) + if response is None: + return None + if len(response) == 0: + return out + out += response + i += 1 + else: + return self.__api_call(requests.get, endpoint, print_info) + + # API POST call. + # Data should be passed in as a map. The map will be converted to string. + def __api_POST(self, endpoint, data, print_info=None): + if not print_info: + print_info = "API::POST(" + endpoint + ")" + + # Convert data to str + data_str = "" + for x, y in data.items(): + data_str += '"' + x + '":"' + y + '",' + data_str = ("{" + data_str[:-1] + "}").encode("utf-8") + data = data_str + + return self.__api_call(requests.post, endpoint, print_info, data) + + # API DELETE call.
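+    # Returns None both on success (see __api_call, which discards the response body for requests.delete) and on error.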
+ def __api_DELETE(self, endpoint, print_info): + if not print_info: + print_info = "API::DELETE(" + endpoint + ")" + return self.__api_call(requests.delete, endpoint, print_info) + + # Populates version_to_info as a map: "version" -> "version info" + def __refresh_versions(self): + response = self.__api_GET("version", print_info="Fetching versions") + if response: + self.__versions = {} + for version in response: + self.get_versions()[version["version"]] = version + + # Verify a version exists + def __verify_version_exists(self, version): + if version not in self.get_versions(): + raise ValueError("Version " + version + " does not exist.") + + # Get all version info + def get_versions(self): + return self.__versions + + # Get the current version's info + def get_version(self): + versions = self.get_versions() + return versions[self.api_version] if self.api_version in versions else None + + # Populates categories as a map: "category title" -> "category info" + def __refresh_categories(self): + version_info = self.get_version() + version_info["categories"] = {} + categories = version_info["categories"] + response = self.__api_GET( + "categories", + paginated=True, + print_info="Fetching categories for version " + self.api_version, + ) + if response is not None: + for category in response: + if category[ + "reference" + ]: # Only get categories that are in the API reference + if category["title"] in categories: + print( + "Warning: There are two categories with the name " + + category["title"] + + " for version " + + self.api_version + + ". Which category this title refers" + + " to will be unpredictable." + ) + categories[category["title"]] = category + self.__refresh_category_files(category["title"]) + + # Populate a category's files as a map: file title -> file info + def __refresh_category_files(self, category): + self.__verify_category_exists(category) + category_files = self.__api_GET( + "categories/" + self.get_category(category)["slug"] + "/docs", + print_info="Fetching docs in " + category, + ) + # Populate as a map: file title -> file info + category = self.get_category(category) + category["files"] = {} + for file in category_files: + category["files"][file["title"]] = file + + # Get all category info + def get_categories(self): + return self.get_version()["categories"] + + # Get a category's info + def get_category(self, category): + categories = self.get_categories() + return categories[category] if category in categories else None + + # Get a category's file list + def get_category_files(self, category): + self.__verify_category_exists(category) + return self.get_category(category)["files"] + + # Verify a category exists + def __verify_category_exists(self, category): + if not self.get_category(category): + raise ValueError( + "Category " + + category + + " does not exist for version " + + self.api_version + + "." + ) + + # Create a version with default settings. + def create_version( + self, version, from_version=None, is_stable=False, is_beta=False, is_hidden=True + ): + if version in self.get_versions(): + raise ValueError( + "Version " + version + " already exists! Cannot create it."
+ ) + + # If no source version, pick the latest one + if not from_version: + max_version = 0 + for ver in self.get_versions(): + ver = float(ver) + if ver > max_version: + max_version = ver + from_version = str(max_version) + + data = { + "version": "v" + version, + "is_stable": is_stable, + "is_beta": is_beta, + "is_hidden": is_hidden, + "from": from_version, + } + self.get_versions()[version] = self.__api_POST( + "version", data, "Creating version " + version + ) + + # Update a version + def update_version(self, version, is_stable=None, is_beta=None, is_hidden=None): + self.__verify_version_exists(version) + data = { + "version": "v" + version, + "is_stable": is_stable + if is_stable is not None + else self.get_versions()[version]["is_stable"], + "is_beta": is_beta + if is_beta is not None + else self.get_versions()[version]["is_beta"], + "is_hidden": is_hidden + if is_hidden is not None + else self.get_versions()[version]["is_hidden"], + } + updated = self.__api_POST("version", data, "Updating version " + version) + for k, v in updated.items(): + self.get_versions()[version][k] = v + + # Empty a category + def empty_category(self, category): + self.__verify_category_exists(category) + print("Emptying category " + category) + for title, data in self.get_category_files(category).items(): + self.__api_DELETE( + "docs/" + data["slug"], + print_info=" Removing file " + category + "/" + title, + ) + self.get_category(category)["files"] = {} + + # Delete files in the given category with the given title + def delete_file_with_title(self, title, category): + self.__verify_category_exists(category) + + # Search for a file with the same title. + files = self.get_category_files(category) + if title in files: + self.__api_DELETE( + "docs/" + files[title]["slug"], + print_info="Removing duplicate file " + category + "/" + title, + ) + files.pop(title) + + # Uploads all files in the folder at path to ReadMe. + # Can also upload individual files at path.
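+    # Only files whose names contain ".html" are imported; with recursive=True, subdirectories are uploaded first, and any existing page with the same title is deleted before the new page is created.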
+ def upload(self, path, category, recursive=False): + self.__verify_category_exists(category) + + if os.path.isdir(path): + if recursive: + # get all subdirs in path and recursively transfer all files in that subdir + subdirpath = path + onlydirs = [ + f + for f in os.listdir(subdirpath) + if os.path.isdir(os.path.join(subdirpath, f)) + ] + for dir in onlydirs: + self.upload(os.path.join(path, dir), category, recursive) + + # get all filenames in current dir + files = sorted( + [ + os.path.join(path, f) + for f in os.listdir(path) + if os.path.isfile(os.path.join(path, f)) + ] + ) + + # iterate through all filenames and import the html files + for currfilename in files: + self.upload(currfilename, category, recursive) + elif not os.path.isfile(path): + raise ValueError("Unable to find file at path: " + path) + + currfilename = path + if currfilename.find(".html") != -1: + # open and read file + file = open(currfilename, "r") + filecontents = file.read() + file.close() + filecontents = filecontents.replace("\\", "\\\\") + filecontents = filecontents.replace("\n", "\\\\n") + filecontents = filecontents.replace("¶", "") + filecontents = filecontents.replace('"', "'") + filecontents = ( + '[block:html]\\n{\\n \\"html\\": \\"' + + filecontents + + '\\"\\n}\\n[/block]' + ) + + firstheadline = os.path.basename(currfilename)[:-5] + # extract first heading and use as page title + # soup = BeautifulSoup(filecontents, 'html.parser') + # for headlines in soup.find_all("h1"): + # firstheadline = headlines.text.strip() + # break + + # Delete files with identical title + self.delete_file_with_title(firstheadline, category) + + # Set up the HTML payload for the ReadMe API + data = { + "hidden": "false", + "title": firstheadline, + "type": "basic", + "body": filecontents, + "category": self.get_category(category)["_id"], + } + + # Create the new page + out = self.__api_POST( + "docs", data, "Uploading " + currfilename + " to category " + category + ) + self.get_category_files(category)[firstheadline] = out diff --git a/docs/upload_docs.py b/docs/upload_docs.py new file mode 100644 index 000000000..22a7e5292 --- /dev/null +++ b/docs/upload_docs.py @@ -0,0 +1,90 @@ +import argparse +import readme_session +import functools +import coremltools +import pathlib +import os +import re + +parser = argparse.ArgumentParser(description="Upload docs to ReadMe.") +parser.add_argument( + "--version", + type=str, + help="Version to upload.", + default=re.split("[a-z]+", coremltools.version.__version__)[0], +) +parser.add_argument( + "--from_source_version", + type=str, + help="Create a version from this version if current CMLT version does not have docs. " + + "Default is the most recent version.", + default=None, +) +parser.add_argument( + "--release_version", action="store_true", help="Release the version to the public."
+) +parser.add_argument( + "--set_version_stable", + action="store_true", + help="Set this version as the stable (main) version.", +) +parser.add_argument("--auth_token", type=str, help="Token for authentication.") + +args = parser.parse_args() + + +# Remove "coremltools" from the beginning of all filenames in this path +def sanitize_names(path): + if os.path.isdir(path): + # get all subdirs in path and recursively transfer all files in that subdir + subdirpath = path + onlydirs = [ + f + for f in os.listdir(subdirpath) + if os.path.isdir(os.path.join(subdirpath, f)) + ] + for dir in onlydirs: + sanitize_names(os.path.join(path, dir)) + + # get all filenames in current dir + files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))] + + # iterate through all filenames and remove coremltools prefix + for file in files: + if file.startswith("coremltools"): + currpath = os.path.join(path, file) + newpath = os.path.join(path, file[file.find(".") + 1 :]) + os.rename(currpath, newpath) + + +# API Setup +sess = readme_session.ReadMeSession(args.auth_token) + +# Create version +if args.version not in sess.get_versions(): + sess.create_version(args.version, args.from_source_version) +sess.set_api_version(args.version) + +# Upload generated folders +docspath = str(pathlib.Path(__file__).parent.absolute() / "_build" / "html") +dirs = [ + os.path.join(docspath, f) + for f in os.listdir(docspath) + if os.path.isdir(os.path.join(docspath, f)) +] +for thisdir in dirs: + if os.path.basename(thisdir)[0] != "_": + sanitize_names(thisdir) + print("--------- Processing " + thisdir + " ----------") + category = os.path.basename(thisdir) + sess.empty_category(category) + sess.upload(path=thisdir, category=category, recursive=True) + print("-------------------- Done ---------------------\n") + +# Release the version or set it to stable +if args.release_version or args.set_version_stable: + sess.update_version( + args.version, + is_stable=args.set_version_stable or sess.get_version()["is_stable"], + is_hidden=not args.release_version or not sess.get_version()["is_hidden"], + ) diff --git a/examples/APIExamples.md b/examples/APIExamples.md deleted file mode 100644 index ae71ac8e6..000000000 --- a/examples/APIExamples.md +++ /dev/null @@ -1,304 +0,0 @@ -# API Code snippets - -## Converting between MLModel and Spec - -```python -import coremltools - -# Load MLModel -mlmodel = coremltools.models.MLModel('path/to/the/model.mlmodel') - -# use model for prediction -mlmodel.predict(...)
- -# save the model -mlmodel.save('path/to/the/saved/model.mlmodel') - -# Get spec from the model -spec = mlmodel.get_spec() - -# print input/output description for the model -print(spec.description) - -# get the type of Model (NeuralNetwork, SupportVectorRegressor, Pipeline etc) -print(spec.WhichOneof('Type')) - -# save out the model directly from the spec -coremltools.models.utils.save_spec(spec, 'path/to/the/saved/model.mlmodel') - -# convert spec to MLModel, this step compiles the model as well -mlmodel = coremltools.models.MLModel(spec) - -# Load the spec from the saved .mlmodel file directly -spec = coremltools.models.utils.load_spec('path/to/the/model.mlmodel') -``` - -## Visualizing Neural Network Core ML models - -```python -import coremltools - -mlmodel = coremltools.models.MLModel('path/to/the/model.mlmodel') -mlmodel.visualize_spec() - -# To print a succinct description of the neural network -spec = mlmodel.get_spec() -from coremltools.models.neural_network.printer import print_network_spec - -print_network_spec(spec, style='coding') -# or -print_network_spec(spec) -``` - -Another useful tool for visualizing CoreML models and models from other frameworks: [Netron](https://github.com/lutzroeder/netron) - -## Printing the pre-processing parameters - -This is useful for image based neural network models - -```python -import coremltools - -spec = coremltools.models.utils.load_spec('path/to/the/saved/model.mlmodel') - -# Get neural network portion of the spec -if spec.WhichOneof('Type') == 'neuralNetworkClassifier': - nn = spec.neuralNetworkClassifier -if spec.WhichOneof('Type') == 'neuralNetwork': - nn = spec.neuralNetwork -elif spec.WhichOneof('Type') == 'neuralNetworkRegressor': - nn = spec.neuralNetworkRegressor -else: - raise ValueError('MLModel must have a neural network') - -print(nn.preprocessing) -``` - -## Changing MLMultiArray input/output datatypes - -[Here](https://github.com/apple/coremltools/blob/d07421460f9f0ad1a2e9cf8b5248670358a24a1a/mlmodel/format/FeatureTypes.proto#L106 ) is the list of supported datatypes. -For instance, change the datatype from 'double' to 'float32': - -```python -import coremltools -from coremltools.proto import FeatureTypes_pb2 as ft - -model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel') -spec = model.get_spec() - - -def _set_type_as_float32(feature): - if feature.type.HasField('multiArrayType'): - feature.type.multiArrayType.dataType = ft.ArrayFeatureType.FLOAT32 - - -# iterate over the inputs -for input_ in spec.description.input: - _set_type_as_float32(input_) - -# iterate over the outputs -for output_ in spec.description.output: - _set_type_as_float32(output_) - -model = coremltools.models.MLModel(spec) -model.save('path/to/the/saved/model.mlmodel') -``` - -## Prediction with an image input - -An mlmodel that takes an input of type image requires a PIL image during the prediction call. 
- -```python -import coremltools -import numpy as np -import PIL.Image - -# load a model whose input type is "Image" -model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel') - -Height = 20 # use the correct input image height -Width = 60 # use the correct input image width - - -# Scenario 1: load an image from disk -def load_image(path, resize_to=None): - # resize_to: (Width, Height) - img = PIL.Image.open(path) - if resize_to is not None: - img = img.resize(resize_to, PIL.Image.ANTIALIAS) - img_np = np.array(img).astype(np.float32) - return img_np, img - - -# load the image and resize using PIL utilities -_, img = load_image('/path/to/image.jpg', resize_to=(Width, Height)) -out_dict = model.predict({'image': img}) - -# Scenario 2: load an image from a numpy array -shape = (Height, Width, 3) # height x width x RGB -data = np.zeros(shape, dtype=np.uint8) -# manipulate numpy data -pil_img = PIL.Image.fromarray(data) -out_dict = model.predict({'image': pil_img}) -``` - -Now, let us say the Core ML model has an input type of MultiArray, but it really represents an image. -How can a jpeg image be used to call predict on such a model? For this, the loaded image should first -be converted to a numpy array. Here is one way to do it: - -```python - -Height = 20 # use the correct input image height -Width = 60 # use the correct input image width - - -# assumption: the mlmodel's input is of type MultiArray and of shape (1, 3, Height, Width) -model_expected_input_shape = (1, 3, Height, Width) # depending on the model description, this could be (3, Height, Width) - -# load the model -model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel') - -def load_image_as_numpy_array(path, resize_to=None): - # resize_to: (Width, Height) - img = PIL.Image.open(path) - if resize_to is not None: - img = img.resize(resize_to, PIL.Image.ANTIALIAS) - img_np = np.array(img).astype(np.float32) # shape of this numpy array is (Height, Width, 3) - return img_np - -# load the image and resize using PIL utilities -img_as_np_array = load_image_as_numpy_array('/path/to/image.jpg', resize_to=(Width, Height)) # shape (Height, Width, 3) - -# note that PIL returns an image in the format in which the channel dimension is in the end, -# which is different than CoreML's input format, so that needs to be modified -img_as_np_array = np.transpose(img_as_np_array, (2,0,1)) # shape (3, Height, Width) - -# add the batch dimension if the model description has it -img_as_np_array = np.reshape(img_as_np_array, model_expected_input_shape) - -# now call predict -out_dict = model.predict({'image': img_as_np_array}) -``` - - -## Building an mlmodel from scratch using Neural Network Builder - -The neural network [builder class](https://github.com/apple/coremltools/blob/master/coremltools/models/neural_network/builder.py) can be used to programmatically construct a CoreML model. -Lets look at an example of -making a tiny 2 layer model with a convolution layer (with random weights) and an activation. 
-
-To find the list of all the supported neural network layer types, see [this](https://github.com/aseemw/coremltools/blob/f95f9b230f6a1bd8b0d9ee298b78d7786e3e7cfd/mlmodel/format/NeuralNetwork.proto#L472)
-portion of the NeuralNetwork.proto.
-
-```python
-import coremltools
-import coremltools.models.datatypes as datatypes
-from coremltools.models import neural_network as neural_network
-import numpy as np
-
-input_features = [('data', datatypes.Array(*(1, 3, 10, 10)))]
-output_features = [('output', None)]
-
-builder = neural_network.NeuralNetworkBuilder(input_features, output_features,
-                                              disable_rank5_shape_mapping=True)
-
-builder.add_convolution(name='conv',
-                        kernel_channels=3,
-                        output_channels=3,
-                        height=1,
-                        width=1,
-                        stride_height=1,
-                        stride_width=1,
-                        border_mode='valid',
-                        groups=1,
-                        W=np.random.rand(1, 1, 3, 3),
-                        b=np.random.rand(3),
-                        has_bias=True,
-                        input_name='data',
-                        output_name='conv')
-
-builder.add_activation(name='prelu',
-                       non_linearity='PRELU',
-                       input_name='conv',
-                       output_name='output',
-                       params=np.array([1.0, 2.0, 3.0]))
-
-spec = builder.spec
-model = coremltools.models.MLModel(spec)
-model.save('conv_prelu.mlmodel')
-
-# the input array must match the declared input shape, (1, 3, 10, 10) here
-output_dict = model.predict({'data': np.ones((1, 3, 10, 10))}, useCPUOnly=False)
-print(output_dict['output'].shape)
-print(output_dict['output'].flatten()[:3])
-```
-
-## Print out layer attributes for debugging
-
-Sometimes we want to print out the weights of a particular layer for debugging purposes.
-The following is an example showing how we can utilize the `protobuf` APIs to access any
-attributes, including weight parameters. This code snippet uses the model we created in
-the previous example.
-
-```python
-import coremltools
-import numpy as np
-
-model = coremltools.models.MLModel('conv_prelu.mlmodel')
-
-spec = model.get_spec()
-print(spec)
-
-layer = spec.neuralNetwork.layers[0]
-weight_params = layer.convolution.weights
-
-print('Weights of {} layer: {}.'.format(layer.WhichOneof('layer'), layer.name))
-print(np.reshape(np.asarray(weight_params.floatValue), (1, 1, 3, 3)))
-```
-
-## Quantizing a neural network mlmodel
-
-```python
-from coremltools.models.neural_network.quantization_utils import quantize_weights
-
-model = coremltools.models.MLModel('model.mlmodel')
-# Example 1: 8-bit linear
-quantized_model = quantize_weights(model, nbits=8, quantization_mode="linear")
-
-# Example 2: Quantize to FP-16 weights
-quantized_model = quantize_weights(model, nbits=16)
-
-# Example 3: 4-bit k-means generated look-up table
-quantized_model = quantize_weights(model, nbits=4, quantization_mode="kmeans")
-
-# Example 4: 8-bit symmetric linear quantization skipping bias,
-# batchnorm, depthwise-convolution, and convolution layers
-# with less than 4 channels or 4096 elements
-from coremltools.models.neural_network.quantization_utils import AdvancedQuantizedLayerSelector
-
-selector = AdvancedQuantizedLayerSelector(
-    skip_layer_types=['batchnorm', 'bias', 'depthwiseConv'],
-    minimum_conv_kernel_channels=4,
-    minimum_conv_weight_count=4096)
-quantized_model = quantize_weights(model, 8, quantization_mode='linear_symmetric',
-                                   selector=selector)
-
-# Example 5: 8-bit linear quantization skipping the layer with name 'dense_2'
-from coremltools.models.neural_network.quantization_utils import QuantizedLayerSelector
-
-
-class MyLayerSelector(QuantizedLayerSelector):
-
-    def __init__(self):
-        super(MyLayerSelector, self).__init__()
-
-    def do_quantize(self, layer, **kwargs):
-        ret = super(MyLayerSelector, self).do_quantize(layer)
-        # skip quantization when the base selector says so, or for the layer named 'dense_2'
-        if not ret or layer.name == 'dense_2':
-            return False
-        return ret
-
-
-selector = MyLayerSelector()
-quantized_model = quantize_weights(
-    model, 8, quantization_mode='linear', selector=selector)
-```
diff --git a/examples/NeuralNetworkGuide.md b/examples/NeuralNetworkGuide.md
deleted file mode 100644
index 5d18f52ef..000000000
--- a/examples/NeuralNetworkGuide.md
+++ /dev/null
@@ -1,646 +0,0 @@
-# Neural Network Guide
-
-This document describes how to get neural network models into the Core ML format, either via automatic conversion or by building them
-from scratch programmatically. We also discuss various utilities available to edit the `mlmodel`, such as quantizing it, making the input shape
-flexible, changing the input/output names and types, inspecting mlmodels, and printing a text description of the model.
-
-What are the layers supported by Core ML? For the latest list along with all the parameterizations, check out the
-[neuralnetwork.proto](https://github.com/apple/coremltools/blob/master/mlmodel/format/NeuralNetwork.proto) file, which is a [protobuf](https://developers.google.com/protocol-buffers/docs/pythontutorial)
-description of a neural network model.
-Since it's a big file, it's easier to navigate either by starting from the [top level proto message](https://github.com/apple/coremltools/blob/875abd9707dbe65eb92a31dbb54a68d6581e68ad/mlmodel/format/NeuralNetwork.proto#L130)
-or by directly looking at the [layer types](https://github.com/apple/coremltools/blob/875abd9707dbe65eb92a31dbb54a68d6581e68ad/mlmodel/format/NeuralNetwork.proto#L472).
-Auto-generated documentation, built from the comments in the proto file, can be found [here](https://apple.github.io/coremltools/coremlspecification/sections/NeuralNetwork.html).
-
-
-Please make sure that you have installed the latest `coremltools`, `tfcoreml` (if using the TensorFlow converter) and `onnx-coreml` (if using the ONNX converter) Python packages.
-
-```bash
-pip install --upgrade coremltools
-pip install --upgrade tfcoreml
-pip install --upgrade onnx-coreml
-```
-
-[Jupyter notebook examples for converters](../examples/neural_network_inference/)
-
-# Table of Contents
-
-* [Keras.io converter (TF 1.x backend)](#kerasio-converter-tf-1x-backend)
-* [TensorFlow conversion](#tensorflow-conversion)
-  * [TensorFlow 1 converter](#tensorflow-1-converter)
-  * [TensorFlow 2 converter (tf.keras)](#tensorflow-2-converter-tfkeras)
-* [ONNX converter (PyTorch conversion)](#onnx-converter)
-* [Building an mlmodel using the Builder API](#building-an-mlmodel-using-the-builder-api)
-* [Model quantization](#model-quantization)
-* [Model prediction](#model-predictions)
-* [Model inspection and editing](#model-inspection-and-editing)
-  * [Printing description](#printing-description)
-  * [Flexible input/output shapes](#flexible-inputoutput-shapes)
-  * [Modifying input/output names and types](#modifying-inputoutput-names-and-types)
-  * [Inspecting model for debugging](#inspecting-model-for-debugging)
-  * [Miscellaneous Examples](#miscellaneous-examples)
-
-## Keras.io Converter (TF 1.x backend)
-
-Models created via the [Keras.io API](https://keras.io), with the TensorFlow 1.x backend,
-and saved in the `.h5` format can be converted to Core ML.
-
-The coremltools Keras converter supports Keras versions 2.2+.
-(Versions below 2.2, down to 1.2.2, are also supported; however, they are no longer
-maintained, i.e. no bug fixes will be made for versions below 2.2.)
-
-```python
-# convert by providing a path to a .h5 file
-mlmodel = coremltools.converters.keras.convert('keras_model.h5')
-mlmodel.save('coreml_model.mlmodel')
-
-# convert by providing a Keras model object
-from keras.models import load_model
-keras_model = load_model("keras_model.h5")
-mlmodel = coremltools.converters.keras.convert(keras_model)
-```
-
-The convert function can take several additional arguments, such as:
-
-* `input_names`, `output_names`: to provide custom names for inputs and outputs
-* `image_input_names`: to get an mlmodel such that its input is of type image
-* `is_bgr`, `red_bias`, `green_bias`, `blue_bias`, `image_scale`: to provide parameters for image pre-processing
-  when the input is of type image (i.e. if `image_input_names` is being used).
-* `class_labels`, `predicted_feature_name`: to produce an mlmodel of type neural network classifier
-* `model_precision`: to produce a quantized model. Equivalently, the mlmodel can be quantized post conversion as well; see the section
-  on [Model quantization](#model-quantization)
-* `respect_trainable`: to produce an updatable mlmodel. See examples [here](https://github.com/apple/coremltools/tree/master/examples/updatable_models)
-* `add_custom_layers`, `custom_conversion_functions`: to add a custom layer in the generated mlmodel.
-  This is useful when Keras has a layer (native or lambda) that Core ML does not support.
-  For a description of Core ML custom layers, see this nice [overview](http://machinethink.net/blog/coreml-custom-layers/).
-
-For a complete list of arguments that can be passed to the convert method, see [here](https://apple.github.io/coremltools/generated/coremltools.converters.keras.convert.html)
-or the [function signature in code](https://github.com/apple/coremltools/blob/875abd9707dbe65eb92a31dbb54a68d6581e68ad/coremltools/converters/keras/_keras_converter.py#L344).
-
-#### Troubleshooting
-
-* Conversion of models defined via the `tf.keras` API is not supported via the coremltools Keras converter.
-  However, when `tf.keras` is used in TensorFlow 2.x and the model is exported to the `.h5` format, it can be converted via the
-  [TensorFlow converter](#tensorflow-conversion).
-
-* Models with Keras lambda layers: use `custom_conversion_functions`, so that Keras lambda layers can be mapped to Core ML custom layers.
-
-* What if the converter errors out due to an unsupported layer, or an unsupported parameter in a layer?
-  The coremltools Keras converter targets Core ML specification version 3, the one released during the macOS 10.14, iOS 12 release cycle.
-  The majority of the native layers of the `keras.io` API can be mapped to the iOS 12 Core ML layers.
-  However, if a conversion error due to an unsupported layer comes up, the recommended route is one of the following:
-
-  - Upgrade to TensorFlow 2.x, and then use the newer `tf.keras` API and convert to Core ML via the
-    [TensorFlow converter](#tensorflow-conversion)
-  - With TensorFlow 1.x, save the Keras model as a frozen graph def file in `.pb` format, instead of `.h5`.
-    Then use the [TensorFlow converter](#tensorflow-conversion).
-
-  Example:
-
-```python
-from keras.models import Sequential
-from keras.layers import Dense, ReLU
-
-h5_path = '/tmp/keras_model.h5'
-pb_path = '/tmp/keras_model.pb'
-mlmodel_path = '/tmp/keras_model.mlmodel'
-
-model = Sequential()
-model.add(Dense(32, input_shape=(784,)))
-model.add(ReLU())
-model.save(h5_path)
-
-input_tensor_shapes, output_node_names = _save_h5_as_frozen_pb(h5_path, pb_path)  # defined below
-
-# convert the .pb file to .mlmodel via the tfcoreml converter
-import tfcoreml
-mlmodel = tfcoreml.convert(
-    tf_model_path=pb_path,
-    mlmodel_path=mlmodel_path,
-    output_feature_names=output_node_names,
-    input_name_shape_dict=input_tensor_shapes,
-    minimum_ios_deployment_target='13')
-```
-
-Function to convert .h5 to .pb in TensorFlow 1.x:
-
-```python
-def _save_h5_as_frozen_pb(h5_path, frozen_model_path, has_variables=True):
-    from keras.models import load_model
-    from keras import backend as K
-    import tensorflow as tf
-    import shutil, tempfile, os
-    from tensorflow.python.tools.freeze_graph import freeze_graph
-
-    K.set_learning_phase(0)
-    model = load_model(h5_path)
-    model_dir = tempfile.mkdtemp()
-    graph_def_file = os.path.join(model_dir, 'tf_graph.pb')
-    checkpoint_file = os.path.join(model_dir, 'tf_model.ckpt')
-
-    output_node_names = []
-    if isinstance(model.output, list):
-        for idx in range(len(model.output)):
-            output_node_names.append(model.output[idx].name[:-2])
-    else:
-        output_node_names.append(model.output.name[:-2])
-
-    tf_graph = K.get_session().graph
-    tf.reset_default_graph()
-    if has_variables:
-        with tf_graph.as_default() as g:
-            saver = tf.train.Saver()
-
-    with tf.Session(graph=tf_graph) as sess:
-        sess.run(tf.global_variables_initializer())
-        # save graph definition somewhere
-        tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False)
-        # save the weights
-        if has_variables:
-            saver.save(sess, checkpoint_file)
-
-    K.clear_session()
-
-    # freeze the graph
-    if has_variables:
-        freeze_graph(input_graph=graph_def_file,
-                     input_saver="",
-                     input_binary=True,
-                     input_checkpoint=checkpoint_file,
-                     output_node_names=",".join(output_node_names),
-                     restore_op_name="save/restore_all",
-                     filename_tensor_name="save/Const:0",
-                     output_graph=frozen_model_path,
-                     clear_devices=True,
-                     initializer_nodes="")
-
-    if os.path.exists(model_dir):
-        shutil.rmtree(model_dir)
-
-    input_tensor_shapes = {}
-    if isinstance(model.input, list):
-        for idx in range(len(model.input)):
-            input_shape = [i for i in model.input_shape[idx]]
-            for i, d in enumerate(input_shape):
-                if d is None:
-                    input_shape[i] = 1
-
-            input_tensor_shapes[model.input[idx].name[:-2]] = input_shape
-    else:
-        input_shape = [i for i in model.input_shape]
-        for i, d in enumerate(input_shape):
-            if d is None:
-                input_shape[i] = 1
-        input_tensor_shapes[model.input.name[:-2]] = input_shape
-
-    return input_tensor_shapes, output_node_names
-
-```
-
-
-
-Note: an alternative route that can be used in all of the cases above
-is to first convert the [Keras model to the `.onnx` format](https://github.com/onnx/keras-onnx) and then use the [ONNX converter](#onnx-converter) described below.
-The ONNX converter has been updated to target all Core ML specification versions from 1 to 4 (iOS 11 to iOS 13).
-
-## TensorFlow conversion
-
-TensorFlow models can be converted to Core ML by using the `tfcoreml` converter
-([link](https://github.com/tf-coreml/tf-coreml) to the GitHub repo), which
-depends on the coremltools package.
-
-```bash
-pip install --upgrade tfcoreml
-```
-
-### TensorFlow 1 converter
-
-To convert models trained/saved via TensorFlow 1, first export them into the frozen graph def format, which is a protobuf file
-format with `.pb` as the extension. Frozen `.pb` files can be obtained by using TensorFlow's
-`tensorflow.python.tools.freeze_graph` utility.
-
-[This](../examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_example.ipynb) Jupyter notebook shows how to freeze a graph to produce a `.pb` file.
-
-There are several other Jupyter notebook examples for conversion
-[here](../examples/neural_network_inference/tensorflow_converter/Tensorflow_1).
-
-```python
-import tfcoreml
-
-tfcoreml.convert(tf_model_path='my_model.pb',
-                 mlmodel_path='my_model.mlmodel',
-                 output_feature_names=['softmax:0'],  # name of the output tensor (appended by ":0")
-                 input_name_shape_dict={'input:0': [1, 227, 227, 3]},  # map from input tensor name (placeholder op in the graph) to shape
-                 minimum_ios_deployment_target='12')
-
-# if the above invocation fails with an error, then update the
-# minimum_ios_deployment_target to invoke the newer converter path:
-
-tfcoreml.convert(tf_model_path='my_model.pb',
-                 mlmodel_path='my_model.mlmodel',
-                 output_feature_names=['softmax'],  # name of the output op
-                 input_name_shape_dict={'input': [1, 227, 227, 3]},  # map from the placeholder op in the graph to shape (can have -1s)
-                 minimum_ios_deployment_target='13')
-```
-
-The argument `minimum_ios_deployment_target` controls the set of Core ML layers that are used by the converter.
-When its value is set to `'12'`, only the set of layers that were shipped in Core ML during the iOS 12, macOS 10.14 release cycle are used.
-It is recommended to first use this setting since, if successful, it produces a Core ML model that can be deployed to iOS 12 and higher.
-If it results in an error due to an unsupported op or parameter, then the target should be set to `'13'`, so that the
-converter can utilize all the layers (including control flow, recurrent layers etc.) that were shipped in Core ML in iOS 13.
-
-### TensorFlow 2 converter (tf.keras)
-
-There are 3 ways to export an inference graph in TensorFlow 2:
-
-1. Use the `tf.keras` APIs (Sequential, Functional, or Subclassing) and export to the `.h5` HDF5 file or `SavedModel` directory format.
-2. Use TensorFlow's low-level APIs (along with `tf.keras`) and export to a `SavedModel` directory format.
-3. Use TensorFlow's low-level APIs and export as `concrete functions` format.
-
-In all 3 cases, `tfcoreml`'s `convert()` function can be used to convert your model into the Core ML model format. The argument `minimum_ios_deployment_target` must be set to `'13'`.
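-
-For option 3, the starting point is a concrete function obtained from a `tf.function`. A minimal sketch of just that step (the toy function and input signature below are illustrative; see the linked notebooks for the end-to-end conversion):
-
-```python
-import tensorflow as tf
-
-# a toy computation wrapped as a tf.function with a fixed input signature
-@tf.function(input_signature=[tf.TensorSpec(shape=(1, 4), dtype=tf.float32)])
-def model_fn(x):
-    return tf.nn.relu(x)
-
-# the concrete function is the traced, shape-specialized graph
-concrete_func = model_fn.get_concrete_function()
-print(concrete_func.structured_outputs)
-```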
-
-**Converting a `tf.keras` HDF5 model**:
-
-```python
-from tensorflow.keras.applications import ResNet50
-import tfcoreml
-
-keras_model = ResNet50(weights=None, input_shape=(224, 224, 3))
-keras_model.save('./model.h5')
-
-# print input shape
-print(keras_model.input_shape)
-
-# get input, output node names for the TF graph from the Keras model
-input_name = keras_model.inputs[0].name.split(':')[0]
-keras_output_node_name = keras_model.outputs[0].name.split(':')[0]
-graph_output_node_name = keras_output_node_name.split('/')[-1]
-
-model = tfcoreml.convert('./model.h5',
-                         input_name_shape_dict={input_name: (1, 224, 224, 3)},
-                         output_feature_names=[graph_output_node_name],
-                         minimum_ios_deployment_target='13')
-
-
-model.save('./model.mlmodel')
-```
-
-The same flow works for a model built with the `tf.keras` Sequential API:
-
-```python
-import tensorflow as tf
-import tfcoreml
-
-keras_model = tf.keras.Sequential([
-    tf.keras.layers.Flatten(input_shape=(28, 28)),
-    tf.keras.layers.Dense(128, activation='relu'),
-    tf.keras.layers.Dense(10, activation='softmax')
-])
-
-keras_model.save('/tmp/keras_model.h5')
-
-# print input shape
-print(keras_model.input_shape)
-
-# get input, output node names for the TF graph from the Keras model
-input_name = keras_model.inputs[0].name.split(':')[0]
-keras_output_node_name = keras_model.outputs[0].name.split(':')[0]
-graph_output_node_name = keras_output_node_name.split('/')[-1]
-
-model = tfcoreml.convert(tf_model_path='/tmp/keras_model.h5',
-                         input_name_shape_dict={input_name: (1, 28, 28)},
-                         output_feature_names=[graph_output_node_name],
-                         minimum_ios_deployment_target='13')
-model.save('/tmp/keras_model.mlmodel')
-```
-
-**Converting a SavedModel:**
-
-```python
-from tensorflow.keras.applications import MobileNet
-import tfcoreml
-
-keras_model = MobileNet(weights=None, input_shape=(224, 224, 3))
-keras_model.save('./savedmodel', save_format='tf')
-# tf.saved_model.save(keras_model, './savedmodel')
-
-model = tfcoreml.convert('./savedmodel',
-                         mlmodel_path='./model.mlmodel',
-                         input_name_shape_dict={'input_1': (1, 224, 224, 3)},
-                         output_feature_names=['Identity'],
-                         minimum_ios_deployment_target='13')
-```
-
-See the notebooks [here](../examples/neural_network_inference/tensorflow_converter/Tensorflow_2)
-or the [unit test cases](https://github.com/apple/coremltools/blob/master/coremltools/converters/tensorflow/test/test_tf_2x.py) for more examples on how to save to `.h5`, `SavedModel`, or `concrete functions`.
-
-Note: When the value of `minimum_ios_deployment_target` is set to `'13'`, `tfcoreml` directly calls coremltools to convert the TensorFlow models, as can be seen [here](https://github.com/tf-coreml/tf-coreml/blob/674c30572867cd9d00dc930c0ee625f5b27de757/tfcoreml/_tf_coreml_converter.py#L672).
-The conversion code for `minimum_ios_deployment_target` less than or equal to `'12'` lives entirely in the `tfcoreml` GitHub repo, whereas for `minimum_ios_deployment_target` equal to `'13'` (or greater, in the future) the code lives entirely in the coremltools GitHub repo.
-
-### Known Issues / Troubleshooting
-
-- Although the majority of Core ML 3 (iOS 13, macOS 10.15) layers have been updated in the converter, there might be a few missing layers, or cases not handled.
-  Please [file a GitHub issue](https://github.com/apple/coremltools/issues/new/choose) if you encounter a bug while using the argument `minimum_ios_deployment_target='13'`.
-- The `tf.keras` conversion is only supported when TensorFlow 2.x is used.
-- TensorFlow 2.x model conversion is not supported with Python 2.
-- Currently there are issues while exporting `tf.keras` graphs that contain recurrent layers to the `.h5` format
-
-## ONNX Converter
-
-PyTorch and MXNet models can first be exported to the ONNX format and then converted to Core ML via the
-[onnx-coreml](https://github.com/onnx/onnx-coreml) converter.
-
-```shell
-pip install --upgrade onnx-coreml
-```
-
-```python
-from onnx_coreml import convert
-
-ml_model = convert(model='my_model.onnx',
-                   minimum_ios_deployment_target='12')  # or minimum_ios_deployment_target = '13'
-ml_model.save('my_model.mlmodel')
-```
-
-The argument `minimum_ios_deployment_target` controls the set of Core ML layers that are used by the converter.
-When its value is set to `'12'`, only the set of layers that were shipped in Core ML during the iOS 12, macOS 10.14 release cycle are used.
-It is recommended to first use this setting since, if successful, it produces a Core ML model that can be deployed to iOS 12 and higher.
-If it results in an error due to an unsupported op or parameter, then the target should be set to `'13'`, so that the
-converter can utilize all the layers (including control flow, recurrent layers etc.) that were shipped in Core ML in iOS 13.
-
-Additional converter arguments are explained [here](https://github.com/onnx/onnx-coreml#parameters).
-See additional examples [here](../examples/neural_network_inference/onnx_converter).
-
-### Converting a PyTorch model
-
-Converting a PyTorch model to a Core ML model is a two-step process:
-1. Convert the PyTorch model to an ONNX model
-   - The PyTorch model can be converted into an ONNX model using `torch.onnx.export`
-   - Reference: https://pytorch.org/docs/stable/onnx.html#id2
-   - Tools required: [PyTorch](https://pytorch.org/get-started/locally/)
-2. Convert the ONNX model to Core ML
-   - Take the `.onnx` model and pass it to the function `onnx_coreml.convert()`
-   - Tools required: [onnx-coreml](https://pypi.org/project/onnx-coreml/)
-
-
-
-**PyTorch to ONNX conversion:**
-
- - Create a PyTorch model
-   ```python
-   import torch
-   import torch.nn as nn
-   import torch.nn.functional as F
-
-   # Step 0 - (a) Define ML Model
-   class small_model(nn.Module):
-       def __init__(self):
-           super(small_model, self).__init__()
-           self.fc1 = nn.Linear(768, 256)
-           self.fc2 = nn.Linear(256, 10)
-
-       def forward(self, x):
-           y = F.relu(self.fc1(x))
-           y = F.softmax(self.fc2(y), dim=-1)
-           return y
-   ```
- - Load the model
-   ```python
-   # Step 0 - (b) Create the model or load it from disk
-   model = small_model()
-   dummy_input = torch.randn(768)
-   ```
- - Convert from PyTorch to ONNX
-   ```python
-   # Step 1 - PyTorch to ONNX model
-   torch.onnx.export(model, dummy_input, './small_model.onnx')
-   ```
-**ONNX to Core ML:**
-```python
-# Step 2 - ONNX to Core ML model
-from onnx_coreml import convert
-mlmodel = convert(model='./small_model.onnx', minimum_ios_deployment_target='13')
-
-# rename inputs/outputs
-import coremltools
-spec = mlmodel.get_spec()
-coremltools.utils.rename_feature(spec, current_name='input.1', new_name='input_tensor')
-coremltools.utils.rename_feature(spec, current_name='12', new_name='network_output')
-mlmodel = coremltools.models.MLModel(spec)
-
-# Save the converted Core ML model
-mlmodel.save('small_model.mlmodel')
-```
-
-#### What about frameworks other than PyTorch?
-Step 1 can be replaced by the respective framework's ONNX exporter.
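-
-Regardless of the source framework, it can help to sanity-check the exported `.onnx` file before step 2. A minimal sketch using the `onnx` Python package (assumed to be installed; the converter builds on it):
-
-```python
-import onnx
-
-onnx_model = onnx.load('./small_model.onnx')
-onnx.checker.check_model(onnx_model)  # raises if the model is structurally invalid
-print(onnx.helper.printable_graph(onnx_model.graph))  # human-readable graph dump
-```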
-
-
-#### Known Issues / Troubleshooting
-
-- If the ONNX opset version is greater than 9 and there are issues during conversion, please try exporting to ONNX
-  opset version 9 and then converting to Core ML
-- ONNX models with weight quantization and control flow layers (loop, branch) will give a conversion error, since
-  support for those has not yet been added to the converter
-
-## Building an mlmodel Using the Builder API
-
-[Code snippet](https://github.com/apple/coremltools/blob/master/docs/APIExamples.md#building-an-mlmodel-from-scratch-using-neural-network-builder)
-of building a toy 2-layer Core ML model.
-
-[Here](https://github.com/huggingface/swift-coreml-transformers/blob/ec00de7414c90a4c972ce9e2838353e57d45eaf4/model_generation/gpt2.py) is an example of building the GPT2 model from scratch using the Core ML builder API, with pre-trained weights.
-
-
-## Model Quantization
-
-`coremltools` provides utilities for performing post-training quantization of the weight parameters,
-to reduce the size of the `.mlmodel` file. By default the converters produce mlmodels that have weights
-in FP32 precision. These can be quantized to FP16, or to anywhere from 8 bits all the way down to 1 bit.
-The lower the number of bits, the higher the chance of degrading the model accuracy. The loss in accuracy varies with
-the model.
-
-[Here](https://github.com/apple/coremltools/blob/master/examples/APIExamples.md#quantizing-a-neural-network-mlmodel)
-is a code snippet on using the quantization utilities.
-
-## Model Predictions
-
-Neural network models can take inputs of two datatypes: multi-arrays or image types.
-When using coremltools to call predict on a model, a numpy array must be fed in the multi-array case,
-and a PIL image Python object in the image case.
-
-Multi-array prediction:
-
-```python
-import coremltools
-import numpy as np
-
-model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel')
-
-# print the input description to get the input shape
-print(model.get_spec().description.input)
-
-input_shape = (...)  # insert the correct shape of the input
-
-# call predict
-output_dict = model.predict({'input_name': np.random.rand(*input_shape)}, useCPUOnly=True)
-```
-
-Image prediction:
-
-```python
-import coremltools
-import numpy as np
-import PIL.Image
-
-model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel')
-
-Height = 20  # use the correct input image height
-Width = 60  # use the correct input image width
-
-
-# Scenario 1: load an image from disk
-def load_image(path, resize_to=None):
-    # resize_to: (Width, Height)
-    img = PIL.Image.open(path)
-    if resize_to is not None:
-        img = img.resize(resize_to, PIL.Image.ANTIALIAS)
-    img_np = np.array(img).astype(np.float32)
-    return img_np, img
-
-
-# load the image and resize using PIL utilities
-_, img = load_image('/path/to/image.jpg', resize_to=(Width, Height))
-out_dict = model.predict({'image': img})
-
-# Scenario 2: load an image from a numpy array
-shape = (Height, Width, 3)  # height x width x RGB
-data = np.zeros(shape, dtype=np.uint8)
-# manipulate numpy data
-pil_img = PIL.Image.fromarray(data)
-out_dict = model.predict({'image': pil_img})
-```
-
-## Model Inspection and Editing
-
-[Code snippet](https://github.com/apple/coremltools/blob/master/docs/APIExamples.md#converting-between-mlmodel-and-spec)
-for loading an mlmodel and converting it to a spec, and vice versa.
-
-[Netron](https://github.com/lutzroeder/netron) is a nice tool to visualize Core ML neural network models.
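-
-For reference, the core of that load/edit/save round trip looks like this (paths are placeholders):
-
-```python
-import coremltools
-
-# load an mlmodel and grab its spec (protobuf) for editing
-mlmodel = coremltools.models.MLModel('path/to/the/model.mlmodel')
-spec = mlmodel.get_spec()
-
-# ... edit the spec in place ...
-
-# converting the spec back to an MLModel also compiles it
-mlmodel = coremltools.models.MLModel(spec)
-mlmodel.save('path/to/the/edited/model.mlmodel')
-```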
-
-### Printing Description
-
-To print a text description of the model:
-
-```python
-import coremltools
-
-nn_mlmodel = coremltools.models.MLModel('path/to/the/model.mlmodel')
-
-# To print a succinct description of the neural network
-spec = nn_mlmodel.get_spec()
-from coremltools.models.neural_network.printer import print_network_spec
-
-print_network_spec(spec, style='coding')
-# or
-print_network_spec(spec)
-```
-
-To print information about the pre-processing parameters of the model (only applicable if the input is of type image):
-
-```python
-import coremltools
-
-def _get_nn_spec(spec):
-    if spec.WhichOneof('Type') == 'neuralNetworkClassifier':
-        nn_spec = spec.neuralNetworkClassifier
-    elif spec.WhichOneof('Type') == 'neuralNetwork':
-        nn_spec = spec.neuralNetwork
-    elif spec.WhichOneof('Type') == 'neuralNetworkRegressor':
-        nn_spec = spec.neuralNetworkRegressor
-    else:
-        raise ValueError('Spec does not have a neural network')
-    return nn_spec
-
-spec = coremltools.models.utils.load_spec('path/to/the/saved/model.mlmodel')
-
-# Get the neural network portion of the spec
-nn_spec = _get_nn_spec(spec)
-
-# print the pre-processing parameters
-print(nn_spec.preprocessing)
-```
-
-### Flexible input/output shapes
-
-There are several [utilities](https://apple.github.io/coremltools/generated/coremltools.models.neural_network.flexible_shape_utils.html#module-coremltools.models.neural_network.flexible_shape_utils)
-to mark inputs with `flexible` shapes, e.g. `flexible_shape_utils.add_multiarray_ndshape_enumeration`, which allows a list of enumerated input shapes at runtime.
-
-### Modifying Input/Output Names and Types
-
-```python
-import coremltools
-
-model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel')
-spec = model.get_spec()
-
-# let's say the name of the input feature is "input", which we want to rename to "input_tensor"
-
-coremltools.utils.rename_feature(spec, current_name='input', new_name='input_tensor')
-model = coremltools.models.MLModel(spec)
-model.save('path/to/the/saved/model.mlmodel')
-```
-
-[Here](https://github.com/apple/coremltools/blob/d07421460f9f0ad1a2e9cf8b5248670358a24a1a/mlmodel/format/FeatureTypes.proto#L106) is the list of supported datatypes.
-For instance, to change the datatype from 'double' to 'float32':
-
-```python
-import coremltools
-from coremltools.proto import FeatureTypes_pb2 as ft
-
-model = coremltools.models.MLModel('path/to/the/saved/model.mlmodel')
-spec = model.get_spec()
-
-
-def _set_type_as_float32(feature):
-    if feature.type.HasField('multiArrayType'):
-        feature.type.multiArrayType.dataType = ft.ArrayFeatureType.FLOAT32
-
-
-# iterate over the inputs
-for input_ in spec.description.input:
-    _set_type_as_float32(input_)
-
-# iterate over the outputs
-for output_ in spec.description.output:
-    _set_type_as_float32(output_)
-
-model = coremltools.models.MLModel(spec)
-model.save('path/to/the/saved/model.mlmodel')
-```
-
-### Inspecting Model for Debugging
-
-Sometimes we want to print out the weights of a particular layer for debugging purposes.
-The following is an example showing how we can utilize the `protobuf` APIs to access any
-attributes, including the weight parameters. This code snippet uses the model we created in
-[this](https://github.com/apple/coremltools/blob/master/docs/APIExamples.md#building-an-mlmodel-from-scratch-using-neural-network-builder)
-example.
-
-```python
-import coremltools
-import numpy as np
-
-model = coremltools.models.MLModel('conv_prelu.mlmodel')
-
-spec = model.get_spec()
-print(spec)
-
-layer = spec.neuralNetwork.layers[0]
-weight_params = layer.convolution.weights
-
-print('Weights of {} layer: {}.'.format(layer.WhichOneof('layer'), layer.name))
-print(np.reshape(np.asarray(weight_params.floatValue), (1, 1, 3, 3)))
-```
-
-### Miscellaneous Examples
-
-- [Control flow Core ML model via the builder library](../examples/neural_network_inference/Neural_network_control_flow_power_iteration.ipynb)
-- [Per channel scale pre-processing](../examples/neural_network_inference/Image_preprocessing_per_channel_scale.ipynb)
-- [Image type as output, for a style transfer network](../examples/neural_network_inference/tensorflow_converter/Tensorflow_1/style_transfer_example.ipynb)
-- [Setting image pre-processing correctly](../examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v1_preprocessing_steps.ipynb)
-- [More examples](../examples/)
diff --git a/examples/README.md b/examples/README.md
index e84fc34a2..8c2b3024f 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,8 +1,3 @@
-## Docs
+# Examples
 
-
-[Code Snippet examples](APIExamples.md)
-
-
-[Neural network guide](NeuralNetworkGuide.md)
-
\ No newline at end of file
+* [User Guides and Examples](https://coremltools.readme.io/)
\ No newline at end of file
diff --git a/examples/neural_network_inference/Image_preprocessing_per_channel_scale.ipynb b/examples/neural_network_inference/Image_preprocessing_per_channel_scale.ipynb
deleted file mode 100644
index 2559438d5..000000000
--- a/examples/neural_network_inference/Image_preprocessing_per_channel_scale.ipynb
+++ /dev/null
@@ -1,211 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "While converting to CoreML there is an option to set image preprocessing parameters. A channel-wise bias and an overall scale are supported, which is quite common. However, some models may require a per-channel scale parameter. \n",
-    "This can be implemented by adding a \"scale\" layer at the beginning of the network, after conversion. Let us see how this can be done by directly editing the mlmodel spec, which is in protobuf format."
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "import coremltools\n", - "from keras.layers import *\n", - "from keras.models import Sequential\n", - "import numpy as np\n", - "from PIL import Image\n", - "import copy" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 : cropping2d_3_input, \n", - "1 : cropping2d_3, \n" - ] - } - ], - "source": [ - "# Define a toy Keras network and convert to CoreML\n", - "input_shape = (50, 50, 3)\n", - "model = Sequential()\n", - "model.add(Cropping2D(cropping=((5,5),(5,5)), input_shape=input_shape))\n", - "\n", - "\n", - "mlmodel = coremltools.converters.keras.convert(model,\n", - " image_input_names='input1',\n", - " red_bias=-10.0, \n", - " green_bias=-10.0, \n", - " blue_bias=-10.0,\n", - " image_scale=5.0)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "input {\n", - " name: \"input1\"\n", - " type {\n", - " imageType {\n", - " width: 50\n", - " height: 50\n", - " colorSpace: RGB\n", - " }\n", - " }\n", - "}\n", - "output {\n", - " name: \"output1\"\n", - " type {\n", - " multiArrayType {\n", - " shape: 3\n", - " shape: 40\n", - " shape: 40\n", - " dataType: DOUBLE\n", - " }\n", - " }\n", - "}\n", - "\n" - ] - } - ], - "source": [ - "spec = mlmodel.get_spec()\n", - "print(spec.description)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('output along channel at [0,0]: ', array([490., 490., 490.]))\n" - ] - } - ], - "source": [ - "# Lets call predict with an all constant image input\n", - "x = 100.0 * np.ones((3,50,50))\n", - "x = x.astype(np.uint8)\n", - "x_transpose = np.transpose(x, [1,2,0]) # PIL Image requires the format to be [H,W,C]\n", - "im = Image.fromarray(x_transpose)\n", - "\n", - "y = mlmodel.predict({'input1': im}, useCPUOnly=True)['output1']\n", - "print('output along channel at [0,0]: ', y[:,0,0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As expected the output values are 490. That is, ${\\textrm {scale}} * {\\textrm {input}} + {\\textrm {bias}}, \\;\\;{\\textrm{i.e.,}}\\;\\;5*100 -10 = 490$.\n", - "Let us insert a channel dependent scale layer in the beginning of the network, before the crop layer." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# get NN portion of the spec\n", - "nn_spec = spec.neuralNetwork\n", - "layers = nn_spec.layers # this is a list of all the layers\n", - "layers_copy = copy.deepcopy(layers) # make a copy of the layers, these will be added back later\n", - "del nn_spec.layers[:] # delete all the layers" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "# add a scale layer now\n", - "# since mlmodel is in protobuf format, we can add proto messages directly\n", - "# To look at more examples on how to add other layers: see \"builder.py\" file in coremltools repo\n", - "scale_layer = nn_spec.layers.add()\n", - "scale_layer.name = 'scale_layer'\n", - "scale_layer.input.append('input1')\n", - "scale_layer.output.append('input1_scaled')\n", - "params = scale_layer.scale\n", - "params.scale.floatValue.extend([1.0, 2.0, 3.0]) # scale values for RGB\n", - "params.shapeScale.extend([3,1,1]) # shape of the scale vector \n", - "\n", - "# now add back the rest of the layers (which happens to be just one in this case: the crop layer)\n", - "nn_spec.layers.extend(layers_copy)\n", - "\n", - "# need to also change the input of the crop layer to match the output of the scale layer\n", - "nn_spec.layers[1].input[0] = 'input1_scaled'" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('output along channel at [0,0]: ', array([ 490., 980., 1470.]))\n" - ] - } - ], - "source": [ - "# Lets run the model again\n", - "mlmodel = coremltools.models.MLModel(spec)\n", - "y = mlmodel.predict({'input1': im}, useCPUOnly=True)['output1']\n", - "print('output along channel at [0,0]: ', y[:,0,0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As expected the values are scaled by 1.0, 2.0, 3.0: the parameters of the scale layer. " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/Neural_network_control_flow_power_iteration.ipynb b/examples/neural_network_inference/Neural_network_control_flow_power_iteration.ipynb deleted file mode 100644 index a83ec4a98..000000000 --- a/examples/neural_network_inference/Neural_network_control_flow_power_iteration.ipynb +++ /dev/null @@ -1,458 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In CoreML Neural Network Specification version 4 (which is available from iOS 13 and MacOS 10.15), several \"control-flow\" layers have been added. CoreML spec is described in the protobuf format and for a list of all supported layer types and documentation, see [here](https://github.com/apple/coremltools/blob/master/mlmodel/format/NeuralNetwork.proto).\n", - "\n", - "In this notebook, we build a neural network that uses a few of the new control flow layers. 
We will write a simple Python program to compute the largest eigenvalue of a given matrix and then show how a neural network can be built to replicate that program in an mlmodel.\n",
-    "\n",
-    "We choose the [power iteration method](https://en.wikipedia.org/wiki/Power_iteration). It is a simple iterative algorithm. Given a square matrix, $A$ of dimensions $n\\times n$, it computes the largest eigenvalue (by magnitude) and the corresponding eigenvector (the algorithm can be adapted to compute all the eigenvalues; however, we do not implement that here). \n",
-    "\n",
-    "Here is how the algorithm works. Pick a normalized random vector to start with, $x$, of dimension $n$. Repetitively, multiply it by the matrix and normalize it, i.e., $x\\leftarrow Ax$ and $x\\leftarrow \\frac{x}{\\left \\| x \\right \\|}$. Gradually the vector converges to the largest eigenvector. Simple as that! \n",
-    "There are a few conditions that the matrix should satisfy for this to happen, but let us not worry about that for this example. \n",
-    "For now we will assume that the matrix is real and symmetric; this guarantees that the eigenvalues are real. \n",
-    "After we have the normalized eigenvector, the corresponding eigenvalue can be computed by the formula $x^TAx$.\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's code this up in Python using NumPy!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0: diff: 6.69187030143e-05\n",
-      "1: diff: 0.00208718410489\n",
-      "2: diff: 0.0614522880272\n",
-      "3: diff: 0.771617699317\n",
-      "4: diff: 0.193129218664\n",
-      "5: diff: 0.0075077446807\n",
-      "6: diff: 0.000241962094403\n",
-      "7: diff: 7.74407193072e-06\n",
-      "8: diff: 2.47796068775e-07\n",
-      "Largest eigenvalue: 8.5249 \n",
-      "('Corresponding eigenvector: ', array([-0.74152421,  0.67092611]))\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import copy\n",
-    "\n",
-    "np.random.seed(8) # try different seeds to play with the number of iterations it takes for convergence!\n",
-    "\n",
-    "'''\n",
-    "Use power method to compute the largest eigenvalue of a real symmetric matrix\n",
-    "'''\n",
-    "\n",
-    "convergence_tolerance = 1e-6 # decrease/increase to trade off precision\n",
-    "number_of_iterations = 100 # decrease/increase to trade off precision\n",
-    "\n",
-    "def power_iteration(matrix, starting_vector):\n",
-    "    x = copy.deepcopy(starting_vector)\n",
-    "    for i in range(number_of_iterations):\n",
-    "        y = np.matmul(matrix, x)\n",
-    "        # normalize\n",
-    "        y = y / np.sqrt(np.sum(y**2))\n",
-    "        # compute the diff to check for convergence\n",
-    "        # we use cosine difference as both vectors are normalized and can get\n",
-    "        # rotated by 180 degrees between iterations\n",
-    "        diff = 1-abs(np.dot(x,y))\n",
-    "        # update x\n",
-    "        x = y\n",
-    "        print('{}: diff: {}'.format(i, diff))\n",
-    "        if diff < convergence_tolerance: \n",
-    "            break\n",
-    "\n",
-    "    x_t = np.transpose(x)\n",
-    "    eigen_value = np.matmul(x_t, np.matmul(matrix, x))\n",
-    "    return eigen_value, x\n",
-    "    \n",
-    "\n",
-    "# define the symmetric real matrix for which we need the eigenvalue. \n",
\n", - "A = np.array([[4,-5], [-5,3]], dtype=np.float)\n", - "\n", - "# a random starting vector\n", - "starting_vector = np.random.rand(2)\n", - "starting_vector = starting_vector / np.sqrt(np.sum(starting_vector**2)) ## normalize it\n", - " \n", - "eigen_value, eigen_vector = power_iteration(A, starting_vector)\n", - "\n", - "print('Largest eigenvalue: %.4f ' % eigen_value)\n", - "print('Corresponding eigenvector: ', eigen_vector)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We see that in this case, the algorithm converged, given our specified toelrance, in 9 iterations. \n", - "To confirm whether the eigenvalue is correct, lets use the \"linalg\" sub-package of numpy. " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "numpy linalg: largest eigenvalue: 8.5249 \n", - "('numpy linalg: first eigenvector: ', array([ 0.74145253, -0.67100532]))\n" - ] - } - ], - "source": [ - "from numpy import linalg as LA\n", - "\n", - "e, v = LA.eig(A)\n", - "idx = np.argmax(abs(e))\n", - "print('numpy linalg: largest eigenvalue: %.4f ' % e[idx])\n", - "print('numpy linalg: first eigenvector: ', v[:,idx])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Indeed we see that the eigenvalue matches with our power iteration code. The eigenvector is rotated by 180 degrees, but that is fine.\n", - "\n", - "Now, lets build an mlmodel to do the same. We use the builder API provided by coremltools to write out the protobuf messages. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import coremltools\n", - "import coremltools.models.datatypes as datatypes\n", - "from coremltools.models.neural_network import NeuralNetworkBuilder\n", - "\n", - "input_features = [('matrix', datatypes.Array(*(2,2))),\n", - " ('starting_vector', datatypes.Array(*(2,)))]\n", - "\n", - "output_features = [('maximum_eigen_value', datatypes.Array(*(1,))), \n", - " ('eigen_vector', None),\n", - " ('iteration_count', datatypes.Array(*(1,)))]\n", - "\n", - "builder = NeuralNetworkBuilder(input_features, output_features, disable_rank5_shape_mapping=True)\n", - "\n", - "# convert the starting_vector which has shape (2,) to shape (2,1) \n", - "# so that it can be used by the Batched-MatMul layer\n", - "builder.add_expand_dims('expand_dims', 'starting_vector', 'x', axes=[-1])\n", - "builder.add_load_constant_nd('iteration_count', 'iteration_count',\n", - " constant_value=np.zeros((1,)),\n", - " shape=(1,))\n", - "\n", - "# start building the loop\n", - "loop_layer = builder.add_loop('loop', max_iterations=number_of_iterations)\n", - "# get the builder object for the \"body\" of the loop\n", - "loop_body_builder = NeuralNetworkBuilder(nn_spec=loop_layer.loop.bodyNetwork)\n", - "\n", - "# matrix multiply\n", - "# input shapes: (n,n),(n,1)\n", - "# output shape: (n,1)\n", - "loop_body_builder.add_batched_mat_mul('bmm.1', input_names=['matrix','x'], output_name='y')\n", - "# normalize the vector\n", - "loop_body_builder.add_reduce_l2('reduce', input_name='y', output_name='norm', axes = [0])\n", - "loop_body_builder.add_divide_broadcastable('divide', ['y','norm'], 'y_normalized')\n", - "\n", - "# find difference with previous, which is computed as (1 - abs(cosine diff))\n", - "loop_body_builder.add_batched_mat_mul('cosine', ['y_normalized', 'x'], 'cosine_diff', transpose_a=True)\n", - 
"loop_body_builder.add_unary('abs_cosine','cosine_diff','abs_cosine_diff', mode='abs')\n", - "loop_body_builder.add_activation('diff', non_linearity='LINEAR',\n", - " input_name='abs_cosine_diff',\n", - " output_name='diff', params=[-1,1])\n", - "\n", - "# update iteration count\n", - "loop_body_builder.add_activation('iteration_count_add', non_linearity='LINEAR',\n", - " input_name='iteration_count',\n", - " output_name='iteration_count_plus_1', params=[1,1])\n", - "loop_body_builder.add_copy('iteration_count_update', 'iteration_count_plus_1', 'iteration_count')\n", - "\n", - "# update 'x'\n", - "loop_body_builder.add_copy('update_x', 'y_normalized', 'x')\n", - "\n", - "# add condition to break from the loop, if convergence criterion is met\n", - "loop_body_builder.add_less_than('cond', ['diff'], 'cond', alpha=convergence_tolerance)\n", - "branch_layer = loop_body_builder.add_branch('branch_layer', 'cond')\n", - "builder_ifbranch = NeuralNetworkBuilder(nn_spec=branch_layer.branch.ifBranch)\n", - "builder_ifbranch.add_loop_break('break')\n", - "\n", - "# now we are out of the loop, compute the eigenvalue\n", - "builder.add_batched_mat_mul('bmm.2', input_names=['matrix','x'], output_name='x_right')\n", - "builder.add_batched_mat_mul('bmm.3', input_names=['x','x_right'], output_name='maximum_eigen_value', transpose_a=True)\n", - "builder.add_squeeze('squeeze', 'x', 'eigen_vector', squeeze_all=True)\n", - "\n", - "spec = builder.spec\n", - "model = coremltools.models.MLModel(spec)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Okay, so now we have the mlmodel spec. Before we call predict on it, lets print it out to check whether everything looks okay. We use the utility called \"print_network_spec\"" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Inputs:\n", - " matrix [2, 2]\n", - " starting_vector [2]\n", - "Outputs:\n", - " maximum_eigen_value [1]\n", - " eigen_vector []\n", - " iteration_count [1]\n", - "\n", - "\n", - "def model(matrix, starting_vector) :\n", - "\tx = \u001b[91m expandDims\u001b[00m\u001b[94m (starting_vector)\u001b[00m\n", - "\titeration_count = \u001b[91m loadConstantND\u001b[00m\u001b[94m (shape = \u001b[00m(1,), \u001b[94m value = \u001b[00m[0.0]\u001b[94m )\u001b[00m\n", - "\u001b[91m \tloop\u001b[00m\u001b[94m ()\u001b[00m\n", - "\t\ty = \u001b[91m batchedMatmul\u001b[00m\u001b[94m (matrix, x)\u001b[00m\n", - "\t\tnorm = \u001b[91m reduceL2\u001b[00m\u001b[94m (y)\u001b[00m\n", - "\t\ty_normalized = \u001b[91m divideBroadcastable\u001b[00m\u001b[94m (y, norm)\u001b[00m\n", - "\t\tcosine_diff = \u001b[91m batchedMatmul\u001b[00m\u001b[94m (y_normalized, x)\u001b[00m\n", - "\t\tabs_cosine_diff = \u001b[91m unary\u001b[00m\u001b[94m (cosine_diff)\u001b[00m\n", - "\t\tdiff = \u001b[91m activation\u001b[00m\u001b[94m (abs_cosine_diff)\u001b[00m\n", - "\t\titeration_count_plus_1 = \u001b[91m activation\u001b[00m\u001b[94m (iteration_count)\u001b[00m\n", - "\t\titeration_count = \u001b[91m copy\u001b[00m\u001b[94m (iteration_count_plus_1)\u001b[00m\n", - "\t\tx = \u001b[91m copy\u001b[00m\u001b[94m (y_normalized)\u001b[00m\n", - "\t\tcond = \u001b[91m lessThan\u001b[00m\u001b[94m (diff)\u001b[00m\n", - "\u001b[91m \t\tbranch\u001b[00m\u001b[94m (cond)\u001b[00m\n", - "\u001b[91m \t\tIfBranch:\u001b[00m\n", - "\u001b[91m \t\t\tloopBreak\u001b[00m\n", - "\tx_right = \u001b[91m batchedMatmul\u001b[00m\u001b[94m (matrix, x)\u001b[00m\n", 
- "\tmaximum_eigen_value = \u001b[91m batchedMatmul\u001b[00m\u001b[94m (x, x_right)\u001b[00m\n", - "\teigen_vector = \u001b[91m squeeze\u001b[00m\u001b[94m (x)\u001b[00m\n", - "\u001b[91m \n", - "\treturn \u001b[00mmaximum_eigen_value, eigen_vector, iteration_count\n" - ] - } - ], - "source": [ - "from coremltools.models.neural_network.printer import print_network_spec\n", - "print_network_spec(spec, style='coding')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CoreML computed eigenvalue: 8.5249\n", - "('CoreML computed eigenvector: ', array([-0.74152416, 0.67092603]), (2,))\n", - "CoreML iteration count: 9\n" - ] - } - ], - "source": [ - "# call predict on CoreML model\n", - "input_dict = {}\n", - "input_dict['starting_vector'] = starting_vector\n", - "input_dict['matrix'] = A.astype(np.float)\n", - "\n", - "output = model.predict(input_dict)\n", - "coreml_eigen_value = output['maximum_eigen_value']\n", - "coreml_eigen_vector = output['eigen_vector']\n", - "\n", - "print('CoreML computed eigenvalue: %.4f' % coreml_eigen_value)\n", - "print('CoreML computed eigenvector: ', coreml_eigen_vector, coreml_eigen_vector.shape)\n", - "print('CoreML iteration count: %d' % output['iteration_count'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Indeed the output matches with our python program. \n", - "\n", - "Although, we do not do it here, the parameters \"convergence_tolerance\" and \"number_of_iterations\" can be made as network inputs, so that their value can be modifed at runtime. \n", - "\n", - "Currently, the input shapes to the Core ML model are fixed, $(2, 2)$ for the matrix and $(2,)$ for the starting vector. However, we can add shape flexibility so that the same mlmodel can be run on different input sizes. There are two ways to specify shape flexibility, either through \"ranges\" or via a list of \"enumerated\" shapes. Here we specify the latter." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from coremltools.models.neural_network import flexible_shape_utils\n",
-    "\n",
-    "# (2,2) has already been provided as the default shape for \"matrix\" \n",
-    "# during initialization of the builder,\n",
-    "# here we add two more shapes that will be allowed at runtime\n",
-    "flexible_shape_utils.add_multiarray_ndshape_enumeration(spec, \n",
-    "                                                        feature_name='matrix',\n",
-    "                                                        enumerated_shapes=[(3,3), (4,4)])\n",
-    "\n",
-    "# (2,) has already been provided as the default shape for \"starting_vector\" \n",
-    "# during initialization of the builder,\n",
-    "# here we add two more shapes that will be allowed at runtime\n",
-    "flexible_shape_utils.add_multiarray_ndshape_enumeration(spec, \n",
-    "                                                        feature_name='starting_vector',\n",
-    "                                                        enumerated_shapes=[(3,), (4,)])\n",
-    "\n",
-    "model = coremltools.models.MLModel(spec)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0: diff: 0.99757552989\n",
-      "1: diff: 0.718149467089\n",
-      "2: diff: 0.492558374678\n",
-      "3: diff: 0.325410135011\n",
-      "4: diff: 0.208606358183\n",
-      "5: diff: 0.130795340624\n",
-      "6: diff: 0.0807677916817\n",
-      "7: diff: 0.0493798553633\n",
-      "8: diff: 0.0299993308647\n",
-      "9: diff: 0.0181536364413\n",
-      "10: diff: 0.0109588786353\n",
-      "11: diff: 0.00660585926588\n",
-      "12: diff: 0.0039783687005\n",
-      "13: diff: 0.00239467498795\n",
-      "14: diff: 0.00144094325621\n",
-      "15: diff: 0.000866886171118\n",
-      "16: diff: 0.000521466038849\n",
-      "17: diff: 0.00031366000502\n",
-      "18: diff: 0.000188657339187\n",
-      "19: diff: 0.000113468967192\n",
-      "20: diff: 6.82454629412e-05\n",
-      "21: diff: 4.1045582895e-05\n",
-      "22: diff: 2.46863363353e-05\n",
-      "23: diff: 1.48472285797e-05\n",
-      "24: diff: 8.92962598664e-06\n",
-      "25: diff: 5.37057288463e-06\n",
-      "26: diff: 3.23003808245e-06\n",
-      "27: diff: 1.94264962894e-06\n",
-      "28: diff: 1.16837216313e-06\n",
-      "29: diff: 7.02696602684e-07\n",
-      "python code: largest eigenvalue: -11.7530 \n",
-      "('python code: corresponding eigenvector: ', array([ 0.61622756,  0.52125649, -0.59038569]))\n"
-     ]
-    }
-   ],
-   "source": [
-    "# let's run the model with a (3,3) matrix \n",
-    "A = np.array([[1, -6, 8], [-6, 1, 5], [8, 5, 1]], dtype=np.float)\n",
-    "\n",
-    "starting_vector = np.random.rand(3)\n",
-    "starting_vector = starting_vector / np.sqrt(np.sum(starting_vector**2)) ## normalize it\n",
-    "\n",
-    "eigen_value, eigen_vector = power_iteration(A, starting_vector)\n",
-    "\n",
-    "print('python code: largest eigenvalue: %.4f ' % eigen_value)\n",
-    "print('python code: corresponding eigenvector: ', eigen_vector)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "numpy linalg: largest eigenvalue: -11.7530 \n",
-      "('numpy linalg: first eigenvector: ', array([-0.61583909, -0.5213392 ,  0.59071791]))\n"
-     ]
-    }
-   ],
-   "source": [
-    "from numpy import linalg as LA\n",
-    "\n",
-    "e, v = LA.eig(A)\n",
-    "idx = np.argmax(abs(e))\n",
-    "print('numpy linalg: largest eigenvalue: %.4f ' % e[idx])\n",
-    "print('numpy linalg: first eigenvector: ', v[:,idx])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CoreML computed eigenvalue: -11.7530\n",
-      "('CoreML computed eigenvector: ', array([ 0.61622757,  0.52125645, -0.59038568]), (3,))\n",
-      "CoreML iteration count: 30\n"
-     ]
-    }
-   ],
-   "source": [
-    "input_dict['starting_vector'] = starting_vector\n",
-    "input_dict['matrix'] = A.astype(np.float)\n",
-    "\n",
-    "output = model.predict(input_dict)\n",
-    "coreml_eigen_value = output['maximum_eigen_value']\n",
-    "coreml_eigen_vector = output['eigen_vector']\n",
-    "\n",
-    "print('CoreML computed eigenvalue: %.4f' % coreml_eigen_value)\n",
-    "print('CoreML computed eigenvector: ', coreml_eigen_vector, coreml_eigen_vector.shape)\n",
-    "print('CoreML iteration count: %d' % output['iteration_count'])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/neural_network_inference/README.md b/examples/neural_network_inference/README.md
deleted file mode 100644
index e7682d318..000000000
--- a/examples/neural_network_inference/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-Neural Network Inference Examples
-=================================
-
-In this set of notebooks, we show examples of building and editing mlmodels via coremltools.
-
-- [Setting per channel scale image pre-processing](Image_preprocessing_per_channel_scale.ipynb)
-This notebook shows how an mlmodel can be edited after conversion, to add to it a per-channel scale pre-processing layer.
-
-- [Power iteration algorithm using a neural network Core ML model](Neural_network_control_flow_power_iteration.ipynb)
-This notebook shows how to build an mlmodel from scratch using the neural network `builder` API in coremltools.
-In particular, this example shows how a non-neural network algorithm involving control flow features can still be
-expressed as an mlmodel. The feature of using `flexible shaped` inputs is also touched upon at the end of the notebook.
-
-- [TensorFlow 1 conversion examples](tensorflow_converter/Tensorflow_1)
-
-- [TensorFlow 2 conversion examples](tensorflow_converter/Tensorflow_2)
-
-- [ONNX conversion examples](onnx_converter)
-
diff --git a/examples/neural_network_inference/onnx_converter/BERT.ipynb b/examples/neural_network_inference/onnx_converter/BERT.ipynb
deleted file mode 100644
index c81a1bae5..000000000
--- a/examples/neural_network_inference/onnx_converter/BERT.ipynb
+++ /dev/null
@@ -1,3201 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/bhushansonawaneint/miniconda3/envs/py35/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
-      "  from ._conv import register_converters as _register_converters\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "import torch.onnx\n",
-    "from onnx_coreml import convert\n",
-    "from pytorch_transformers import *\n",
-    "import numpy as np\n",
-    "from utils import _compute_SNR"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Converting a PyTorch Model into a CoreML Model\n",
-    "1. Convert PyTorch to ONNX using PyTorch ONNX Export\n",
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Model Description\n",
-    "* https://huggingface.co/transformers/pretrained_models.html"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1 BERT BASE UNCASED\n",
-    "- The saved PyTorch, ONNX, and Core ML models can be found at https://drive.google.com/drive/u/3/folders/1V4BxddAZ_EzQk18PRSu4lGjAQGqRfxU3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Model path\n",
-    "TMP_DIR = '/tmp/'\n",
-    "model_name = 'bert-base-uncased'\n",
-    "pt_path = TMP_DIR + model_name + '.pt'\n",
-    "onnx_model_path = TMP_DIR + model_name + '.onnx'\n",
-    "mlmodel_path = TMP_DIR + model_name + '.mlmodel'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load BERT Base Model\n",
-    "# Details: 12-layer, 768-hidden, 12-heads, 110M parameters. Trained on lower-cased English text.\n",
-    "model = BertModel.from_pretrained(model_name)\n",
-    "torch.save(model, pt_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step 1 - Convert from PyTorch to ONNX\n",
-    "test_input = torch.randint(0, 512, (1, 512))\n",
-    "torch.onnx.export(model,\n",
-    "                  test_input,\n",
-    "                  onnx_model_path,\n",
-    "                  input_names=[\"input_ids\"],\n",
-    "                  output_names=[\"start_scores\", \"end_scores\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/960: Converting Node Type ConstantOfShape\n",
-      "2/960: Converting Node Type ConstantOfShape\n",
-      "3/960: Converting Node Type Unsqueeze\n",
-      "4/960: Converting Node Type Unsqueeze\n",
-      "5/960: Converting Node Type Sub\n",
-      "6/960: Converting Node Type Mul\n",
-      "7/960: Converting Node Type Expand\n",
-      "8/960: Converting Node Type Gather\n",
-      "9/960: Converting Node Type Gather\n",
-      "10/960: Converting Node Type Gather\n",
-      "[... per-node conversion log for nodes 11/960 through 934/960 elided; the converter prints one such line for every ONNX node ...]\n",
-      "935/960: 
Converting Node Type Sqrt\n",
-      "936/960: Converting Node Type Div\n",
-      "937/960: Converting Node Type Mul\n",
-      "938/960: Converting Node Type Add\n",
-      "939/960: Converting Node Type MatMul\n",
-      "940/960: Converting Node Type Add\n",
-      "941/960: Converting Node Type Mul\n",
-      "942/960: Converting Node Type Div\n",
-      "943/960: Converting Node Type Erf\n",
-      "944/960: Converting Node Type Add\n",
-      "945/960: Converting Node Type Mul\n",
-      "946/960: Converting Node Type MatMul\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "947/960: Converting Node Type Add\n",
-      "948/960: Converting Node Type Add\n",
-      "949/960: Converting Node Type ReduceMean\n",
-      "950/960: Converting Node Type Sub\n",
-      "951/960: Converting Node Type Pow\n",
-      "952/960: Converting Node Type ReduceMean\n",
-      "953/960: Converting Node Type Add\n",
-      "954/960: Converting Node Type Sqrt\n",
-      "955/960: Converting Node Type Div\n",
-      "956/960: Converting Node Type Mul\n",
-      "957/960: Converting Node Type Add\n",
-      "958/960: Converting Node Type Gather\n",
-      "959/960: Converting Node Type Gemm\n",
-      "960/960: Converting Node Type Tanh\n",
-      "Translation to CoreML spec completed. Now compiling the CoreML model.\n",
-      "Model Compilation done.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Step 2 - Convert from ONNX to CoreML MLModel\n",
-    "mlmodel = convert(model=onnx_model_path, target_ios=\"13\")\n",
-    "mlmodel.save(mlmodel_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Run prediction on both models to verify conversion correctness\n",
-    "# on the given input\n",
-    "# PyTorch prediction\n",
-    "pred_pt = model(test_input)\n",
-    "\n",
-    "# MLModel prediction\n",
-    "input_dict = {'input_ids': test_input.numpy().astype(np.float32)}\n",
-    "pred_coreml = mlmodel.predict(input_dict, useCPUOnly=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Start Scores: SNR: 116.98051819605858 PSNR: 73.80000517139831\n",
-      "End Scores: SNR: 93.95363293912368 PSNR: 69.97449408105454\n"
-     ]
-    }
-   ],
-   "source": [
-    "# SNR and PSNR verification between the PyTorch and Core ML predictions\n",
-    "_compute_SNR(pred_pt[0].detach().numpy(), pred_coreml['start_scores'], 'Start Scores: ')\n",
-    "_compute_SNR(pred_pt[1].detach().numpy(), pred_coreml['end_scores'], 'End Scores: ')"
-   ]
-  },
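`_compute_SNR` is imported from the example's local `utils` module, which is not part of this diff. A plausible sketch of such a helper, for reference only (hypothetical, not the repository's actual implementation), computes the signal-to-noise and peak signal-to-noise ratios in dB between the reference and converted outputs:

```python
import numpy as np

def _compute_SNR(reference, test, prefix=''):
    # Hypothetical sketch of the helper imported from utils; the real
    # implementation lives alongside the notebook and is not shown here.
    reference = np.asarray(reference, dtype=np.float64).flatten()
    test = np.asarray(test, dtype=np.float64).flatten()
    noise = reference - test
    noise_var = np.mean(noise ** 2) + 1e-20   # avoid division by zero
    signal_energy = np.mean(reference ** 2)
    peak_energy = np.max(reference ** 2)
    snr = 10 * np.log10(signal_energy / noise_var)
    psnr = 10 * np.log10(peak_energy / noise_var)
    print(prefix + 'SNR: {} PSNR: {}'.format(snr, psnr))
    return snr, psnr
```

Higher values mean the Core ML outputs track the PyTorch outputs more closely; SNR figures around 100 dB, as above, indicate a near-lossless conversion.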
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2 BERT LARGE UNCASED\n",
-    "- The saved PyTorch, ONNX, and Core ML models can be found at https://drive.google.com/drive/u/3/folders/1V4BxddAZ_EzQk18PRSu4lGjAQGqRfxU3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TMP_DIR = '/tmp/'\n",
-    "model_name = 'bert-large-uncased'\n",
-    "pt_path = TMP_DIR + model_name + '.pt'\n",
-    "onnx_model_path = TMP_DIR + model_name + '.onnx'\n",
-    "mlmodel_path = TMP_DIR + model_name + '.mlmodel'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load BERT Large Model\n",
-    "# Details: 24-layer, 1024-hidden, 16-heads, 340M parameters. Trained on lower-cased English text.\n",
-    "model = BertModel.from_pretrained(model_name)\n",
-    "torch.save(model, pt_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Step 1 - Convert from PyTorch to ONNX\n",
-    "test_input = torch.randint(0, 512, (1, 512))\n",
-    "torch.onnx.export(model,\n",
-    "                  test_input,\n",
-    "                  onnx_model_path,\n",
-    "                  input_names=[\"input_ids\"],\n",
-    "                  output_names=[\"start_scores\", \"end_scores\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1896: Converting Node Type ConstantOfShape\n",
-      "2/1896: Converting Node Type ConstantOfShape\n",
-      "3/1896: Converting Node Type Unsqueeze\n",
-      "4/1896: Converting Node Type Unsqueeze\n",
-      "5/1896: Converting Node Type Sub\n",
-      "6/1896: Converting Node Type Mul\n",
-      "7/1896: Converting Node Type Expand\n",
-      "8/1896: Converting Node Type Gather\n",
-      "9/1896: Converting Node Type Gather\n",
-      "10/1896: Converting Node Type Gather\n",
-      "[... per-node conversion log for nodes 11/1896 through 384/1896 elided; same pattern as for BERT base, with twice as many layers ...]\n",
-      "385/1896: 
Converting Node Type Sub\n", - "386/1896: Converting Node Type Pow\n", - "387/1896: Converting Node Type ReduceMean\n", - "388/1896: Converting Node Type Add\n", - "389/1896: Converting Node Type Sqrt\n", - "390/1896: Converting Node Type Div\n", - "391/1896: Converting Node Type Mul\n", - "392/1896: Converting Node Type Add\n", - "393/1896: Converting Node Type MatMul\n", - "394/1896: Converting Node Type Add\n", - "395/1896: Converting Node Type Mul\n", - "396/1896: Converting Node Type Div\n", - "397/1896: Converting Node Type Erf\n", - "398/1896: Converting Node Type Add\n", - "399/1896: Converting Node Type Mul\n", - "400/1896: Converting Node Type MatMul\n", - "401/1896: Converting Node Type Add\n", - "402/1896: Converting Node Type Add\n", - "403/1896: Converting Node Type ReduceMean\n", - "404/1896: Converting Node Type Sub\n", - "405/1896: Converting Node Type Pow\n", - "406/1896: Converting Node Type ReduceMean\n", - "407/1896: Converting Node Type Add\n", - "408/1896: Converting Node Type Sqrt\n", - "409/1896: Converting Node Type Div\n", - "410/1896: Converting Node Type Mul\n", - "411/1896: Converting Node Type Add\n", - "412/1896: Converting Node Type MatMul\n", - "413/1896: Converting Node Type Add\n", - "414/1896: Converting Node Type MatMul\n", - "415/1896: Converting Node Type Add\n", - "416/1896: Converting Node Type MatMul\n", - "417/1896: Converting Node Type Add\n", - "418/1896: Converting Node Type Shape\n", - "419/1896: Converting Node Type Gather\n", - "420/1896: Converting Node Type Shape\n", - "421/1896: Converting Node Type Gather\n", - "422/1896: Converting Node Type Unsqueeze\n", - "423/1896: Converting Node Type Unsqueeze\n", - "424/1896: Converting Node Type Concat\n", - "425/1896: Converting Node Type Reshape\n", - "426/1896: Converting Node Type Transpose\n", - "427/1896: Converting Node Type Shape\n", - "428/1896: Converting Node Type Gather\n", - "429/1896: Converting Node Type Shape\n", - "430/1896: Converting Node Type Gather\n", - "431/1896: Converting Node Type Unsqueeze\n", - "432/1896: Converting Node Type Unsqueeze\n", - "433/1896: Converting Node Type Concat\n", - "434/1896: Converting Node Type Reshape\n", - "435/1896: Converting Node Type Shape\n", - "436/1896: Converting Node Type Gather\n", - "437/1896: Converting Node Type Shape\n", - "438/1896: Converting Node Type Gather\n", - "439/1896: Converting Node Type Unsqueeze\n", - "440/1896: Converting Node Type Unsqueeze\n", - "441/1896: Converting Node Type Concat\n", - "442/1896: Converting Node Type Reshape\n", - "443/1896: Converting Node Type Transpose\n", - "444/1896: Converting Node Type Transpose\n", - "445/1896: Converting Node Type MatMul\n", - "446/1896: Converting Node Type Div\n", - "447/1896: Converting Node Type Add\n", - "448/1896: Converting Node Type Softmax\n", - "449/1896: Converting Node Type MatMul\n", - "450/1896: Converting Node Type Transpose\n", - "451/1896: Converting Node Type Shape\n", - "452/1896: Converting Node Type Gather\n", - "453/1896: Converting Node Type Shape\n", - "454/1896: Converting Node Type Gather\n", - "455/1896: Converting Node Type Unsqueeze\n", - "456/1896: Converting Node Type Unsqueeze\n", - "457/1896: Converting Node Type Concat\n", - "458/1896: Converting Node Type Reshape\n", - "459/1896: Converting Node Type MatMul\n", - "460/1896: Converting Node Type Add\n", - "461/1896: Converting Node Type Add\n", - "462/1896: Converting Node Type ReduceMean\n", - "463/1896: Converting Node Type Sub\n", - "464/1896: Converting Node Type Pow\n", - 
"465/1896: Converting Node Type ReduceMean\n", - "466/1896: Converting Node Type Add\n", - "467/1896: Converting Node Type Sqrt\n", - "468/1896: Converting Node Type Div\n", - "469/1896: Converting Node Type Mul\n", - "470/1896: Converting Node Type Add\n", - "471/1896: Converting Node Type MatMul\n", - "472/1896: Converting Node Type Add\n", - "473/1896: Converting Node Type Mul\n", - "474/1896: Converting Node Type Div\n", - "475/1896: Converting Node Type Erf\n", - "476/1896: Converting Node Type Add\n", - "477/1896: Converting Node Type Mul\n", - "478/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "479/1896: Converting Node Type Add\n", - "480/1896: Converting Node Type Add\n", - "481/1896: Converting Node Type ReduceMean\n", - "482/1896: Converting Node Type Sub\n", - "483/1896: Converting Node Type Pow\n", - "484/1896: Converting Node Type ReduceMean\n", - "485/1896: Converting Node Type Add\n", - "486/1896: Converting Node Type Sqrt\n", - "487/1896: Converting Node Type Div\n", - "488/1896: Converting Node Type Mul\n", - "489/1896: Converting Node Type Add\n", - "490/1896: Converting Node Type MatMul\n", - "491/1896: Converting Node Type Add\n", - "492/1896: Converting Node Type MatMul\n", - "493/1896: Converting Node Type Add\n", - "494/1896: Converting Node Type MatMul\n", - "495/1896: Converting Node Type Add\n", - "496/1896: Converting Node Type Shape\n", - "497/1896: Converting Node Type Gather\n", - "498/1896: Converting Node Type Shape\n", - "499/1896: Converting Node Type Gather\n", - "500/1896: Converting Node Type Unsqueeze\n", - "501/1896: Converting Node Type Unsqueeze\n", - "502/1896: Converting Node Type Concat\n", - "503/1896: Converting Node Type Reshape\n", - "504/1896: Converting Node Type Transpose\n", - "505/1896: Converting Node Type Shape\n", - "506/1896: Converting Node Type Gather\n", - "507/1896: Converting Node Type Shape\n", - "508/1896: Converting Node Type Gather\n", - "509/1896: Converting Node Type Unsqueeze\n", - "510/1896: Converting Node Type Unsqueeze\n", - "511/1896: Converting Node Type Concat\n", - "512/1896: Converting Node Type Reshape\n", - "513/1896: Converting Node Type Shape\n", - "514/1896: Converting Node Type Gather\n", - "515/1896: Converting Node Type Shape\n", - "516/1896: Converting Node Type Gather\n", - "517/1896: Converting Node Type Unsqueeze\n", - "518/1896: Converting Node Type Unsqueeze\n", - "519/1896: Converting Node Type Concat\n", - "520/1896: Converting Node Type Reshape\n", - "521/1896: Converting Node Type Transpose\n", - "522/1896: Converting Node Type Transpose\n", - "523/1896: Converting Node Type MatMul\n", - "524/1896: Converting Node Type Div\n", - "525/1896: Converting Node Type Add\n", - "526/1896: Converting Node Type Softmax\n", - "527/1896: Converting Node Type MatMul\n", - "528/1896: Converting Node Type Transpose\n", - "529/1896: Converting Node Type Shape\n", - "530/1896: Converting Node Type Gather\n", - "531/1896: Converting Node Type Shape\n", - "532/1896: Converting Node Type Gather\n", - "533/1896: Converting Node Type Unsqueeze\n", - "534/1896: Converting Node Type Unsqueeze\n", - "535/1896: Converting Node Type Concat\n", - "536/1896: Converting Node Type Reshape\n", - "537/1896: Converting Node Type MatMul\n", - "538/1896: Converting Node Type Add\n", - "539/1896: Converting Node Type Add\n", - "540/1896: Converting Node Type ReduceMean\n", - "541/1896: Converting Node Type Sub\n", - "542/1896: Converting Node Type Pow\n", - 
"543/1896: Converting Node Type ReduceMean\n", - "544/1896: Converting Node Type Add\n", - "545/1896: Converting Node Type Sqrt\n", - "546/1896: Converting Node Type Div\n", - "547/1896: Converting Node Type Mul\n", - "548/1896: Converting Node Type Add\n", - "549/1896: Converting Node Type MatMul\n", - "550/1896: Converting Node Type Add\n", - "551/1896: Converting Node Type Mul\n", - "552/1896: Converting Node Type Div\n", - "553/1896: Converting Node Type Erf\n", - "554/1896: Converting Node Type Add\n", - "555/1896: Converting Node Type Mul\n", - "556/1896: Converting Node Type MatMul\n", - "557/1896: Converting Node Type Add\n", - "558/1896: Converting Node Type Add\n", - "559/1896: Converting Node Type ReduceMean\n", - "560/1896: Converting Node Type Sub\n", - "561/1896: Converting Node Type Pow\n", - "562/1896: Converting Node Type ReduceMean\n", - "563/1896: Converting Node Type Add\n", - "564/1896: Converting Node Type Sqrt\n", - "565/1896: Converting Node Type Div\n", - "566/1896: Converting Node Type Mul\n", - "567/1896: Converting Node Type Add\n", - "568/1896: Converting Node Type MatMul\n", - "569/1896: Converting Node Type Add\n", - "570/1896: Converting Node Type MatMul\n", - "571/1896: Converting Node Type Add\n", - "572/1896: Converting Node Type MatMul\n", - "573/1896: Converting Node Type Add\n", - "574/1896: Converting Node Type Shape\n", - "575/1896: Converting Node Type Gather\n", - "576/1896: Converting Node Type Shape\n", - "577/1896: Converting Node Type Gather\n", - "578/1896: Converting Node Type Unsqueeze\n", - "579/1896: Converting Node Type Unsqueeze\n", - "580/1896: Converting Node Type Concat\n", - "581/1896: Converting Node Type Reshape\n", - "582/1896: Converting Node Type Transpose\n", - "583/1896: Converting Node Type Shape\n", - "584/1896: Converting Node Type Gather\n", - "585/1896: Converting Node Type Shape\n", - "586/1896: Converting Node Type Gather\n", - "587/1896: Converting Node Type Unsqueeze\n", - "588/1896: Converting Node Type Unsqueeze\n", - "589/1896: Converting Node Type Concat\n", - "590/1896: Converting Node Type Reshape\n", - "591/1896: Converting Node Type Shape\n", - "592/1896: Converting Node Type Gather\n", - "593/1896: Converting Node Type Shape\n", - "594/1896: Converting Node Type Gather\n", - "595/1896: Converting Node Type Unsqueeze\n", - "596/1896: Converting Node Type Unsqueeze\n", - "597/1896: Converting Node Type Concat\n", - "598/1896: Converting Node Type Reshape\n", - "599/1896: Converting Node Type Transpose\n", - "600/1896: Converting Node Type Transpose\n", - "601/1896: Converting Node Type MatMul\n", - "602/1896: Converting Node Type Div\n", - "603/1896: Converting Node Type Add\n", - "604/1896: Converting Node Type Softmax\n", - "605/1896: Converting Node Type MatMul\n", - "606/1896: Converting Node Type Transpose\n", - "607/1896: Converting Node Type Shape\n", - "608/1896: Converting Node Type Gather\n", - "609/1896: Converting Node Type Shape\n", - "610/1896: Converting Node Type Gather\n", - "611/1896: Converting Node Type Unsqueeze\n", - "612/1896: Converting Node Type Unsqueeze\n", - "613/1896: Converting Node Type Concat\n", - "614/1896: Converting Node Type Reshape\n", - "615/1896: Converting Node Type MatMul\n", - "616/1896: Converting Node Type Add\n", - "617/1896: Converting Node Type Add\n", - "618/1896: Converting Node Type ReduceMean\n", - "619/1896: Converting Node Type Sub\n", - "620/1896: Converting Node Type Pow\n", - "621/1896: Converting Node Type ReduceMean\n", - "622/1896: Converting Node Type 
Add\n", - "623/1896: Converting Node Type Sqrt\n", - "624/1896: Converting Node Type Div\n", - "625/1896: Converting Node Type Mul\n", - "626/1896: Converting Node Type Add\n", - "627/1896: Converting Node Type MatMul\n", - "628/1896: Converting Node Type Add\n", - "629/1896: Converting Node Type Mul\n", - "630/1896: Converting Node Type Div\n", - "631/1896: Converting Node Type Erf\n", - "632/1896: Converting Node Type Add\n", - "633/1896: Converting Node Type Mul\n", - "634/1896: Converting Node Type MatMul\n", - "635/1896: Converting Node Type Add\n", - "636/1896: Converting Node Type Add\n", - "637/1896: Converting Node Type ReduceMean\n", - "638/1896: Converting Node Type Sub\n", - "639/1896: Converting Node Type Pow\n", - "640/1896: Converting Node Type ReduceMean\n", - "641/1896: Converting Node Type Add\n", - "642/1896: Converting Node Type Sqrt\n", - "643/1896: Converting Node Type Div\n", - "644/1896: Converting Node Type Mul\n", - "645/1896: Converting Node Type Add\n", - "646/1896: Converting Node Type MatMul\n", - "647/1896: Converting Node Type Add\n", - "648/1896: Converting Node Type MatMul\n", - "649/1896: Converting Node Type Add\n", - "650/1896: Converting Node Type MatMul\n", - "651/1896: Converting Node Type Add\n", - "652/1896: Converting Node Type Shape\n", - "653/1896: Converting Node Type Gather\n", - "654/1896: Converting Node Type Shape\n", - "655/1896: Converting Node Type Gather\n", - "656/1896: Converting Node Type Unsqueeze\n", - "657/1896: Converting Node Type Unsqueeze\n", - "658/1896: Converting Node Type Concat\n", - "659/1896: Converting Node Type Reshape\n", - "660/1896: Converting Node Type Transpose\n", - "661/1896: Converting Node Type Shape\n", - "662/1896: Converting Node Type Gather\n", - "663/1896: Converting Node Type Shape\n", - "664/1896: Converting Node Type Gather\n", - "665/1896: Converting Node Type Unsqueeze\n", - "666/1896: Converting Node Type Unsqueeze\n", - "667/1896: Converting Node Type Concat\n", - "668/1896: Converting Node Type Reshape\n", - "669/1896: Converting Node Type Shape\n", - "670/1896: Converting Node Type Gather\n", - "671/1896: Converting Node Type Shape\n", - "672/1896: Converting Node Type Gather\n", - "673/1896: Converting Node Type Unsqueeze\n", - "674/1896: Converting Node Type Unsqueeze\n", - "675/1896: Converting Node Type Concat\n", - "676/1896: Converting Node Type Reshape\n", - "677/1896: Converting Node Type Transpose\n", - "678/1896: Converting Node Type Transpose\n", - "679/1896: Converting Node Type MatMul\n", - "680/1896: Converting Node Type Div\n", - "681/1896: Converting Node Type Add\n", - "682/1896: Converting Node Type Softmax\n", - "683/1896: Converting Node Type MatMul\n", - "684/1896: Converting Node Type Transpose\n", - "685/1896: Converting Node Type Shape\n", - "686/1896: Converting Node Type Gather\n", - "687/1896: Converting Node Type Shape\n", - "688/1896: Converting Node Type Gather\n", - "689/1896: Converting Node Type Unsqueeze\n", - "690/1896: Converting Node Type Unsqueeze\n", - "691/1896: Converting Node Type Concat\n", - "692/1896: Converting Node Type Reshape\n", - "693/1896: Converting Node Type MatMul\n", - "694/1896: Converting Node Type Add\n", - "695/1896: Converting Node Type Add\n", - "696/1896: Converting Node Type ReduceMean\n", - "697/1896: Converting Node Type Sub\n", - "698/1896: Converting Node Type Pow\n", - "699/1896: Converting Node Type ReduceMean\n", - "700/1896: Converting Node Type Add\n", - "701/1896: Converting Node Type Sqrt\n", - "702/1896: Converting Node 
Type Div\n", - "703/1896: Converting Node Type Mul\n", - "704/1896: Converting Node Type Add\n", - "705/1896: Converting Node Type MatMul\n", - "706/1896: Converting Node Type Add\n", - "707/1896: Converting Node Type Mul\n", - "708/1896: Converting Node Type Div\n", - "709/1896: Converting Node Type Erf\n", - "710/1896: Converting Node Type Add\n", - "711/1896: Converting Node Type Mul\n", - "712/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "713/1896: Converting Node Type Add\n", - "714/1896: Converting Node Type Add\n", - "715/1896: Converting Node Type ReduceMean\n", - "716/1896: Converting Node Type Sub\n", - "717/1896: Converting Node Type Pow\n", - "718/1896: Converting Node Type ReduceMean\n", - "719/1896: Converting Node Type Add\n", - "720/1896: Converting Node Type Sqrt\n", - "721/1896: Converting Node Type Div\n", - "722/1896: Converting Node Type Mul\n", - "723/1896: Converting Node Type Add\n", - "724/1896: Converting Node Type MatMul\n", - "725/1896: Converting Node Type Add\n", - "726/1896: Converting Node Type MatMul\n", - "727/1896: Converting Node Type Add\n", - "728/1896: Converting Node Type MatMul\n", - "729/1896: Converting Node Type Add\n", - "730/1896: Converting Node Type Shape\n", - "731/1896: Converting Node Type Gather\n", - "732/1896: Converting Node Type Shape\n", - "733/1896: Converting Node Type Gather\n", - "734/1896: Converting Node Type Unsqueeze\n", - "735/1896: Converting Node Type Unsqueeze\n", - "736/1896: Converting Node Type Concat\n", - "737/1896: Converting Node Type Reshape\n", - "738/1896: Converting Node Type Transpose\n", - "739/1896: Converting Node Type Shape\n", - "740/1896: Converting Node Type Gather\n", - "741/1896: Converting Node Type Shape\n", - "742/1896: Converting Node Type Gather\n", - "743/1896: Converting Node Type Unsqueeze\n", - "744/1896: Converting Node Type Unsqueeze\n", - "745/1896: Converting Node Type Concat\n", - "746/1896: Converting Node Type Reshape\n", - "747/1896: Converting Node Type Shape\n", - "748/1896: Converting Node Type Gather\n", - "749/1896: Converting Node Type Shape\n", - "750/1896: Converting Node Type Gather\n", - "751/1896: Converting Node Type Unsqueeze\n", - "752/1896: Converting Node Type Unsqueeze\n", - "753/1896: Converting Node Type Concat\n", - "754/1896: Converting Node Type Reshape\n", - "755/1896: Converting Node Type Transpose\n", - "756/1896: Converting Node Type Transpose\n", - "757/1896: Converting Node Type MatMul\n", - "758/1896: Converting Node Type Div\n", - "759/1896: Converting Node Type Add\n", - "760/1896: Converting Node Type Softmax\n", - "761/1896: Converting Node Type MatMul\n", - "762/1896: Converting Node Type Transpose\n", - "763/1896: Converting Node Type Shape\n", - "764/1896: Converting Node Type Gather\n", - "765/1896: Converting Node Type Shape\n", - "766/1896: Converting Node Type Gather\n", - "767/1896: Converting Node Type Unsqueeze\n", - "768/1896: Converting Node Type Unsqueeze\n", - "769/1896: Converting Node Type Concat\n", - "770/1896: Converting Node Type Reshape\n", - "771/1896: Converting Node Type MatMul\n", - "772/1896: Converting Node Type Add\n", - "773/1896: Converting Node Type Add\n", - "774/1896: Converting Node Type ReduceMean\n", - "775/1896: Converting Node Type Sub\n", - "776/1896: Converting Node Type Pow\n", - "777/1896: Converting Node Type ReduceMean\n", - "778/1896: Converting Node Type Add\n", - "779/1896: Converting Node Type Sqrt\n", - "780/1896: Converting Node Type 
Div\n", - "781/1896: Converting Node Type Mul\n", - "782/1896: Converting Node Type Add\n", - "783/1896: Converting Node Type MatMul\n", - "784/1896: Converting Node Type Add\n", - "785/1896: Converting Node Type Mul\n", - "786/1896: Converting Node Type Div\n", - "787/1896: Converting Node Type Erf\n", - "788/1896: Converting Node Type Add\n", - "789/1896: Converting Node Type Mul\n", - "790/1896: Converting Node Type MatMul\n", - "791/1896: Converting Node Type Add\n", - "792/1896: Converting Node Type Add\n", - "793/1896: Converting Node Type ReduceMean\n", - "794/1896: Converting Node Type Sub\n", - "795/1896: Converting Node Type Pow\n", - "796/1896: Converting Node Type ReduceMean\n", - "797/1896: Converting Node Type Add\n", - "798/1896: Converting Node Type Sqrt\n", - "799/1896: Converting Node Type Div\n", - "800/1896: Converting Node Type Mul\n", - "801/1896: Converting Node Type Add\n", - "802/1896: Converting Node Type MatMul\n", - "803/1896: Converting Node Type Add\n", - "804/1896: Converting Node Type MatMul\n", - "805/1896: Converting Node Type Add\n", - "806/1896: Converting Node Type MatMul\n", - "807/1896: Converting Node Type Add\n", - "808/1896: Converting Node Type Shape\n", - "809/1896: Converting Node Type Gather\n", - "810/1896: Converting Node Type Shape\n", - "811/1896: Converting Node Type Gather\n", - "812/1896: Converting Node Type Unsqueeze\n", - "813/1896: Converting Node Type Unsqueeze\n", - "814/1896: Converting Node Type Concat\n", - "815/1896: Converting Node Type Reshape\n", - "816/1896: Converting Node Type Transpose\n", - "817/1896: Converting Node Type Shape\n", - "818/1896: Converting Node Type Gather\n", - "819/1896: Converting Node Type Shape\n", - "820/1896: Converting Node Type Gather\n", - "821/1896: Converting Node Type Unsqueeze\n", - "822/1896: Converting Node Type Unsqueeze\n", - "823/1896: Converting Node Type Concat\n", - "824/1896: Converting Node Type Reshape\n", - "825/1896: Converting Node Type Shape\n", - "826/1896: Converting Node Type Gather\n", - "827/1896: Converting Node Type Shape\n", - "828/1896: Converting Node Type Gather\n", - "829/1896: Converting Node Type Unsqueeze\n", - "830/1896: Converting Node Type Unsqueeze\n", - "831/1896: Converting Node Type Concat\n", - "832/1896: Converting Node Type Reshape\n", - "833/1896: Converting Node Type Transpose\n", - "834/1896: Converting Node Type Transpose\n", - "835/1896: Converting Node Type MatMul\n", - "836/1896: Converting Node Type Div\n", - "837/1896: Converting Node Type Add\n", - "838/1896: Converting Node Type Softmax\n", - "839/1896: Converting Node Type MatMul\n", - "840/1896: Converting Node Type Transpose\n", - "841/1896: Converting Node Type Shape\n", - "842/1896: Converting Node Type Gather\n", - "843/1896: Converting Node Type Shape\n", - "844/1896: Converting Node Type Gather\n", - "845/1896: Converting Node Type Unsqueeze\n", - "846/1896: Converting Node Type Unsqueeze\n", - "847/1896: Converting Node Type Concat\n", - "848/1896: Converting Node Type Reshape\n", - "849/1896: Converting Node Type MatMul\n", - "850/1896: Converting Node Type Add\n", - "851/1896: Converting Node Type Add\n", - "852/1896: Converting Node Type ReduceMean\n", - "853/1896: Converting Node Type Sub\n", - "854/1896: Converting Node Type Pow\n", - "855/1896: Converting Node Type ReduceMean\n", - "856/1896: Converting Node Type Add\n", - "857/1896: Converting Node Type Sqrt\n", - "858/1896: Converting Node Type Div\n", - "859/1896: Converting Node Type Mul\n", - "860/1896: Converting Node 
Type Add\n", - "861/1896: Converting Node Type MatMul\n", - "862/1896: Converting Node Type Add\n", - "863/1896: Converting Node Type Mul\n", - "864/1896: Converting Node Type Div\n", - "865/1896: Converting Node Type Erf\n", - "866/1896: Converting Node Type Add\n", - "867/1896: Converting Node Type Mul\n", - "868/1896: Converting Node Type MatMul\n", - "869/1896: Converting Node Type Add\n", - "870/1896: Converting Node Type Add\n", - "871/1896: Converting Node Type ReduceMean\n", - "872/1896: Converting Node Type Sub\n", - "873/1896: Converting Node Type Pow\n", - "874/1896: Converting Node Type ReduceMean\n", - "875/1896: Converting Node Type Add\n", - "876/1896: Converting Node Type Sqrt\n", - "877/1896: Converting Node Type Div\n", - "878/1896: Converting Node Type Mul\n", - "879/1896: Converting Node Type Add\n", - "880/1896: Converting Node Type MatMul\n", - "881/1896: Converting Node Type Add\n", - "882/1896: Converting Node Type MatMul\n", - "883/1896: Converting Node Type Add\n", - "884/1896: Converting Node Type MatMul\n", - "885/1896: Converting Node Type Add\n", - "886/1896: Converting Node Type Shape\n", - "887/1896: Converting Node Type Gather\n", - "888/1896: Converting Node Type Shape\n", - "889/1896: Converting Node Type Gather\n", - "890/1896: Converting Node Type Unsqueeze\n", - "891/1896: Converting Node Type Unsqueeze\n", - "892/1896: Converting Node Type Concat\n", - "893/1896: Converting Node Type Reshape\n", - "894/1896: Converting Node Type Transpose\n", - "895/1896: Converting Node Type Shape\n", - "896/1896: Converting Node Type Gather\n", - "897/1896: Converting Node Type Shape\n", - "898/1896: Converting Node Type Gather\n", - "899/1896: Converting Node Type Unsqueeze\n", - "900/1896: Converting Node Type Unsqueeze\n", - "901/1896: Converting Node Type Concat\n", - "902/1896: Converting Node Type Reshape\n", - "903/1896: Converting Node Type Shape\n", - "904/1896: Converting Node Type Gather\n", - "905/1896: Converting Node Type Shape\n", - "906/1896: Converting Node Type Gather\n", - "907/1896: Converting Node Type Unsqueeze\n", - "908/1896: Converting Node Type Unsqueeze\n", - "909/1896: Converting Node Type Concat\n", - "910/1896: Converting Node Type Reshape\n", - "911/1896: Converting Node Type Transpose\n", - "912/1896: Converting Node Type Transpose\n", - "913/1896: Converting Node Type MatMul\n", - "914/1896: Converting Node Type Div\n", - "915/1896: Converting Node Type Add\n", - "916/1896: Converting Node Type Softmax\n", - "917/1896: Converting Node Type MatMul\n", - "918/1896: Converting Node Type Transpose\n", - "919/1896: Converting Node Type Shape\n", - "920/1896: Converting Node Type Gather\n", - "921/1896: Converting Node Type Shape\n", - "922/1896: Converting Node Type Gather\n", - "923/1896: Converting Node Type Unsqueeze\n", - "924/1896: Converting Node Type Unsqueeze\n", - "925/1896: Converting Node Type Concat\n", - "926/1896: Converting Node Type Reshape\n", - "927/1896: Converting Node Type MatMul\n", - "928/1896: Converting Node Type Add\n", - "929/1896: Converting Node Type Add\n", - "930/1896: Converting Node Type ReduceMean\n", - "931/1896: Converting Node Type Sub\n", - "932/1896: Converting Node Type Pow\n", - "933/1896: Converting Node Type ReduceMean\n", - "934/1896: Converting Node Type Add\n", - "935/1896: Converting Node Type Sqrt\n", - "936/1896: Converting Node Type Div\n", - "937/1896: Converting Node Type Mul\n", - "938/1896: Converting Node Type Add\n", - "939/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "940/1896: Converting Node Type Add\n", - "941/1896: Converting Node Type Mul\n", - "942/1896: Converting Node Type Div\n", - "943/1896: Converting Node Type Erf\n", - "944/1896: Converting Node Type Add\n", - "945/1896: Converting Node Type Mul\n", - "946/1896: Converting Node Type MatMul\n", - "947/1896: Converting Node Type Add\n", - "948/1896: Converting Node Type Add\n", - "949/1896: Converting Node Type ReduceMean\n", - "950/1896: Converting Node Type Sub\n", - "951/1896: Converting Node Type Pow\n", - "952/1896: Converting Node Type ReduceMean\n", - "953/1896: Converting Node Type Add\n", - "954/1896: Converting Node Type Sqrt\n", - "955/1896: Converting Node Type Div\n", - "956/1896: Converting Node Type Mul\n", - "957/1896: Converting Node Type Add\n", - "958/1896: Converting Node Type MatMul\n", - "959/1896: Converting Node Type Add\n", - "960/1896: Converting Node Type MatMul\n", - "961/1896: Converting Node Type Add\n", - "962/1896: Converting Node Type MatMul\n", - "963/1896: Converting Node Type Add\n", - "964/1896: Converting Node Type Shape\n", - "965/1896: Converting Node Type Gather\n", - "966/1896: Converting Node Type Shape\n", - "967/1896: Converting Node Type Gather\n", - "968/1896: Converting Node Type Unsqueeze\n", - "969/1896: Converting Node Type Unsqueeze\n", - "970/1896: Converting Node Type Concat\n", - "971/1896: Converting Node Type Reshape\n", - "972/1896: Converting Node Type Transpose\n", - "973/1896: Converting Node Type Shape\n", - "974/1896: Converting Node Type Gather\n", - "975/1896: Converting Node Type Shape\n", - "976/1896: Converting Node Type Gather\n", - "977/1896: Converting Node Type Unsqueeze\n", - "978/1896: Converting Node Type Unsqueeze\n", - "979/1896: Converting Node Type Concat\n", - "980/1896: Converting Node Type Reshape\n", - "981/1896: Converting Node Type Shape\n", - "982/1896: Converting Node Type Gather\n", - "983/1896: Converting Node Type Shape\n", - "984/1896: Converting Node Type Gather\n", - "985/1896: Converting Node Type Unsqueeze\n", - "986/1896: Converting Node Type Unsqueeze\n", - "987/1896: Converting Node Type Concat\n", - "988/1896: Converting Node Type Reshape\n", - "989/1896: Converting Node Type Transpose\n", - "990/1896: Converting Node Type Transpose\n", - "991/1896: Converting Node Type MatMul\n", - "992/1896: Converting Node Type Div\n", - "993/1896: Converting Node Type Add\n", - "994/1896: Converting Node Type Softmax\n", - "995/1896: Converting Node Type MatMul\n", - "996/1896: Converting Node Type Transpose\n", - "997/1896: Converting Node Type Shape\n", - "998/1896: Converting Node Type Gather\n", - "999/1896: Converting Node Type Shape\n", - "1000/1896: Converting Node Type Gather\n", - "1001/1896: Converting Node Type Unsqueeze\n", - "1002/1896: Converting Node Type Unsqueeze\n", - "1003/1896: Converting Node Type Concat\n", - "1004/1896: Converting Node Type Reshape\n", - "1005/1896: Converting Node Type MatMul\n", - "1006/1896: Converting Node Type Add\n", - "1007/1896: Converting Node Type Add\n", - "1008/1896: Converting Node Type ReduceMean\n", - "1009/1896: Converting Node Type Sub\n", - "1010/1896: Converting Node Type Pow\n", - "1011/1896: Converting Node Type ReduceMean\n", - "1012/1896: Converting Node Type Add\n", - "1013/1896: Converting Node Type Sqrt\n", - "1014/1896: Converting Node Type Div\n", - "1015/1896: Converting Node Type Mul\n", - "1016/1896: Converting Node Type Add\n", - "1017/1896: Converting Node Type MatMul\n", - "1018/1896: 
Converting Node Type Add\n", - "1019/1896: Converting Node Type Mul\n", - "1020/1896: Converting Node Type Div\n", - "1021/1896: Converting Node Type Erf\n", - "1022/1896: Converting Node Type Add\n", - "1023/1896: Converting Node Type Mul\n", - "1024/1896: Converting Node Type MatMul\n", - "1025/1896: Converting Node Type Add\n", - "1026/1896: Converting Node Type Add\n", - "1027/1896: Converting Node Type ReduceMean\n", - "1028/1896: Converting Node Type Sub\n", - "1029/1896: Converting Node Type Pow\n", - "1030/1896: Converting Node Type ReduceMean\n", - "1031/1896: Converting Node Type Add\n", - "1032/1896: Converting Node Type Sqrt\n", - "1033/1896: Converting Node Type Div\n", - "1034/1896: Converting Node Type Mul\n", - "1035/1896: Converting Node Type Add\n", - "1036/1896: Converting Node Type MatMul\n", - "1037/1896: Converting Node Type Add\n", - "1038/1896: Converting Node Type MatMul\n", - "1039/1896: Converting Node Type Add\n", - "1040/1896: Converting Node Type MatMul\n", - "1041/1896: Converting Node Type Add\n", - "1042/1896: Converting Node Type Shape\n", - "1043/1896: Converting Node Type Gather\n", - "1044/1896: Converting Node Type Shape\n", - "1045/1896: Converting Node Type Gather\n", - "1046/1896: Converting Node Type Unsqueeze\n", - "1047/1896: Converting Node Type Unsqueeze\n", - "1048/1896: Converting Node Type Concat\n", - "1049/1896: Converting Node Type Reshape\n", - "1050/1896: Converting Node Type Transpose\n", - "1051/1896: Converting Node Type Shape\n", - "1052/1896: Converting Node Type Gather\n", - "1053/1896: Converting Node Type Shape\n", - "1054/1896: Converting Node Type Gather\n", - "1055/1896: Converting Node Type Unsqueeze\n", - "1056/1896: Converting Node Type Unsqueeze\n", - "1057/1896: Converting Node Type Concat\n", - "1058/1896: Converting Node Type Reshape\n", - "1059/1896: Converting Node Type Shape\n", - "1060/1896: Converting Node Type Gather\n", - "1061/1896: Converting Node Type Shape\n", - "1062/1896: Converting Node Type Gather\n", - "1063/1896: Converting Node Type Unsqueeze\n", - "1064/1896: Converting Node Type Unsqueeze\n", - "1065/1896: Converting Node Type Concat\n", - "1066/1896: Converting Node Type Reshape\n", - "1067/1896: Converting Node Type Transpose\n", - "1068/1896: Converting Node Type Transpose\n", - "1069/1896: Converting Node Type MatMul\n", - "1070/1896: Converting Node Type Div\n", - "1071/1896: Converting Node Type Add\n", - "1072/1896: Converting Node Type Softmax\n", - "1073/1896: Converting Node Type MatMul\n", - "1074/1896: Converting Node Type Transpose\n", - "1075/1896: Converting Node Type Shape\n", - "1076/1896: Converting Node Type Gather\n", - "1077/1896: Converting Node Type Shape\n", - "1078/1896: Converting Node Type Gather\n", - "1079/1896: Converting Node Type Unsqueeze\n", - "1080/1896: Converting Node Type Unsqueeze\n", - "1081/1896: Converting Node Type Concat\n", - "1082/1896: Converting Node Type Reshape\n", - "1083/1896: Converting Node Type MatMul\n", - "1084/1896: Converting Node Type Add\n", - "1085/1896: Converting Node Type Add\n", - "1086/1896: Converting Node Type ReduceMean\n", - "1087/1896: Converting Node Type Sub\n", - "1088/1896: Converting Node Type Pow\n", - "1089/1896: Converting Node Type ReduceMean\n", - "1090/1896: Converting Node Type Add\n", - "1091/1896: Converting Node Type Sqrt\n", - "1092/1896: Converting Node Type Div\n", - "1093/1896: Converting Node Type Mul\n", - "1094/1896: Converting Node Type Add\n", - "1095/1896: Converting Node Type MatMul\n", - "1096/1896: 
Converting Node Type Add\n", - "1097/1896: Converting Node Type Mul\n", - "1098/1896: Converting Node Type Div\n", - "1099/1896: Converting Node Type Erf\n", - "1100/1896: Converting Node Type Add\n", - "1101/1896: Converting Node Type Mul\n", - "1102/1896: Converting Node Type MatMul\n", - "1103/1896: Converting Node Type Add\n", - "1104/1896: Converting Node Type Add\n", - "1105/1896: Converting Node Type ReduceMean\n", - "1106/1896: Converting Node Type Sub\n", - "1107/1896: Converting Node Type Pow\n", - "1108/1896: Converting Node Type ReduceMean\n", - "1109/1896: Converting Node Type Add\n", - "1110/1896: Converting Node Type Sqrt\n", - "1111/1896: Converting Node Type Div\n", - "1112/1896: Converting Node Type Mul\n", - "1113/1896: Converting Node Type Add\n", - "1114/1896: Converting Node Type MatMul\n", - "1115/1896: Converting Node Type Add\n", - "1116/1896: Converting Node Type MatMul\n", - "1117/1896: Converting Node Type Add\n", - "1118/1896: Converting Node Type MatMul\n", - "1119/1896: Converting Node Type Add\n", - "1120/1896: Converting Node Type Shape\n", - "1121/1896: Converting Node Type Gather\n", - "1122/1896: Converting Node Type Shape\n", - "1123/1896: Converting Node Type Gather\n", - "1124/1896: Converting Node Type Unsqueeze\n", - "1125/1896: Converting Node Type Unsqueeze\n", - "1126/1896: Converting Node Type Concat\n", - "1127/1896: Converting Node Type Reshape\n", - "1128/1896: Converting Node Type Transpose\n", - "1129/1896: Converting Node Type Shape\n", - "1130/1896: Converting Node Type Gather\n", - "1131/1896: Converting Node Type Shape\n", - "1132/1896: Converting Node Type Gather\n", - "1133/1896: Converting Node Type Unsqueeze\n", - "1134/1896: Converting Node Type Unsqueeze\n", - "1135/1896: Converting Node Type Concat\n", - "1136/1896: Converting Node Type Reshape\n", - "1137/1896: Converting Node Type Shape\n", - "1138/1896: Converting Node Type Gather\n", - "1139/1896: Converting Node Type Shape\n", - "1140/1896: Converting Node Type Gather\n", - "1141/1896: Converting Node Type Unsqueeze\n", - "1142/1896: Converting Node Type Unsqueeze\n", - "1143/1896: Converting Node Type Concat\n", - "1144/1896: Converting Node Type Reshape\n", - "1145/1896: Converting Node Type Transpose\n", - "1146/1896: Converting Node Type Transpose\n", - "1147/1896: Converting Node Type MatMul\n", - "1148/1896: Converting Node Type Div\n", - "1149/1896: Converting Node Type Add\n", - "1150/1896: Converting Node Type Softmax\n", - "1151/1896: Converting Node Type MatMul\n", - "1152/1896: Converting Node Type Transpose\n", - "1153/1896: Converting Node Type Shape\n", - "1154/1896: Converting Node Type Gather\n", - "1155/1896: Converting Node Type Shape\n", - "1156/1896: Converting Node Type Gather\n", - "1157/1896: Converting Node Type Unsqueeze\n", - "1158/1896: Converting Node Type Unsqueeze\n", - "1159/1896: Converting Node Type Concat\n", - "1160/1896: Converting Node Type Reshape\n", - "1161/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1162/1896: Converting Node Type Add\n", - "1163/1896: Converting Node Type Add\n", - "1164/1896: Converting Node Type ReduceMean\n", - "1165/1896: Converting Node Type Sub\n", - "1166/1896: Converting Node Type Pow\n", - "1167/1896: Converting Node Type ReduceMean\n", - "1168/1896: Converting Node Type Add\n", - "1169/1896: Converting Node Type Sqrt\n", - "1170/1896: Converting Node Type Div\n", - "1171/1896: Converting Node Type Mul\n", - "1172/1896: Converting Node Type 
Add\n", - "1173/1896: Converting Node Type MatMul\n", - "1174/1896: Converting Node Type Add\n", - "1175/1896: Converting Node Type Mul\n", - "1176/1896: Converting Node Type Div\n", - "1177/1896: Converting Node Type Erf\n", - "1178/1896: Converting Node Type Add\n", - "1179/1896: Converting Node Type Mul\n", - "1180/1896: Converting Node Type MatMul\n", - "1181/1896: Converting Node Type Add\n", - "1182/1896: Converting Node Type Add\n", - "1183/1896: Converting Node Type ReduceMean\n", - "1184/1896: Converting Node Type Sub\n", - "1185/1896: Converting Node Type Pow\n", - "1186/1896: Converting Node Type ReduceMean\n", - "1187/1896: Converting Node Type Add\n", - "1188/1896: Converting Node Type Sqrt\n", - "1189/1896: Converting Node Type Div\n", - "1190/1896: Converting Node Type Mul\n", - "1191/1896: Converting Node Type Add\n", - "1192/1896: Converting Node Type MatMul\n", - "1193/1896: Converting Node Type Add\n", - "1194/1896: Converting Node Type MatMul\n", - "1195/1896: Converting Node Type Add\n", - "1196/1896: Converting Node Type MatMul\n", - "1197/1896: Converting Node Type Add\n", - "1198/1896: Converting Node Type Shape\n", - "1199/1896: Converting Node Type Gather\n", - "1200/1896: Converting Node Type Shape\n", - "1201/1896: Converting Node Type Gather\n", - "1202/1896: Converting Node Type Unsqueeze\n", - "1203/1896: Converting Node Type Unsqueeze\n", - "1204/1896: Converting Node Type Concat\n", - "1205/1896: Converting Node Type Reshape\n", - "1206/1896: Converting Node Type Transpose\n", - "1207/1896: Converting Node Type Shape\n", - "1208/1896: Converting Node Type Gather\n", - "1209/1896: Converting Node Type Shape\n", - "1210/1896: Converting Node Type Gather\n", - "1211/1896: Converting Node Type Unsqueeze\n", - "1212/1896: Converting Node Type Unsqueeze\n", - "1213/1896: Converting Node Type Concat\n", - "1214/1896: Converting Node Type Reshape\n", - "1215/1896: Converting Node Type Shape\n", - "1216/1896: Converting Node Type Gather\n", - "1217/1896: Converting Node Type Shape\n", - "1218/1896: Converting Node Type Gather\n", - "1219/1896: Converting Node Type Unsqueeze\n", - "1220/1896: Converting Node Type Unsqueeze\n", - "1221/1896: Converting Node Type Concat\n", - "1222/1896: Converting Node Type Reshape\n", - "1223/1896: Converting Node Type Transpose\n", - "1224/1896: Converting Node Type Transpose\n", - "1225/1896: Converting Node Type MatMul\n", - "1226/1896: Converting Node Type Div\n", - "1227/1896: Converting Node Type Add\n", - "1228/1896: Converting Node Type Softmax\n", - "1229/1896: Converting Node Type MatMul\n", - "1230/1896: Converting Node Type Transpose\n", - "1231/1896: Converting Node Type Shape\n", - "1232/1896: Converting Node Type Gather\n", - "1233/1896: Converting Node Type Shape\n", - "1234/1896: Converting Node Type Gather\n", - "1235/1896: Converting Node Type Unsqueeze\n", - "1236/1896: Converting Node Type Unsqueeze\n", - "1237/1896: Converting Node Type Concat\n", - "1238/1896: Converting Node Type Reshape\n", - "1239/1896: Converting Node Type MatMul\n", - "1240/1896: Converting Node Type Add\n", - "1241/1896: Converting Node Type Add\n", - "1242/1896: Converting Node Type ReduceMean\n", - "1243/1896: Converting Node Type Sub\n", - "1244/1896: Converting Node Type Pow\n", - "1245/1896: Converting Node Type ReduceMean\n", - "1246/1896: Converting Node Type Add\n", - "1247/1896: Converting Node Type Sqrt\n", - "1248/1896: Converting Node Type Div\n", - "1249/1896: Converting Node Type Mul\n", - "1250/1896: Converting Node Type 
Add\n", - "1251/1896: Converting Node Type MatMul\n", - "1252/1896: Converting Node Type Add\n", - "1253/1896: Converting Node Type Mul\n", - "1254/1896: Converting Node Type Div\n", - "1255/1896: Converting Node Type Erf\n", - "1256/1896: Converting Node Type Add\n", - "1257/1896: Converting Node Type Mul\n", - "1258/1896: Converting Node Type MatMul\n", - "1259/1896: Converting Node Type Add\n", - "1260/1896: Converting Node Type Add\n", - "1261/1896: Converting Node Type ReduceMean\n", - "1262/1896: Converting Node Type Sub\n", - "1263/1896: Converting Node Type Pow\n", - "1264/1896: Converting Node Type ReduceMean\n", - "1265/1896: Converting Node Type Add\n", - "1266/1896: Converting Node Type Sqrt\n", - "1267/1896: Converting Node Type Div\n", - "1268/1896: Converting Node Type Mul\n", - "1269/1896: Converting Node Type Add\n", - "1270/1896: Converting Node Type MatMul\n", - "1271/1896: Converting Node Type Add\n", - "1272/1896: Converting Node Type MatMul\n", - "1273/1896: Converting Node Type Add\n", - "1274/1896: Converting Node Type MatMul\n", - "1275/1896: Converting Node Type Add\n", - "1276/1896: Converting Node Type Shape\n", - "1277/1896: Converting Node Type Gather\n", - "1278/1896: Converting Node Type Shape\n", - "1279/1896: Converting Node Type Gather\n", - "1280/1896: Converting Node Type Unsqueeze\n", - "1281/1896: Converting Node Type Unsqueeze\n", - "1282/1896: Converting Node Type Concat\n", - "1283/1896: Converting Node Type Reshape\n", - "1284/1896: Converting Node Type Transpose\n", - "1285/1896: Converting Node Type Shape\n", - "1286/1896: Converting Node Type Gather\n", - "1287/1896: Converting Node Type Shape\n", - "1288/1896: Converting Node Type Gather\n", - "1289/1896: Converting Node Type Unsqueeze\n", - "1290/1896: Converting Node Type Unsqueeze\n", - "1291/1896: Converting Node Type Concat\n", - "1292/1896: Converting Node Type Reshape\n", - "1293/1896: Converting Node Type Shape\n", - "1294/1896: Converting Node Type Gather\n", - "1295/1896: Converting Node Type Shape\n", - "1296/1896: Converting Node Type Gather\n", - "1297/1896: Converting Node Type Unsqueeze\n", - "1298/1896: Converting Node Type Unsqueeze\n", - "1299/1896: Converting Node Type Concat\n", - "1300/1896: Converting Node Type Reshape\n", - "1301/1896: Converting Node Type Transpose\n", - "1302/1896: Converting Node Type Transpose\n", - "1303/1896: Converting Node Type MatMul\n", - "1304/1896: Converting Node Type Div\n", - "1305/1896: Converting Node Type Add\n", - "1306/1896: Converting Node Type Softmax\n", - "1307/1896: Converting Node Type MatMul\n", - "1308/1896: Converting Node Type Transpose\n", - "1309/1896: Converting Node Type Shape\n", - "1310/1896: Converting Node Type Gather\n", - "1311/1896: Converting Node Type Shape\n", - "1312/1896: Converting Node Type Gather\n", - "1313/1896: Converting Node Type Unsqueeze\n", - "1314/1896: Converting Node Type Unsqueeze\n", - "1315/1896: Converting Node Type Concat\n", - "1316/1896: Converting Node Type Reshape\n", - "1317/1896: Converting Node Type MatMul\n", - "1318/1896: Converting Node Type Add\n", - "1319/1896: Converting Node Type Add\n", - "1320/1896: Converting Node Type ReduceMean\n", - "1321/1896: Converting Node Type Sub\n", - "1322/1896: Converting Node Type Pow\n", - "1323/1896: Converting Node Type ReduceMean\n", - "1324/1896: Converting Node Type Add\n", - "1325/1896: Converting Node Type Sqrt\n", - "1326/1896: Converting Node Type Div\n", - "1327/1896: Converting Node Type Mul\n", - "1328/1896: Converting Node Type 
Add\n", - "1329/1896: Converting Node Type MatMul\n", - "1330/1896: Converting Node Type Add\n", - "1331/1896: Converting Node Type Mul\n", - "1332/1896: Converting Node Type Div\n", - "1333/1896: Converting Node Type Erf\n", - "1334/1896: Converting Node Type Add\n", - "1335/1896: Converting Node Type Mul\n", - "1336/1896: Converting Node Type MatMul\n", - "1337/1896: Converting Node Type Add\n", - "1338/1896: Converting Node Type Add\n", - "1339/1896: Converting Node Type ReduceMean\n", - "1340/1896: Converting Node Type Sub\n", - "1341/1896: Converting Node Type Pow\n", - "1342/1896: Converting Node Type ReduceMean\n", - "1343/1896: Converting Node Type Add\n", - "1344/1896: Converting Node Type Sqrt\n", - "1345/1896: Converting Node Type Div\n", - "1346/1896: Converting Node Type Mul\n", - "1347/1896: Converting Node Type Add\n", - "1348/1896: Converting Node Type MatMul\n", - "1349/1896: Converting Node Type Add\n", - "1350/1896: Converting Node Type MatMul\n", - "1351/1896: Converting Node Type Add\n", - "1352/1896: Converting Node Type MatMul\n", - "1353/1896: Converting Node Type Add\n", - "1354/1896: Converting Node Type Shape\n", - "1355/1896: Converting Node Type Gather\n", - "1356/1896: Converting Node Type Shape\n", - "1357/1896: Converting Node Type Gather\n", - "1358/1896: Converting Node Type Unsqueeze\n", - "1359/1896: Converting Node Type Unsqueeze\n", - "1360/1896: Converting Node Type Concat\n", - "1361/1896: Converting Node Type Reshape\n", - "1362/1896: Converting Node Type Transpose\n", - "1363/1896: Converting Node Type Shape\n", - "1364/1896: Converting Node Type Gather\n", - "1365/1896: Converting Node Type Shape\n", - "1366/1896: Converting Node Type Gather\n", - "1367/1896: Converting Node Type Unsqueeze\n", - "1368/1896: Converting Node Type Unsqueeze\n", - "1369/1896: Converting Node Type Concat\n", - "1370/1896: Converting Node Type Reshape\n", - "1371/1896: Converting Node Type Shape\n", - "1372/1896: Converting Node Type Gather\n", - "1373/1896: Converting Node Type Shape\n", - "1374/1896: Converting Node Type Gather\n", - "1375/1896: Converting Node Type Unsqueeze\n", - "1376/1896: Converting Node Type Unsqueeze\n", - "1377/1896: Converting Node Type Concat\n", - "1378/1896: Converting Node Type Reshape\n", - "1379/1896: Converting Node Type Transpose\n", - "1380/1896: Converting Node Type Transpose\n", - "1381/1896: Converting Node Type MatMul\n", - "1382/1896: Converting Node Type Div\n", - "1383/1896: Converting Node Type Add\n", - "1384/1896: Converting Node Type Softmax\n", - "1385/1896: Converting Node Type MatMul\n", - "1386/1896: Converting Node Type Transpose\n", - "1387/1896: Converting Node Type Shape\n", - "1388/1896: Converting Node Type Gather\n", - "1389/1896: Converting Node Type Shape\n", - "1390/1896: Converting Node Type Gather\n", - "1391/1896: Converting Node Type Unsqueeze\n", - "1392/1896: Converting Node Type Unsqueeze\n", - "1393/1896: Converting Node Type Concat\n", - "1394/1896: Converting Node Type Reshape\n", - "1395/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1396/1896: Converting Node Type Add\n", - "1397/1896: Converting Node Type Add\n", - "1398/1896: Converting Node Type ReduceMean\n", - "1399/1896: Converting Node Type Sub\n", - "1400/1896: Converting Node Type Pow\n", - "1401/1896: Converting Node Type ReduceMean\n", - "1402/1896: Converting Node Type Add\n", - "1403/1896: Converting Node Type Sqrt\n", - "1404/1896: Converting Node Type Div\n", - 
"1405/1896: Converting Node Type Mul\n", - "1406/1896: Converting Node Type Add\n", - "1407/1896: Converting Node Type MatMul\n", - "1408/1896: Converting Node Type Add\n", - "1409/1896: Converting Node Type Mul\n", - "1410/1896: Converting Node Type Div\n", - "1411/1896: Converting Node Type Erf\n", - "1412/1896: Converting Node Type Add\n", - "1413/1896: Converting Node Type Mul\n", - "1414/1896: Converting Node Type MatMul\n", - "1415/1896: Converting Node Type Add\n", - "1416/1896: Converting Node Type Add\n", - "1417/1896: Converting Node Type ReduceMean\n", - "1418/1896: Converting Node Type Sub\n", - "1419/1896: Converting Node Type Pow\n", - "1420/1896: Converting Node Type ReduceMean\n", - "1421/1896: Converting Node Type Add\n", - "1422/1896: Converting Node Type Sqrt\n", - "1423/1896: Converting Node Type Div\n", - "1424/1896: Converting Node Type Mul\n", - "1425/1896: Converting Node Type Add\n", - "1426/1896: Converting Node Type MatMul\n", - "1427/1896: Converting Node Type Add\n", - "1428/1896: Converting Node Type MatMul\n", - "1429/1896: Converting Node Type Add\n", - "1430/1896: Converting Node Type MatMul\n", - "1431/1896: Converting Node Type Add\n", - "1432/1896: Converting Node Type Shape\n", - "1433/1896: Converting Node Type Gather\n", - "1434/1896: Converting Node Type Shape\n", - "1435/1896: Converting Node Type Gather\n", - "1436/1896: Converting Node Type Unsqueeze\n", - "1437/1896: Converting Node Type Unsqueeze\n", - "1438/1896: Converting Node Type Concat\n", - "1439/1896: Converting Node Type Reshape\n", - "1440/1896: Converting Node Type Transpose\n", - "1441/1896: Converting Node Type Shape\n", - "1442/1896: Converting Node Type Gather\n", - "1443/1896: Converting Node Type Shape\n", - "1444/1896: Converting Node Type Gather\n", - "1445/1896: Converting Node Type Unsqueeze\n", - "1446/1896: Converting Node Type Unsqueeze\n", - "1447/1896: Converting Node Type Concat\n", - "1448/1896: Converting Node Type Reshape\n", - "1449/1896: Converting Node Type Shape\n", - "1450/1896: Converting Node Type Gather\n", - "1451/1896: Converting Node Type Shape\n", - "1452/1896: Converting Node Type Gather\n", - "1453/1896: Converting Node Type Unsqueeze\n", - "1454/1896: Converting Node Type Unsqueeze\n", - "1455/1896: Converting Node Type Concat\n", - "1456/1896: Converting Node Type Reshape\n", - "1457/1896: Converting Node Type Transpose\n", - "1458/1896: Converting Node Type Transpose\n", - "1459/1896: Converting Node Type MatMul\n", - "1460/1896: Converting Node Type Div\n", - "1461/1896: Converting Node Type Add\n", - "1462/1896: Converting Node Type Softmax\n", - "1463/1896: Converting Node Type MatMul\n", - "1464/1896: Converting Node Type Transpose\n", - "1465/1896: Converting Node Type Shape\n", - "1466/1896: Converting Node Type Gather\n", - "1467/1896: Converting Node Type Shape\n", - "1468/1896: Converting Node Type Gather\n", - "1469/1896: Converting Node Type Unsqueeze\n", - "1470/1896: Converting Node Type Unsqueeze\n", - "1471/1896: Converting Node Type Concat\n", - "1472/1896: Converting Node Type Reshape\n", - "1473/1896: Converting Node Type MatMul\n", - "1474/1896: Converting Node Type Add\n", - "1475/1896: Converting Node Type Add\n", - "1476/1896: Converting Node Type ReduceMean\n", - "1477/1896: Converting Node Type Sub\n", - "1478/1896: Converting Node Type Pow\n", - "1479/1896: Converting Node Type ReduceMean\n", - "1480/1896: Converting Node Type Add\n", - "1481/1896: Converting Node Type Sqrt\n", - "1482/1896: Converting Node Type Div\n", - 
"1483/1896: Converting Node Type Mul\n", - "1484/1896: Converting Node Type Add\n", - "1485/1896: Converting Node Type MatMul\n", - "1486/1896: Converting Node Type Add\n", - "1487/1896: Converting Node Type Mul\n", - "1488/1896: Converting Node Type Div\n", - "1489/1896: Converting Node Type Erf\n", - "1490/1896: Converting Node Type Add\n", - "1491/1896: Converting Node Type Mul\n", - "1492/1896: Converting Node Type MatMul\n", - "1493/1896: Converting Node Type Add\n", - "1494/1896: Converting Node Type Add\n", - "1495/1896: Converting Node Type ReduceMean\n", - "1496/1896: Converting Node Type Sub\n", - "1497/1896: Converting Node Type Pow\n", - "1498/1896: Converting Node Type ReduceMean\n", - "1499/1896: Converting Node Type Add\n", - "1500/1896: Converting Node Type Sqrt\n", - "1501/1896: Converting Node Type Div\n", - "1502/1896: Converting Node Type Mul\n", - "1503/1896: Converting Node Type Add\n", - "1504/1896: Converting Node Type MatMul\n", - "1505/1896: Converting Node Type Add\n", - "1506/1896: Converting Node Type MatMul\n", - "1507/1896: Converting Node Type Add\n", - "1508/1896: Converting Node Type MatMul\n", - "1509/1896: Converting Node Type Add\n", - "1510/1896: Converting Node Type Shape\n", - "1511/1896: Converting Node Type Gather\n", - "1512/1896: Converting Node Type Shape\n", - "1513/1896: Converting Node Type Gather\n", - "1514/1896: Converting Node Type Unsqueeze\n", - "1515/1896: Converting Node Type Unsqueeze\n", - "1516/1896: Converting Node Type Concat\n", - "1517/1896: Converting Node Type Reshape\n", - "1518/1896: Converting Node Type Transpose\n", - "1519/1896: Converting Node Type Shape\n", - "1520/1896: Converting Node Type Gather\n", - "1521/1896: Converting Node Type Shape\n", - "1522/1896: Converting Node Type Gather\n", - "1523/1896: Converting Node Type Unsqueeze\n", - "1524/1896: Converting Node Type Unsqueeze\n", - "1525/1896: Converting Node Type Concat\n", - "1526/1896: Converting Node Type Reshape\n", - "1527/1896: Converting Node Type Shape\n", - "1528/1896: Converting Node Type Gather\n", - "1529/1896: Converting Node Type Shape\n", - "1530/1896: Converting Node Type Gather\n", - "1531/1896: Converting Node Type Unsqueeze\n", - "1532/1896: Converting Node Type Unsqueeze\n", - "1533/1896: Converting Node Type Concat\n", - "1534/1896: Converting Node Type Reshape\n", - "1535/1896: Converting Node Type Transpose\n", - "1536/1896: Converting Node Type Transpose\n", - "1537/1896: Converting Node Type MatMul\n", - "1538/1896: Converting Node Type Div\n", - "1539/1896: Converting Node Type Add\n", - "1540/1896: Converting Node Type Softmax\n", - "1541/1896: Converting Node Type MatMul\n", - "1542/1896: Converting Node Type Transpose\n", - "1543/1896: Converting Node Type Shape\n", - "1544/1896: Converting Node Type Gather\n", - "1545/1896: Converting Node Type Shape\n", - "1546/1896: Converting Node Type Gather\n", - "1547/1896: Converting Node Type Unsqueeze\n", - "1548/1896: Converting Node Type Unsqueeze\n", - "1549/1896: Converting Node Type Concat\n", - "1550/1896: Converting Node Type Reshape\n", - "1551/1896: Converting Node Type MatMul\n", - "1552/1896: Converting Node Type Add\n", - "1553/1896: Converting Node Type Add\n", - "1554/1896: Converting Node Type ReduceMean\n", - "1555/1896: Converting Node Type Sub\n", - "1556/1896: Converting Node Type Pow\n", - "1557/1896: Converting Node Type ReduceMean\n", - "1558/1896: Converting Node Type Add\n", - "1559/1896: Converting Node Type Sqrt\n", - "1560/1896: Converting Node Type Div\n", - 
"1561/1896: Converting Node Type Mul\n", - "1562/1896: Converting Node Type Add\n", - "1563/1896: Converting Node Type MatMul\n", - "1564/1896: Converting Node Type Add\n", - "1565/1896: Converting Node Type Mul\n", - "1566/1896: Converting Node Type Div\n", - "1567/1896: Converting Node Type Erf\n", - "1568/1896: Converting Node Type Add\n", - "1569/1896: Converting Node Type Mul\n", - "1570/1896: Converting Node Type MatMul\n", - "1571/1896: Converting Node Type Add\n", - "1572/1896: Converting Node Type Add\n", - "1573/1896: Converting Node Type ReduceMean\n", - "1574/1896: Converting Node Type Sub\n", - "1575/1896: Converting Node Type Pow\n", - "1576/1896: Converting Node Type ReduceMean\n", - "1577/1896: Converting Node Type Add\n", - "1578/1896: Converting Node Type Sqrt\n", - "1579/1896: Converting Node Type Div\n", - "1580/1896: Converting Node Type Mul\n", - "1581/1896: Converting Node Type Add\n", - "1582/1896: Converting Node Type MatMul\n", - "1583/1896: Converting Node Type Add\n", - "1584/1896: Converting Node Type MatMul\n", - "1585/1896: Converting Node Type Add\n", - "1586/1896: Converting Node Type MatMul\n", - "1587/1896: Converting Node Type Add\n", - "1588/1896: Converting Node Type Shape\n", - "1589/1896: Converting Node Type Gather\n", - "1590/1896: Converting Node Type Shape\n", - "1591/1896: Converting Node Type Gather\n", - "1592/1896: Converting Node Type Unsqueeze\n", - "1593/1896: Converting Node Type Unsqueeze\n", - "1594/1896: Converting Node Type Concat\n", - "1595/1896: Converting Node Type Reshape\n", - "1596/1896: Converting Node Type Transpose\n", - "1597/1896: Converting Node Type Shape\n", - "1598/1896: Converting Node Type Gather\n", - "1599/1896: Converting Node Type Shape\n", - "1600/1896: Converting Node Type Gather\n", - "1601/1896: Converting Node Type Unsqueeze\n", - "1602/1896: Converting Node Type Unsqueeze\n", - "1603/1896: Converting Node Type Concat\n", - "1604/1896: Converting Node Type Reshape\n", - "1605/1896: Converting Node Type Shape\n", - "1606/1896: Converting Node Type Gather\n", - "1607/1896: Converting Node Type Shape\n", - "1608/1896: Converting Node Type Gather\n", - "1609/1896: Converting Node Type Unsqueeze\n", - "1610/1896: Converting Node Type Unsqueeze\n", - "1611/1896: Converting Node Type Concat\n", - "1612/1896: Converting Node Type Reshape\n", - "1613/1896: Converting Node Type Transpose\n", - "1614/1896: Converting Node Type Transpose\n", - "1615/1896: Converting Node Type MatMul\n", - "1616/1896: Converting Node Type Div\n", - "1617/1896: Converting Node Type Add\n", - "1618/1896: Converting Node Type Softmax\n", - "1619/1896: Converting Node Type MatMul\n", - "1620/1896: Converting Node Type Transpose\n", - "1621/1896: Converting Node Type Shape\n", - "1622/1896: Converting Node Type Gather\n", - "1623/1896: Converting Node Type Shape\n", - "1624/1896: Converting Node Type Gather\n", - "1625/1896: Converting Node Type Unsqueeze\n", - "1626/1896: Converting Node Type Unsqueeze\n", - "1627/1896: Converting Node Type Concat\n", - "1628/1896: Converting Node Type Reshape\n", - "1629/1896: Converting Node Type MatMul\n", - "1630/1896: Converting Node Type Add\n", - "1631/1896: Converting Node Type Add\n", - "1632/1896: Converting Node Type ReduceMean\n", - "1633/1896: Converting Node Type Sub\n", - "1634/1896: Converting Node Type Pow\n", - "1635/1896: Converting Node Type ReduceMean\n", - "1636/1896: Converting Node Type Add\n", - "1637/1896: Converting Node Type Sqrt\n", - "1638/1896: Converting Node Type Div\n", - 
"1639/1896: Converting Node Type Mul\n", - "1640/1896: Converting Node Type Add\n", - "1641/1896: Converting Node Type MatMul\n", - "1642/1896: Converting Node Type Add\n", - "1643/1896: Converting Node Type Mul\n", - "1644/1896: Converting Node Type Div\n", - "1645/1896: Converting Node Type Erf\n", - "1646/1896: Converting Node Type Add\n", - "1647/1896: Converting Node Type Mul\n", - "1648/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1649/1896: Converting Node Type Add\n", - "1650/1896: Converting Node Type Add\n", - "1651/1896: Converting Node Type ReduceMean\n", - "1652/1896: Converting Node Type Sub\n", - "1653/1896: Converting Node Type Pow\n", - "1654/1896: Converting Node Type ReduceMean\n", - "1655/1896: Converting Node Type Add\n", - "1656/1896: Converting Node Type Sqrt\n", - "1657/1896: Converting Node Type Div\n", - "1658/1896: Converting Node Type Mul\n", - "1659/1896: Converting Node Type Add\n", - "1660/1896: Converting Node Type MatMul\n", - "1661/1896: Converting Node Type Add\n", - "1662/1896: Converting Node Type MatMul\n", - "1663/1896: Converting Node Type Add\n", - "1664/1896: Converting Node Type MatMul\n", - "1665/1896: Converting Node Type Add\n", - "1666/1896: Converting Node Type Shape\n", - "1667/1896: Converting Node Type Gather\n", - "1668/1896: Converting Node Type Shape\n", - "1669/1896: Converting Node Type Gather\n", - "1670/1896: Converting Node Type Unsqueeze\n", - "1671/1896: Converting Node Type Unsqueeze\n", - "1672/1896: Converting Node Type Concat\n", - "1673/1896: Converting Node Type Reshape\n", - "1674/1896: Converting Node Type Transpose\n", - "1675/1896: Converting Node Type Shape\n", - "1676/1896: Converting Node Type Gather\n", - "1677/1896: Converting Node Type Shape\n", - "1678/1896: Converting Node Type Gather\n", - "1679/1896: Converting Node Type Unsqueeze\n", - "1680/1896: Converting Node Type Unsqueeze\n", - "1681/1896: Converting Node Type Concat\n", - "1682/1896: Converting Node Type Reshape\n", - "1683/1896: Converting Node Type Shape\n", - "1684/1896: Converting Node Type Gather\n", - "1685/1896: Converting Node Type Shape\n", - "1686/1896: Converting Node Type Gather\n", - "1687/1896: Converting Node Type Unsqueeze\n", - "1688/1896: Converting Node Type Unsqueeze\n", - "1689/1896: Converting Node Type Concat\n", - "1690/1896: Converting Node Type Reshape\n", - "1691/1896: Converting Node Type Transpose\n", - "1692/1896: Converting Node Type Transpose\n", - "1693/1896: Converting Node Type MatMul\n", - "1694/1896: Converting Node Type Div\n", - "1695/1896: Converting Node Type Add\n", - "1696/1896: Converting Node Type Softmax\n", - "1697/1896: Converting Node Type MatMul\n", - "1698/1896: Converting Node Type Transpose\n", - "1699/1896: Converting Node Type Shape\n", - "1700/1896: Converting Node Type Gather\n", - "1701/1896: Converting Node Type Shape\n", - "1702/1896: Converting Node Type Gather\n", - "1703/1896: Converting Node Type Unsqueeze\n", - "1704/1896: Converting Node Type Unsqueeze\n", - "1705/1896: Converting Node Type Concat\n", - "1706/1896: Converting Node Type Reshape\n", - "1707/1896: Converting Node Type MatMul\n", - "1708/1896: Converting Node Type Add\n", - "1709/1896: Converting Node Type Add\n", - "1710/1896: Converting Node Type ReduceMean\n", - "1711/1896: Converting Node Type Sub\n", - "1712/1896: Converting Node Type Pow\n", - "1713/1896: Converting Node Type ReduceMean\n", - "1714/1896: Converting Node Type Add\n", - "1715/1896: 
Converting Node Type Sqrt\n", - "1716/1896: Converting Node Type Div\n", - "1717/1896: Converting Node Type Mul\n", - "1718/1896: Converting Node Type Add\n", - "1719/1896: Converting Node Type MatMul\n", - "1720/1896: Converting Node Type Add\n", - "1721/1896: Converting Node Type Mul\n", - "1722/1896: Converting Node Type Div\n", - "1723/1896: Converting Node Type Erf\n", - "1724/1896: Converting Node Type Add\n", - "1725/1896: Converting Node Type Mul\n", - "1726/1896: Converting Node Type MatMul\n", - "1727/1896: Converting Node Type Add\n", - "1728/1896: Converting Node Type Add\n", - "1729/1896: Converting Node Type ReduceMean\n", - "1730/1896: Converting Node Type Sub\n", - "1731/1896: Converting Node Type Pow\n", - "1732/1896: Converting Node Type ReduceMean\n", - "1733/1896: Converting Node Type Add\n", - "1734/1896: Converting Node Type Sqrt\n", - "1735/1896: Converting Node Type Div\n", - "1736/1896: Converting Node Type Mul\n", - "1737/1896: Converting Node Type Add\n", - "1738/1896: Converting Node Type MatMul\n", - "1739/1896: Converting Node Type Add\n", - "1740/1896: Converting Node Type MatMul\n", - "1741/1896: Converting Node Type Add\n", - "1742/1896: Converting Node Type MatMul\n", - "1743/1896: Converting Node Type Add\n", - "1744/1896: Converting Node Type Shape\n", - "1745/1896: Converting Node Type Gather\n", - "1746/1896: Converting Node Type Shape\n", - "1747/1896: Converting Node Type Gather\n", - "1748/1896: Converting Node Type Unsqueeze\n", - "1749/1896: Converting Node Type Unsqueeze\n", - "1750/1896: Converting Node Type Concat\n", - "1751/1896: Converting Node Type Reshape\n", - "1752/1896: Converting Node Type Transpose\n", - "1753/1896: Converting Node Type Shape\n", - "1754/1896: Converting Node Type Gather\n", - "1755/1896: Converting Node Type Shape\n", - "1756/1896: Converting Node Type Gather\n", - "1757/1896: Converting Node Type Unsqueeze\n", - "1758/1896: Converting Node Type Unsqueeze\n", - "1759/1896: Converting Node Type Concat\n", - "1760/1896: Converting Node Type Reshape\n", - "1761/1896: Converting Node Type Shape\n", - "1762/1896: Converting Node Type Gather\n", - "1763/1896: Converting Node Type Shape\n", - "1764/1896: Converting Node Type Gather\n", - "1765/1896: Converting Node Type Unsqueeze\n", - "1766/1896: Converting Node Type Unsqueeze\n", - "1767/1896: Converting Node Type Concat\n", - "1768/1896: Converting Node Type Reshape\n", - "1769/1896: Converting Node Type Transpose\n", - "1770/1896: Converting Node Type Transpose\n", - "1771/1896: Converting Node Type MatMul\n", - "1772/1896: Converting Node Type Div\n", - "1773/1896: Converting Node Type Add\n", - "1774/1896: Converting Node Type Softmax\n", - "1775/1896: Converting Node Type MatMul\n", - "1776/1896: Converting Node Type Transpose\n", - "1777/1896: Converting Node Type Shape\n", - "1778/1896: Converting Node Type Gather\n", - "1779/1896: Converting Node Type Shape\n", - "1780/1896: Converting Node Type Gather\n", - "1781/1896: Converting Node Type Unsqueeze\n", - "1782/1896: Converting Node Type Unsqueeze\n", - "1783/1896: Converting Node Type Concat\n", - "1784/1896: Converting Node Type Reshape\n", - "1785/1896: Converting Node Type MatMul\n", - "1786/1896: Converting Node Type Add\n", - "1787/1896: Converting Node Type Add\n", - "1788/1896: Converting Node Type ReduceMean\n", - "1789/1896: Converting Node Type Sub\n", - "1790/1896: Converting Node Type Pow\n", - "1791/1896: Converting Node Type ReduceMean\n", - "1792/1896: Converting Node Type Add\n", - "1793/1896: 
Converting Node Type Sqrt\n", - "1794/1896: Converting Node Type Div\n", - "1795/1896: Converting Node Type Mul\n", - "1796/1896: Converting Node Type Add\n", - "1797/1896: Converting Node Type MatMul\n", - "1798/1896: Converting Node Type Add\n", - "1799/1896: Converting Node Type Mul\n", - "1800/1896: Converting Node Type Div\n", - "1801/1896: Converting Node Type Erf\n", - "1802/1896: Converting Node Type Add\n", - "1803/1896: Converting Node Type Mul\n", - "1804/1896: Converting Node Type MatMul\n", - "1805/1896: Converting Node Type Add\n", - "1806/1896: Converting Node Type Add\n", - "1807/1896: Converting Node Type ReduceMean\n", - "1808/1896: Converting Node Type Sub\n", - "1809/1896: Converting Node Type Pow\n", - "1810/1896: Converting Node Type ReduceMean\n", - "1811/1896: Converting Node Type Add\n", - "1812/1896: Converting Node Type Sqrt\n", - "1813/1896: Converting Node Type Div\n", - "1814/1896: Converting Node Type Mul\n", - "1815/1896: Converting Node Type Add\n", - "1816/1896: Converting Node Type MatMul\n", - "1817/1896: Converting Node Type Add\n", - "1818/1896: Converting Node Type MatMul\n", - "1819/1896: Converting Node Type Add\n", - "1820/1896: Converting Node Type MatMul\n", - "1821/1896: Converting Node Type Add\n", - "1822/1896: Converting Node Type Shape\n", - "1823/1896: Converting Node Type Gather\n", - "1824/1896: Converting Node Type Shape\n", - "1825/1896: Converting Node Type Gather\n", - "1826/1896: Converting Node Type Unsqueeze\n", - "1827/1896: Converting Node Type Unsqueeze\n", - "1828/1896: Converting Node Type Concat\n", - "1829/1896: Converting Node Type Reshape\n", - "1830/1896: Converting Node Type Transpose\n", - "1831/1896: Converting Node Type Shape\n", - "1832/1896: Converting Node Type Gather\n", - "1833/1896: Converting Node Type Shape\n", - "1834/1896: Converting Node Type Gather\n", - "1835/1896: Converting Node Type Unsqueeze\n", - "1836/1896: Converting Node Type Unsqueeze\n", - "1837/1896: Converting Node Type Concat\n", - "1838/1896: Converting Node Type Reshape\n", - "1839/1896: Converting Node Type Shape\n", - "1840/1896: Converting Node Type Gather\n", - "1841/1896: Converting Node Type Shape\n", - "1842/1896: Converting Node Type Gather\n", - "1843/1896: Converting Node Type Unsqueeze\n", - "1844/1896: Converting Node Type Unsqueeze\n", - "1845/1896: Converting Node Type Concat\n", - "1846/1896: Converting Node Type Reshape\n", - "1847/1896: Converting Node Type Transpose\n", - "1848/1896: Converting Node Type Transpose\n", - "1849/1896: Converting Node Type MatMul\n", - "1850/1896: Converting Node Type Div\n", - "1851/1896: Converting Node Type Add\n", - "1852/1896: Converting Node Type Softmax\n", - "1853/1896: Converting Node Type MatMul\n", - "1854/1896: Converting Node Type Transpose\n", - "1855/1896: Converting Node Type Shape\n", - "1856/1896: Converting Node Type Gather\n", - "1857/1896: Converting Node Type Shape\n", - "1858/1896: Converting Node Type Gather\n", - "1859/1896: Converting Node Type Unsqueeze\n", - "1860/1896: Converting Node Type Unsqueeze\n", - "1861/1896: Converting Node Type Concat\n", - "1862/1896: Converting Node Type Reshape\n", - "1863/1896: Converting Node Type MatMul\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1864/1896: Converting Node Type Add\n", - "1865/1896: Converting Node Type Add\n", - "1866/1896: Converting Node Type ReduceMean\n", - "1867/1896: Converting Node Type Sub\n", - "1868/1896: Converting Node Type Pow\n", - "1869/1896: Converting Node Type 
ReduceMean\n", - "1870/1896: Converting Node Type Add\n", - "1871/1896: Converting Node Type Sqrt\n", - "1872/1896: Converting Node Type Div\n", - "1873/1896: Converting Node Type Mul\n", - "1874/1896: Converting Node Type Add\n", - "1875/1896: Converting Node Type MatMul\n", - "1876/1896: Converting Node Type Add\n", - "1877/1896: Converting Node Type Mul\n", - "1878/1896: Converting Node Type Div\n", - "1879/1896: Converting Node Type Erf\n", - "1880/1896: Converting Node Type Add\n", - "1881/1896: Converting Node Type Mul\n", - "1882/1896: Converting Node Type MatMul\n", - "1883/1896: Converting Node Type Add\n", - "1884/1896: Converting Node Type Add\n", - "1885/1896: Converting Node Type ReduceMean\n", - "1886/1896: Converting Node Type Sub\n", - "1887/1896: Converting Node Type Pow\n", - "1888/1896: Converting Node Type ReduceMean\n", - "1889/1896: Converting Node Type Add\n", - "1890/1896: Converting Node Type Sqrt\n", - "1891/1896: Converting Node Type Div\n", - "1892/1896: Converting Node Type Mul\n", - "1893/1896: Converting Node Type Add\n", - "1894/1896: Converting Node Type Gather\n", - "1895/1896: Converting Node Type Gemm\n", - "1896/1896: Converting Node Type Tanh\n", - "Translation to CoreML spec completed. Now compiling the CoreML model.\n", - "Model Compilation done.\n" - ] - } - ], - "source": [ - "# ONNX to CoreML MLModel\n", - "mlmodel = convert(model=onnx_model_path, target_ios=\"13\")\n", - "mlmodel.save(mlmodel_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# PyTorch prediction\n", - "pred_pt = model(test_input)\n", - "\n", - "# MLModel prediction\n", - "input_dict = {'input_ids': test_input.numpy().astype(np.float32)}\n", - "pred_coreml = mlmodel.predict(input_dict, useCPUOnly=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Start Scores: SNR: 86.73614996879037 PSNR: 52.80535921101816\n", - "End Scores: SNR: 92.02044419648912 PSNR: 68.02353195339077\n" - ] - } - ], - "source": [ - "_compute_SNR(pred_pt[0].detach().numpy(), pred_coreml['start_scores'], 'Start Scores: ')\n", - "_compute_SNR(pred_pt[1].detach().numpy(), pred_coreml['end_scores'], 'End Scores: ')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/onnx_converter/readme.md b/examples/neural_network_inference/onnx_converter/readme.md deleted file mode 100644 index 64f6fddc4..000000000 --- a/examples/neural_network_inference/onnx_converter/readme.md +++ /dev/null @@ -1,5 +0,0 @@ -## Convert ONNX models to the Core ML model format - -- [Simple pytorch model to onnx to coreml](small_example.py) - -- [BERT model conversion](BERT.ipynb) \ No newline at end of file diff --git a/examples/neural_network_inference/onnx_converter/small_example.py b/examples/neural_network_inference/onnx_converter/small_example.py deleted file mode 100644 index 379382901..000000000 --- a/examples/neural_network_inference/onnx_converter/small_example.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -import torch.nn as nn -import 
diff --git a/examples/neural_network_inference/onnx_converter/readme.md b/examples/neural_network_inference/onnx_converter/readme.md deleted file mode 100644 index 64f6fddc4..000000000 --- a/examples/neural_network_inference/onnx_converter/readme.md +++ /dev/null @@ -1,5 +0,0 @@ -## Convert ONNX models to the Core ML model format - -- [Simple PyTorch model to ONNX to Core ML](small_example.py) - -- [BERT model conversion](BERT.ipynb) \ No newline at end of file diff --git a/examples/neural_network_inference/onnx_converter/small_example.py b/examples/neural_network_inference/onnx_converter/small_example.py deleted file mode 100644 index 379382901..000000000 --- a/examples/neural_network_inference/onnx_converter/small_example.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from onnx_coreml import convert - -# Step 0 - (a) Define the ML model -class small_model(nn.Module): - def __init__(self): - super(small_model, self).__init__() - self.fc1 = nn.Linear(768, 256) - self.fc2 = nn.Linear(256, 10) - - def forward(self, x): - y = F.relu(self.fc1(x)) - y = F.softmax(self.fc2(y), dim=-1)  # specify dim explicitly; the implicit dim is deprecated - return y - -# Step 0 - (b) Create the model or load it from disk -model = small_model() -dummy_input = torch.randn(768) - -# Step 1 - PyTorch to ONNX model -torch.onnx.export(model, dummy_input, './small_model.onnx') - -# Step 2 - ONNX to Core ML model -mlmodel = convert(model='./small_model.onnx', minimum_ios_deployment_target='13') -# Save the converted Core ML model -mlmodel.save('small_model.mlmodel')
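To sanity-check the converted model, one could load the saved `.mlmodel` and run a prediction. A minimal sketch, not part of the original script: the input/output feature names depend on the exported ONNX graph, so they are looked up from the spec rather than hard-coded, and `predict` requires macOS with Core ML available.

```python
import numpy as np
import coremltools

mlmodel = coremltools.models.MLModel('small_model.mlmodel')

# Feature names are assigned during conversion, so read them from the spec.
spec = mlmodel.get_spec()
input_name = spec.description.input[0].name
output_name = spec.description.output[0].name

# Run a single 768-dimensional random input through the model.
x = np.random.rand(768).astype(np.float32)
prediction = mlmodel.predict({input_name: x}, useCPUOnly=True)
print(prediction[output_name])
```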
diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/custom_layer_examples.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/custom_layer_examples.ipynb deleted file mode 100644 index ad40f49f1..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/custom_layer_examples.ipynb +++ /dev/null @@ -1,533 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook demonstrates the process of adding custom layers to the CoreML model during conversion. We discuss three examples.\n", - "\n", - "For TensorFlow operations (ops for short) that are not translatable to any of the CoreML layers, custom layers can be inserted in the CoreML model (the list of CoreML layers can be found [here](https://github.com/apple/coremltools/blob/master/mlmodel/format/NeuralNetwork.proto) or [here](https://apple.github.io/coremltools/coremlspecification/sections/NeuralNetwork.html)). At runtime, the CoreML framework will look for the implementation code of the custom layers, which has to be provided by the developer in their app. \n", - "A custom layer is a [proto message](https://github.com/apple/coremltools/blob/5b5b8190764ffe78110be6b4d0edbeebe0253a6e/mlmodel/format/NeuralNetwork.proto#L2280), like any other neural network layer in the .mlmodel file (which is in the protobuf format), that can hold the parameters and weights (if any) associated with the TF op.\n", - "Here is the [documentation](https://developer.apple.com/documentation/coreml/core_ml_api/creating_a_custom_layer) on CoreML custom layers, and a nice detailed [blogpost](http://machinethink.net/blog/coreml-custom-layers/). \n", - "\n", - "There are two ways in which a custom layer can be added during conversion from TF:\n", - "\n", - "1. Specify the argument \"add_custom_layers=True\" during conversion. This will automatically check for unsupported ops and insert a coreml custom layer message in place of each such op. The message can be edited later, if required, to add or remove parameters. \n", - "\n", - "2. Specify the arguments \"add_custom_layers=True\" and \"custom_conversion_functions\" to the converter. The second argument is a dictionary whose keys are either op types or op names and whose values are user-defined function handles. The functions receive the TensorFlow [op](https://github.com/tensorflow/tensorflow/blob/51ef16057b4625e0a3e2943a9f1bbf856cf098ca/tensorflow/python/framework/ops.py#L3707) object and the CoreML neural network [builder object](https://github.com/apple/coremltools/blob/5b5b8190764ffe78110be6b4d0edbeebe0253a6e/coremltools/models/neural_network.py#L34), and give the user full control over how to handle the TF op and which layers to add to the CoreML graph. When the key is an op type, the function is called whenever an op of that type is encountered while traversing the TF graph. Op names as keys are useful for targeting specific ops. \n", - "\n", - "Let's now dive into the examples to make this process clear. \n", - "\n", - "First up, setting up some utilities:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "import tensorflow as tf\n", - "import tensorflow.contrib.slim as slim\n", - "from tensorflow.python.tools.freeze_graph import freeze_graph\n", - "import numpy as np\n", - "import shutil\n", - "import tempfile\n", - "import os\n", - "import tfcoreml\n", - "import coremltools\n", - "from coremltools.proto import NeuralNetwork_pb2\n", - "import netron # we use netron: https://github.com/lutzroeder/Netron for visualization. Comment out this line and all the calls to the \"_visualize\" method if you do not want to use it. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# A utility function to freeze the graph. It will be used later\n", - "def _simple_run_and_freeze(graph, output_name, frozen_model_file='', feed_dict={}):\n", - " \n", - " model_dir = tempfile.mkdtemp()\n", - " graph_def_file = os.path.join(model_dir, 'tf_graph.pbtxt')\n", - " checkpoint_file = os.path.join(model_dir, 'tf_model.ckpt')\n", - " \n", - " tf.reset_default_graph()\n", - " with graph.as_default() as g:\n", - " saver = tf.train.Saver()\n", - "\n", - " with tf.Session(graph = graph) as sess:\n", - " # initialize\n", - " sess.run(tf.global_variables_initializer())\n", - " # run the result\n", - " fetch = graph.get_operation_by_name(output_name).outputs[0]\n", - " tf_result = sess.run(fetch, feed_dict=feed_dict)\n", - " # save graph definition somewhere\n", - " tf.train.write_graph(sess.graph, model_dir, graph_def_file)\n", - " # save the weights\n", - " saver.save(sess, checkpoint_file)\n", - " \n", - " freeze_graph(input_graph=graph_def_file,\n", - " input_saver=\"\",\n", - " input_binary=False,\n", - " input_checkpoint=checkpoint_file,\n", - " output_node_names=output_name,\n", - " restore_op_name=\"save/restore_all\",\n", - " filename_tensor_name=\"save/Const:0\",\n", - " output_graph=frozen_model_file,\n", - " clear_devices=True,\n", - " initializer_nodes=\"\")\n", - " \n", - " if os.path.exists(model_dir):\n", - " shutil.rmtree(model_dir)\n", - " \n", - " return tf_result\n", - "\n", - "# A utility function that takes an MLModel instance and prints info about the neural network layers inside.\n", - "# It prints short info about all the NN layers and the full description of any custom layer found\n", - "def _print_coreml_nn_layer_info(spec):\n", - " nn_layers = coremltools.models.utils._get_nn_layers(spec)\n", - " for i, layer in enumerate(nn_layers):\n", - " if layer.WhichOneof('layer') == 'custom':\n", - " print( 'layer_id = ', i)\n", - " print( layer)\n", - " else:\n", - " print('{}: layer type: ({}) , inputs: {}, outputs: {}'.\n", - " format(i,layer.WhichOneof('layer'), \", \".join([x for x in layer.input]), \", \".join([x for x in layer.output])))\n", - "\n", - "# We use \"netron\" for visualization. 
\n", - "def _visualize(network_path, port_number):\n", - " \n", - " def visualize_using_netron(path, port_number):\n", - " netron.serve_file(path, browse = True, port=port_number)\n", - " \n", - " from threading import Thread\n", - " import time\n", - " \n", - " d = Thread(target = visualize_using_netron, args = (network_path, port_number,))\n", - " d.setDaemon(True)\n", - " d.start()\n", - " time.sleep(5)\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lets define the first TF graph. This one applies a dense layer and normalizes it. It uses the [\"Tile\"](https://www.tensorflow.org/versions/master/api_docs/python/tf/tile) op that CoreML does not support. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# define a TF graph: input -> Dense -> unit norm -> output\n", - "graph = tf.Graph()\n", - "with graph.as_default() as g:\n", - " inputs = tf.placeholder(tf.float32, shape=[None,8], name='input')\n", - " with slim.arg_scope([slim.fully_connected],\n", - " weights_initializer=tf.truncated_normal_initializer(0.0, 0.2),\n", - " weights_regularizer=slim.l2_regularizer(0.0005)):\n", - " y = slim.fully_connected(inputs, 10, scope='fc')\n", - " y = slim.unit_norm(y,dim=1)\n", - "\n", - "output_name = y.op.name\n", - "X = np.random.rand(1,8)\n", - "frozen_model_file = 'unit_norm_graph.pb'\n", - "coreml_model_path = 'unit_norm_graph.mlmodel'\n", - "out = _simple_run_and_freeze(graph, output_name, frozen_model_file, feed_dict={'input:0' : X})\n", - "print( 'TF out: ', output_name, out.shape, np.sum(out ** 2))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# visualize the frozen TF model\n", - "_visualize(frozen_model_file, np.random.randint(8000, 9000))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Try to convert it : this call should raise an error\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,8]},\n", - " output_feature_names=['UnitNorm/div:0'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# we got an unsupported op error. Try again with custom Flag set to true\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,8]},\n", - " output_feature_names=['UnitNorm/div:0'],\n", - " add_custom_layers=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can see that the \"Tile\" op was made into a custom layer in the CoreML model. This op takes in two inputs, it recasts the first one into the shape given by the second input (by repetition). Here is the [documentation](https://www.tensorflow.org/versions/master/api_docs/python/tf/tile). 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# visualize CoreML model\n", - "_visualize(coreml_model_path, np.random.randint(8000, 9000))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: As we can see in the visualization, the tensors whose values do not change based on the graph inputs (potentially they depend on the shape of the input, which needs to be fixed during conversion) are converted to \"load constant\" layers in the CoreML graph. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# inspect the CoreML model\n", - "spec = coreml_model.get_spec()\n", - "_print_coreml_nn_layer_info(spec)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "\"ClassName\" is an important message: this is the name of the swift/objective-c class that needs to implemented in the Xcode app and will contain the actual code for running the layer. \n", - "The \"tile\" op does not have any parameters, so there is no need to edit generated the coreml specification. Lets now convert a TF graph with the op [\"TopKV2\"](https://www.tensorflow.org/api_docs/python/tf/nn/top_k) that has parameters. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# define a TF graph: input -> Dense -> softmax -> top_k -> output\n", - "tf.reset_default_graph()\n", - "graph = tf.Graph()\n", - "with graph.as_default() as g:\n", - " x = tf.placeholder(tf.float32, shape=[None,8], name=\"input\")\n", - " y = tf.layers.dense(inputs=x, units=12, activation=tf.nn.relu)\n", - " y = tf.nn.softmax(y, axis=1)\n", - " y = tf.nn.top_k(y, k=3, sorted = False, name='output')\n", - " \n", - "output_name = 'output' \n", - "X = np.random.rand(1,8)\n", - "frozen_model_file = 'topk_graph.pb'\n", - "coreml_model_path = 'topk_graph.mlmodel'\n", - "out = _simple_run_and_freeze(graph, output_name, frozen_model_file, feed_dict={'input:0' : X})\n", - "print( 'TF out: ', output_name, out.shape, out)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# visualize the frozen TF model\n", - "_visualize(frozen_model_file, np.random.randint(8000, 9000))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Try to convert it : this call should raise an error\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,8]},\n", - " output_feature_names=['output:0'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# we got an unsupported op error. 
Try again with the custom layers flag set to True\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,8]},\n", - " output_feature_names=['output:0'],\n", - " add_custom_layers=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# inspect the CoreML model\n", - "spec = coreml_model.get_spec()\n", - "_print_coreml_nn_layer_info(spec)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The [top_k](https://www.tensorflow.org/api_docs/python/tf/nn/top_k) operation has two parameters: 'k' and 'sorted'. In the TF graph, the former is received by the op as an additional input and the latter is an op attribute. \n", - "Let us modify the MLModel spec directly to add these two parameters to this layer. We need to know a little bit about the custom layer's [proto message](https://github.com/apple/coremltools/blob/5b5b8190764ffe78110be6b4d0edbeebe0253a6e/mlmodel/format/NeuralNetwork.proto#L2280) structure to be able to do that. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nn_layers = coremltools.models.utils._get_nn_layers(spec) # get all the layers as a list\n", - "del nn_layers[3].input[1] # delete the second input: it's just the value of k\n", - "del nn_layers[3].output[1] # delete the second output\n", - "nn_layers[3].custom.parameters[\"k\"].intValue = 3\n", - "nn_layers[3].custom.parameters[\"sorted\"].boolValue = False\n", - "_print_coreml_nn_layer_info(spec)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# save the spec back out\n", - "coremltools.models.utils.save_spec(spec, coreml_model_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# visualize CoreML model\n", - "_visualize(coreml_model_path, np.random.randint(8000, 9000))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here is an alternative way to do the same thing, using the \"custom_conversion_functions\" argument: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def _convert_topk(**kwargs):\n", - " tf_op = kwargs[\"op\"]\n", - " coreml_nn_builder = kwargs[\"nn_builder\"]\n", - " constant_inputs = kwargs[\"constant_inputs\"]\n", - " \n", - " params = NeuralNetwork_pb2.CustomLayerParams()\n", - " params.className = 'Top_K'\n", - " params.description = \"Custom layer that corresponds to the top_k TF op\"\n", - " params.parameters[\"sorted\"].boolValue = tf_op.get_attr('sorted')\n", - " # get the value of k\n", - " k = constant_inputs.get(tf_op.inputs[1].name, 3)\n", - " params.parameters[\"k\"].intValue = k\n", - " coreml_nn_builder.add_custom(name=tf_op.name,\n", - " input_names=[tf_op.inputs[0].name],\n", - " output_names=[tf_op.outputs[0].name],\n", - " custom_proto_spec=params)\n", - "\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,8]},\n", - " output_feature_names=['output:0'],\n", - " add_custom_layers=True,\n", - " custom_conversion_functions={'TopKV2': _convert_topk})\n", - "\n", - "print(\"\\n \\n ML Model layers info: \\n\")\n", - "# inspect the CoreML model: this should be the same as the one we got above\n", - "spec = 
coreml_model.get_spec()\n", - "_print_coreml_nn_layer_info(spec)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "Let's move on to the third and final example. This time we encounter an op that is supported, but that errors out due to an unsupported configuration: the [Slice](https://www.tensorflow.org/versions/master/api_docs/python/tf/slice) op." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# define a TF graph: input -> conv -> slice -> output\n", - "tf.reset_default_graph()\n", - "graph = tf.Graph()\n", - "with graph.as_default() as g:\n", - " x = tf.placeholder(tf.float32, shape=[None,10,10,3], name=\"input\")\n", - " W = tf.Variable(tf.truncated_normal([1,1,3,5], stddev=0.1))\n", - " y = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')\n", - " y = tf.slice(y, begin=[0,1,1,1], size=[1,2,2,2], name='output')\n", - " \n", - "output_name = 'output' \n", - "X = np.random.rand(1,10,10,3)\n", - "frozen_model_file = 'slice_graph.pb'\n", - "coreml_model_path = 'slice_graph.mlmodel'\n", - "out = _simple_run_and_freeze(graph, output_name, frozen_model_file, feed_dict={'input:0' : X})\n", - "print( 'TF out: ', output_name, out.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# visualize the frozen TF model\n", - "_visualize(frozen_model_file, np.random.randint(8000, 9000))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Try to convert it: this call should raise an error\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,10,10,3]},\n", - " output_feature_names=['output:0'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This fails, so we provide a custom layer function. 
Note that this time, the key in the dictionary provided via \"custom_conversion_functions\" should be the same as the op name (\"output\")." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def _convert_slice(**kwargs):\n", - " tf_op = kwargs[\"op\"]\n", - " coreml_nn_builder = kwargs[\"nn_builder\"]\n", - " constant_inputs = kwargs[\"constant_inputs\"]\n", - " \n", - " params = NeuralNetwork_pb2.CustomLayerParams()\n", - " params.className = 'Slice'\n", - " params.description = \"Custom layer that corresponds to the slice TF op\"\n", - " # get the values of begin and size\n", - " begin = constant_inputs.get(tf_op.inputs[1].name, [0,0,0,0])\n", - " size = constant_inputs.get(tf_op.inputs[2].name, [0,0,0,0])\n", - " # add begin and size as two repeated weight fields\n", - " begin_as_weights = params.weights.add()\n", - " begin_as_weights.floatValue.extend(map(float, begin))\n", - " size_as_weights = params.weights.add()\n", - " size_as_weights.floatValue.extend(map(float, size))\n", - " coreml_nn_builder.add_custom(name=tf_op.name,\n", - " input_names=[tf_op.inputs[0].name],\n", - " output_names=[tf_op.outputs[0].name],\n", - " custom_proto_spec=params)\n", - "\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_path,\n", - " input_name_shape_dict={'input:0':[1,10,10,3]},\n", - " output_feature_names=['output:0'],\n", - " add_custom_layers=True,\n", - " custom_conversion_functions={'output': _convert_slice}) # the dictionary has the op name as the key\n", - "\n", - "print(\"\\n \\n ML Model layers info: \\n\")\n", - "# inspect the CoreML model: this should be the same as the one we got above\n", - "spec = coreml_model.get_spec()\n", - "_print_coreml_nn_layer_info(spec)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# visualize CoreML model\n", - "_visualize(coreml_model_path, np.random.randint(8000, 9000))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/deep_speech.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/deep_speech.ipynb deleted file mode 100644 index 40094e32c..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/deep_speech.ipynb +++ /dev/null @@ -1,267 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DeepSpeech Conversion\n", - "\n", - "This example demonstrates the workflow to download a publicly available \n", - "TensorFlow model and convert it to Core ML format using the tfcoreml converter.\n", - "\n", - "We use an open source implementation of the DeepSpeech model (https://arxiv.org/abs/1412.5567) \n", - "provided by Mozilla: https://github.com/mozilla/DeepSpeech.\n", - "\n", - "Note that this notebook was tested with the following dependencies: \n", - "\n", - "```\n", - "tensorflow==1.14.0\n", - "coremltools==3.0\n", - "```\n", - "\n", - "It will **NOT** 
work on TensorFlow 2.0+, because the `tf.contrib` module it relies on has been deprecated and removed." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import tfcoreml\n", - "import numpy as np\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Download the TensorFlow implementation of the DeepSpeech model from https://github.com/mozilla/DeepSpeech\n", - "# wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz\n", - "# tar xvfz deepspeech-0.4.1-models.tar.gz" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 assert nodes deleted\n", - "[... long list of remaining constant/weight tensor names truncated: 'h2:0', 'lstm_fused_cell/kernel:0', ..., 'transpose/perm:0' ...]\n", - "25 nodes deleted\n", - "0 nodes deleted\n", - "0 nodes deleted\n", - "2 identity nodes deleted\n", - "5 disconnected nodes deleted\n", - "[SSAConverter] Converting function main ...\n", - "[SSAConverter] [1/76] Converting op type: 'Placeholder', name: 'input_node', output_shape: (1, 16, 19, 26).\n", - "[SSAConverter] [2/76] Converting op type: 'Placeholder', name: 'input_lengths', output_shape: (1,).\n", - "[SSAConverter] [3/76] Converting op type: 'get_global', name: 'previous_state_c/read', output_shape: (1, 2048).\n", - "[SSAConverter] [4/76] Converting op type: 'get_global', name: 'previous_state_h/read', output_shape: (1, 2048).\n", - "[... ops 5/76 through 74/76 truncated: the converter walks the remaining Const, Transpose, Reshape, MatMul/Add/Relu/Minimum (clipped-ReLU dense layers), ExpandDims, Tile, LSTMBlock/get_tuple, ConcatV2, GatherNd and set_global ops one by one ...]\n", - "[SSAConverter] [75/76] Converting op type: 'Reshape', name: 'raw_logits', output_shape: (16, 1, 29).\n", - "[SSAConverter] [76/76] Converting op type: 'Softmax', name: 'logits', output_shape: (16, 1, 29).\n", - "[Core ML Pass] 15 disconnected constants nodes deleted\n" - ] - } - ], - "source": [ - "tfmodel_path = './models/output_graph.pb' # path to the downloaded model\n", - "mlmodel_path = './deep_speech.mlmodel' # path to save the converted Core ML model\n", - "\n", - "# convert the model and save it to the local directory\n", - "model = tfcoreml.convert(\n", - " tf_model_path=tfmodel_path, \n", - " mlmodel_path=mlmodel_path,\n", - " input_name_shape_dict={\n", - " 'input_node': [1, 16, 19, 26],\n", - " 'input_lengths': [1],\n", - " 'previous_state_h__invar__': [1, 2048],\n", - " 'previous_state_c__invar__': [1, 2048]\n", - " },\n", - " output_feature_names=['logits'],\n", - " minimum_ios_deployment_target='13'\n", - ")\n", - "\n", - "# Optionally, we can print and inspect the converted Core ML model\n", - "# from coremltools.models.neural_network.printer import print_network_spec_coding_style\n", - "# print_network_spec_coding_style(model.get_spec())" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Generate some random data as inputs\n", - "input_node = np.random.rand(1, 16, 19, 26)\n", - "input_lengths = np.array([16], dtype=np.int32)\n", - "previous_state_c = np.random.rand(1, 2048)\n", - "previous_state_h = np.random.rand(1, 2048)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Run predictions\n", - "out = model.predict({\n", - " 'input_node': input_node,\n", - " 'input_lengths': input_lengths,\n", - " 'previous_state_h__invar__': previous_state_h,\n", - " 'previous_state_c__invar__': previous_state_c\n", - "})['logits']\n", - "\n", - "output = np.array(out)\n", - "# print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Optionally, we can verify that the predictions are consistent with TensorFlow's output\n", - "from tensorflow.contrib.rnn import *\n", - "\n", - "with open(tfmodel_path, 'rb') as f:\n", - " serialized = f.read()\n", - "original_gdef = tf.compat.v1.GraphDef()\n", - "original_gdef.ParseFromString(serialized)\n", - "\n", - "tf.import_graph_def(original_gdef, name=\"\")\n", - "\n", - "with tf.Session() as sess:\n", - " g = sess.graph\n", - " out = g.get_tensor_by_name('Softmax:0')\n", - " in1 = g.get_tensor_by_name('input_node:0')\n", - " in2 = g.get_tensor_by_name('input_lengths:0')\n", - " in3 = g.get_tensor_by_name('previous_state_c:0')\n", - " in4 = g.get_tensor_by_name('previous_state_h:0')\n", - "\n", - " tf_out = sess.run(out, feed_dict={\n", - " in1: input_node, in2: input_lengths, in3: previous_state_c, in4: previous_state_h,\n", - " })\n", - "\n", - "tf_output = np.array(tf_out)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "np.testing.assert_array_equal(output.shape, tf_output.shape)\n", - "np.testing.assert_almost_equal(output.flatten(), tf_output.flatten(), decimal=2)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}
"nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v1_preprocessing_steps.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v1_preprocessing_steps.ipynb deleted file mode 100644 index 3d59cdc1e..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v1_preprocessing_steps.ipynb +++ /dev/null @@ -1,342 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inception V1 Example\n", - "In this notebook we will go through the process of converting the Inception V1 model to a Neural Network Classifier CoreML model that directly predicts the class label of the input image. We will highlight the importance of setting the image preprocessing parameters correctly to get the right results. \n", - "Lets get started!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lets first download the inception V1 frozen TF graph (the .pb file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Download the model and class label package\n", - "from __future__ import print_function\n", - "import os, sys\n", - "import tarfile" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def download_file_and_unzip(url, dir_path='.'):\n", - " \"\"\"Download the frozen TensorFlow model and unzip it.\n", - " url - The URL address of the frozen file\n", - " dir_path - local directory\n", - " \"\"\"\n", - " if not os.path.exists(dir_path):\n", - " os.makedirs(dir_path)\n", - " k = url.rfind('/')\n", - " fname = url[k+1:]\n", - " fpath = os.path.join(dir_path, fname)\n", - "\n", - " if not os.path.exists(fpath):\n", - " if sys.version_info[0] < 3:\n", - " import urllib\n", - " urllib.urlretrieve(url, fpath)\n", - " else:\n", - " import urllib.request\n", - " urllib.request.urlretrieve(url, fpath)\n", - "\n", - " tar = tarfile.open(fpath)\n", - " tar.extractall(dir_path)\n", - " tar.close()\n", - "\n", - "inception_v1_url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz'\n", - "download_file_and_unzip(inception_v1_url)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For conversion to CoreML, we need to find the input and output tensor names in the TF graph. This will also be required to run the TF graph for numerical accuracy check. 
Let's load the TF graph def and try to find those names." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the TF graph definition\n", - "import tensorflow as tf\n", - "tf_model_path = './inception_v1_2016_08_28_frozen.pb'\n", - "with open(tf_model_path, 'rb') as f:\n", - " serialized = f.read()\n", - "tf.reset_default_graph()\n", - "original_gdef = tf.GraphDef()\n", - "original_gdef.ParseFromString(serialized)\n", - "\n", - "# Let's get some details about a few ops at the beginning and the end of the graph\n", - "with tf.Graph().as_default() as g:\n", - " tf.import_graph_def(original_gdef, name='')\n", - " ops = g.get_operations()\n", - " N = len(ops)\n", - " for i in [0,1,2,N-3,N-2,N-1]:\n", - " print('\\n\\nop id {} : op type: \"{}\"'.format(str(i), ops[i].type));\n", - " print('input(s):'),\n", - " for x in ops[i].inputs:\n", - " print(\"name = {}, shape: {}, \".format(x.name, x.get_shape())),\n", - " print('\\noutput(s):'),\n", - " for x in ops[i].outputs:\n", - " print(\"name = {}, shape: {},\".format(x.name, x.get_shape())), " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The output of the Placeholder op is the input (\"input:0\") and the output of the Softmax op towards the end of the graph is the output (\"InceptionV1/Logits/Predictions/Softmax:0\"). Let's convert to mlmodel now." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import tfcoreml\n", - "# Supply a dictionary of input tensors' names and shapes (with the batch axis)\n", - "input_tensor_shapes = {\"input:0\":[1,224,224,3]} # batch size is 1\n", - "# providing the image_input_names argument converts the input into an image for CoreML\n", - "image_input_name = ['input:0']\n", - "# Output CoreML model path\n", - "coreml_model_file = './inception_v1.mlmodel'\n", - "# The TF model's output tensor name\n", - "output_tensor_names = ['InceptionV1/Logits/Predictions/Softmax:0']\n", - "# class label file: providing this will make a \"Classifier\" CoreML model\n", - "class_labels = 'imagenet_slim_labels.txt'\n", - "\n", - "# Call the converter. This may take a while\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=tf_model_path,\n", - " mlmodel_path=coreml_model_file,\n", - " input_name_shape_dict=input_tensor_shapes,\n", - " output_feature_names=output_tensor_names,\n", - " image_input_names = image_input_name,\n", - " class_labels = class_labels)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's load an image for testing. We will get predictions on this image using both the TF model and the corresponding mlmodel."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now we're ready to test out the CoreML model with a real image!\n", - "# Load an image\n", - "import numpy as np\n", - "import PIL\n", - "import requests\n", - "from io import BytesIO\n", - "from matplotlib.pyplot import imshow\n", - "# This is an image of a golden retriever from Wikipedia\n", - "img_url = 'https://upload.wikimedia.org/wikipedia/commons/9/93/Golden_Retriever_Carlos_%2810581910556%29.jpg'\n", - "response = requests.get(img_url)\n", - "%matplotlib inline\n", - "img = PIL.Image.open(BytesIO(response.content))\n", - "imshow(np.asarray(img))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# for CoreML predictions we directly pass in the PIL image after resizing\n", - "import coremltools\n", - "img = img.resize([224,224], PIL.Image.ANTIALIAS)\n", - "coreml_inputs = {'input__0': img}\n", - "coreml_output = coreml_model.predict(coreml_inputs, useCPUOnly=True)\n", - "coreml_pred_dict = coreml_output['InceptionV1__Logits__Predictions__Softmax__0']\n", - "coreml_predicted_class_label = coreml_output['classLabel']\n", - "\n", - "# for TF predictions we get the numpy array of the image\n", - "img_np = np.array(img).astype(np.float32)\n", - "print( 'image shape:', img_np.shape)\n", - "print( 'first few values: ', img_np.flatten()[0:4], 'max value: ', np.amax(img_np))\n", - "img_tf = np.expand_dims(img_np, axis = 0) # now the shape is [1,224,224,3] as required by TF\n", - "\n", - "# Evaluate TF and get the highest-probability label\n", - "tf_input_name = 'input:0'\n", - "tf_output_name = 'InceptionV1/Logits/Predictions/Softmax:0'\n", - "with tf.Session(graph = g) as sess:\n", - " tf_out = sess.run(tf_output_name, \n", - " feed_dict={tf_input_name: img_tf})\n", - "tf_out = tf_out.flatten() \n", - "idx = np.argmax(tf_out)\n", - "label_file = 'imagenet_slim_labels.txt' \n", - "with open(label_file) as f:\n", - " labels = f.readlines()\n", - " \n", - "# print predictions \n", - "print('\\n')\n", - "print(\"CoreML prediction class = {}, probability = {}\".format(coreml_predicted_class_label,\n", - " str(coreml_pred_dict[coreml_predicted_class_label]))) \n", - "print(\"TF prediction class = {}, probability = {}\".format(labels[idx],\n", - " str(tf_out[idx])))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Both predictions match, which means the conversion was correct. However, the class label seems incorrect. What could be the reason? The answer is that we did not preprocess the image correctly before passing it to the neural network! This is always a crucial step when using neural networks on images.\n", - "\n", - "How do we know what preprocessing to apply? This can sometimes be tricky to find. The approach is to locate the source of the pre-trained model and check what preprocessing the author of the model used during training and evaluation. In this case, the TF model comes from the SLIM library, so we find the preprocessing steps [here](https://github.com/tensorflow/models/blob/edb6ed22a801665946c63d650ab9a0b23d98e1b1/research/slim/preprocessing/inception_preprocessing.py#L243).\n", - "\n", - "We see that the image pixels have to be scaled to lie in the interval [-1,1]. Let's do that and get the TF predictions again! 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "img_tf = (2.0/255.0) * img_tf - 1\n", - "with tf.Session(graph = g) as sess:\n", - " tf_out = sess.run(tf_output_name, \n", - " feed_dict={tf_input_name: img_tf})\n", - "tf_out = tf_out.flatten() \n", - "idx = np.argmax(tf_out)\n", - "print(\"TF prediction class = {}, probability = {}\".format(labels[idx],\n", - " str(tf_out[idx])))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Much better now! The model is predicting a dog as the highest class. \n", - "\n", - "What about CoreML? CoreML automatically handles the image preprocessing, when the input is of type image, so we do not have to change the input that we were passing in earlier. For the mlmodel we converted, lets see what the image biases and scale have been set to" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get image pre-processing parameters of a saved CoreML model\n", - "from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2\n", - "\n", - "\n", - "spec = coremltools.models.utils.load_spec(coreml_model_file)\n", - "if spec.WhichOneof('Type') == 'neuralNetworkClassifier':\n", - " nn = spec.neuralNetworkClassifier\n", - "if spec.WhichOneof('Type') == 'neuralNetwork':\n", - " nn = spec.neuralNetwork \n", - "if spec.WhichOneof('Type') == 'neuralNetworkRegressor':\n", - " nn = spec.neuralNetworkRegressor\n", - "\n", - "preprocessing = nn.preprocessing[0].scaler\n", - "print( 'channel scale: ', preprocessing.channelScale)\n", - "print( 'blue bias: ', preprocessing.blueBias)\n", - "print( 'green bias: ', preprocessing.greenBias)\n", - "print( 'red bias: ', preprocessing.redBias)\n", - "\n", - "inp = spec.description.input[0]\n", - "if inp.type.WhichOneof('Type') == 'imageType':\n", - " colorspace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Name(inp.type.imageType.colorSpace)\n", - " print( 'colorspace: ', colorspace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As suspected, they are not correct. Lets convert the model again and set them correctly this time. Note that the channel scale is multiplied first and then the bias is added. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Call the converter. 
This may take a while\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=tf_model_path,\n", - " mlmodel_path=coreml_model_file,\n", - " input_name_shape_dict=input_tensor_shapes,\n", - " output_feature_names=output_tensor_names,\n", - " image_input_names = image_input_name,\n", - " class_labels = class_labels,\n", - " red_bias = -1,\n", - " green_bias = -1,\n", - " blue_bias = -1,\n", - " image_scale = 2.0/255.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Call CoreML predict again\n", - "coreml_output = coreml_model.predict(coreml_inputs, useCPUOnly=True)\n", - "coreml_pred_dict = coreml_output['InceptionV1__Logits__Predictions__Softmax__0']\n", - "coreml_predicted_class_label = coreml_output['classLabel']\n", - "print(\"CoreML prediction class = {}, probability = {}\".format(coreml_predicted_class_label,\n", - " str(coreml_pred_dict[coreml_predicted_class_label])))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Yes, now it matches the TF output and is correct!\n", - "\n", - "Note that predictions with the default CoreML predict call (when the flag useCPUOnly=True is omitted) may vary slightly, since it uses a lower-precision optimized path that runs faster. " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v3.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v3.ipynb deleted file mode 100644 index 8877424d6..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/inception_v3.ipynb +++ /dev/null @@ -1,236 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "This example demonstrates the workflow to download a publicly available TF \n", - "model, strip part of it for inference, and convert it to CoreML using the \n", - "tfcoreml converter. 
\n", - "\n", - "Stripping part of the TF model may be useful when:\n", - "(1) the TF model contains input data pre-processing mechanisms that are \n", - "suitable for training / unsupported by CoreML\n", - "(2) the TF model has ops only used in training time\n", - "\n", - "We use an inception v3 model provided by Google, which can be downloaded \n", - "at this URL:\n", - "\n", - "https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015.zip\n", - "\"\"\"\n", - "from __future__ import print_function\n", - "import os, sys, zipfile\n", - "from os.path import dirname\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.core.framework import graph_pb2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Download the model and class label package\n", - "def download_file_and_unzip(url, dir_path='.'):\n", - " \"\"\"Download the frozen TensorFlow model and unzip it.\n", - " url - The URL address of the frozen file\n", - " dir_path - local directory\n", - " \"\"\"\n", - " if not os.path.exists(dir_path):\n", - " os.makedirs(dir_path)\n", - " k = url.rfind('/')\n", - " fname = url[k+1:]\n", - " fpath = os.path.join(dir_path, fname)\n", - "\n", - " if not os.path.exists(fpath):\n", - " if sys.version_info[0] < 3:\n", - " import urllib\n", - " urllib.urlretrieve(url, fpath)\n", - " else:\n", - " import urllib.request\n", - " urllib.request.urlretrieve(url, fpath)\n", - "\n", - " zip_ref = zipfile.ZipFile(fpath, 'r')\n", - " zip_ref.extractall(dir_path)\n", - " zip_ref.close()\n", - "\n", - "inception_v3_url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015.zip'\n", - "download_file_and_unzip(inception_v3_url);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the TF graph definition\n", - "tf_model_path = './tensorflow_inception_graph.pb'\n", - "with open(tf_model_path, 'rb') as f:\n", - " serialized = f.read()\n", - "tf.reset_default_graph()\n", - "original_gdef = tf.GraphDef()\n", - "original_gdef.ParseFromString(serialized)\n", - "\n", - "# For demonstration purpose we show the first 15 ops the TF model\n", - "with tf.Graph().as_default() as g:\n", - " tf.import_graph_def(original_gdef, name='')\n", - " ops = g.get_operations()\n", - " for i in range(15):\n", - " print('op id {} : op name: {}, op type: \"{}\"'.format(str(i),ops[i].name, ops[i].type));\n", - "\n", - "# This Inception model uses DecodeJpeg op to read from JPEG images\n", - "# encoded as string Tensors. You can visualize it with TensorBoard,\n", - "# but we're omitting it here. For deployment we need to remove the\n", - "# JPEG decoder and related ops, and replace them with a placeholder\n", - "# where we can feed image data in. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Strip the JPEG decoder and preprocessing part of TF model\n", - "# In this model, the actual op that feeds pre-processed image into \n", - "# the network is 'Mul'. 
The op that generates probabilities per\n", - "# class is 'softmax/logits'\n", - "# To figure out the inputs/outputs for your own model,\n", - "# you can use TensorFlow's summarize_graph or TensorBoard's\n", - "# visualization tool.\n", - "\n", - "from tensorflow.python.tools import strip_unused_lib\n", - "from tensorflow.python.framework import dtypes\n", - "from tensorflow.python.platform import gfile\n", - "input_node_names = ['Mul']\n", - "output_node_names = ['softmax/logits']\n", - "gdef = strip_unused_lib.strip_unused(\n", - " input_graph_def = original_gdef,\n", - " input_node_names = input_node_names,\n", - " output_node_names = output_node_names,\n", - " placeholder_type_enum = dtypes.float32.as_datatype_enum)\n", - "# Save it to an output file\n", - "frozen_model_file = './inception_v3.pb'\n", - "with gfile.GFile(frozen_model_file, \"wb\") as f:\n", - " f.write(gdef.SerializeToString())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now we have a TF model ready to be converted to CoreML\n", - "import tfcoreml\n", - "# Supply a dictionary of input tensors' name and shape (with \n", - "# batch axis)\n", - "input_tensor_shapes = {\"Mul:0\":[1,299,299,3]} # batch size is 1\n", - "# Output CoreML model path\n", - "coreml_model_file = './inception_v3.mlmodel'\n", - "# The TF model's output tensor name\n", - "output_tensor_names = ['softmax/logits:0']\n", - "\n", - "# Call the converter. This may take a while\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_file,\n", - " input_name_shape_dict=input_tensor_shapes,\n", - " output_feature_names=output_tensor_names,\n", - " image_input_names = ['Mul:0'],\n", - " red_bias = -1,\n", - " green_bias = -1,\n", - " blue_bias = -1,\n", - " image_scale = 2.0/255.0)\n", - "\n", - "# MLModel saved at location: ./inception_v3.mlmodel" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now we're ready to test out the CoreML model with a real image!\n", - "# Load an image\n", - "import PIL\n", - "import requests\n", - "from io import BytesIO\n", - "from matplotlib.pyplot import imshow\n", - "# This is an image of a golden retriever from Wikipedia\n", - "img_url = 'https://upload.wikimedia.org/wikipedia/commons/9/93/Golden_Retriever_Carlos_%2810581910556%29.jpg'\n", - "response = requests.get(img_url)\n", - "%matplotlib inline\n", - "img = PIL.Image.open(BytesIO(response.content))\n", - "imshow(np.asarray(img))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Run CoreML prediction\n", - "# Pay attention to '__0'. 
We change ':0' to '__0' to make sure \n", - "# MLModel's generated Swift/Obj-C code is semantically correct\n", - "img = img.resize([299,299], PIL.Image.ANTIALIAS)\n", - "coreml_inputs = {'Mul__0': img}\n", - "coreml_output = coreml_model.predict(coreml_inputs, useCPUOnly=False)\n", - "probs = coreml_output['softmax__logits__0'].flatten()\n", - "label_idx = np.argmax(probs)\n", - "\n", - "# This label file comes with the model\n", - "label_file = 'imagenet_comp_graph_label_strings.txt' \n", - "with open(label_file) as f:\n", - " labels = f.readlines()\n", - "print('Label = {}'.format(labels[label_idx]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# And that's the end" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Raw Cell Format", - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_example.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_example.ipynb deleted file mode 100644 index 29b7aa3b0..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_example.ipynb +++ /dev/null @@ -1,156 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "This simple notebook demonstrates the workflow of using the TensorFlow converter.\n", - "\"\"\"\n", - "from __future__ import print_function\n", - "import numpy as np\n", - "from tensorflow.python.tools.freeze_graph import freeze_graph\n", - "\n", - "import tfcoreml\n", - "import linear_mnist_train" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Step 0: Before you run this notebook, run the example script linear_mnist_train.py\n", - "to get a trained TensorFlow network.\n", - "This may take a few minutes.\n", - "\"\"\"\n", - "linear_mnist_train.train()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Step 1: \"Freeze\" your TensorFlow model - convert your TF model into a stand-alone graph definition file\n", - "Inputs: \n", - "(1) TensorFlow code\n", - "(2) trained weights in a checkpoint file\n", - "(3) The output tensors' names you want to use in inference\n", - "(4) [Optional] Input tensors' names to the TF model\n", - "Outputs: \n", - "(1) A frozen TensorFlow GraphDef, with trained weights frozen into it\n", - "\"\"\"\n", - "\n", - "# Provide these to run freeze_graph:\n", - "# Graph definition file, stored as protobuf TEXT\n", - "graph_def_file = './model.pbtxt'\n", - "# Trained model's checkpoint name\n", - "checkpoint_file = './checkpoints/model.ckpt'\n", - "# Frozen model's output name\n", - "frozen_model_file = './frozen_model.pb'\n", - "# Output nodes. If there are multiple output ops, use a comma-separated string, e.g. 
\"out1,out2\".\n", - "output_node_names = 'Softmax' \n", - "\n", - "\n", - "# Call freeze graph\n", - "freeze_graph(input_graph=graph_def_file,\n", - " input_saver=\"\",\n", - " input_binary=False,\n", - " input_checkpoint=checkpoint_file,\n", - " output_node_names=output_node_names,\n", - " restore_op_name=\"save/restore_all\",\n", - " filename_tensor_name=\"save/Const:0\",\n", - " output_graph=frozen_model_file,\n", - " clear_devices=True,\n", - " initializer_nodes=\"\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Step 2: Call converter\n", - "\"\"\"\n", - "\n", - "# Provide these inputs in addition to inputs in Step 1\n", - "# A dictionary of input tensors' name and shape (with batch)\n", - "input_tensor_shapes = {\"Placeholder:0\":[1,784]} # batch size is 1\n", - "# Output CoreML model path\n", - "coreml_model_file = './model.mlmodel'\n", - "output_tensor_names = ['Softmax:0']\n", - "\n", - "\n", - "# Call the converter\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file, \n", - " mlmodel_path=coreml_model_file, \n", - " input_name_shape_dict=input_tensor_shapes,\n", - " output_feature_names=output_tensor_names)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Step 3: Run the converted model\n", - "\"\"\"\n", - "\n", - "# Provide CoreML model with a dictionary as input. Change ':0' to '__0'\n", - "# as Swift / Objective-C code generation do not allow colons in variable names\n", - "np.random.seed(100)\n", - "coreml_inputs = {'Placeholder__0': np.random.rand(1,1,784)} # (sequence_length=1,batch=1,channels=784)\n", - "coreml_output = coreml_model.predict(coreml_inputs, useCPUOnly=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(coreml_output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_train.py b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_train.py deleted file mode 100644 index f758d7f8c..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/linear_mnist_train.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import print_function -import os -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data # Import MINST data - -def linear_model(x): - # x is the image input - # mnist data image of shape 28*28=784 - - # Set model weights - W = tf.Variable(tf.zeros([784, 10])) - b = tf.Variable(tf.zeros([10])) - - # Construct model - pred = tf.nn.softmax(tf.matmul(x, W) + b) - - # Return the last op - return pred - - -def train(): - mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) - # instantiate the model in the default graph - x = tf.placeholder(tf.float32, [None, 784]) - - print('image_input: ', x) - #print 'image_input: ', x - pred = 
linear_model(x) - #print 'pred output:', pred - - print('pred output:', pred) - - # Add training components to it - # 0-9 digits recognition => 10 classes - y = tf.placeholder(tf.float32, [None, 10]) - - # Define training hyper-parameters - learning_rate = 0.01 - training_epochs = 25 - batch_size = 100 - display_step = 1 - - # Define Cross Entropy loss - cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1)) - # Use Gradient Descent - optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) - - # Initialize the variables (i.e. assign their default value) - init = tf.global_variables_initializer() - - # Use a saver to save checkpoints - saver = tf.train.Saver() - # Training starts here - with tf.Session() as sess: - sess.run(init) - # Training cycle - for epoch in range(training_epochs): - avg_cost = 0. - total_batch = int(mnist.train.num_examples/batch_size) - # Loop over all batches - for i in range(total_batch): - batch_xs, batch_ys = mnist.train.next_batch(batch_size) - # Fit training using batch data - _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, - y: batch_ys}) - # Compute average loss - avg_cost += c / total_batch - # Display logs per epoch step - if (epoch+1) % display_step == 0: - print(("Epoch: {:04d} , cost= {:.9f}").format(epoch+1,avg_cost)) - #print "Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost) - print('Training Done. Now save the checkpoint...') - #print 'Training Done. Now save the checkpoint...' - save_dir = './checkpoints' - save_path = os.path.join(save_dir, 'model.ckpt') - if not os.path.exists(save_dir): - os.mkdir(save_dir) - save_path = saver.save(sess, save_path) - tf.train.write_graph(sess.graph, './', 'model.pbtxt') - - -if __name__ == '__main__': - - # Train the model - train() - diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/readme.md b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/readme.md deleted file mode 100644 index 05943101a..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/readme.md +++ /dev/null @@ -1,29 +0,0 @@ -## Convert TensorFlow 1 models to the Core ML model format - -- **[deep_speech.ipynb](deep_speech.ipynb)** - -An example to freeze and convert a popular [TensorFlow implementation of the deep speech model](https://github.com/mozilla/DeepSpeech). - -- **[inception_v1_preprocessing_steps.ipynb](inception_v1_preprocessing_steps.ipynb)** - -An example to generate a classifier model with image input types, demonstrating the importance of properly setting the preprocessing parameters. - -- **[inception_v3.ipynb](inception_v3.ipynb)** - -An example to strip the `DecodeJpeg` op from the TensorFlow graph to prepare it for conversion. - -- **[linear_mnist_example.ipynb](linear_mnist_example.ipynb)** - -An example to get a frozen graph from the checkpoint and graph description files generated by training in TensorFlow. - -- **[ssd_example.ipynb](ssd_example.ipynb)** - -An example to extract a portion of the TensorFlow graph that can be converted, from the overall graph that may have unsupported ops. - -- **[style_transfer_example.ipynb](style_transfer_example.ipynb)** - -An example to edit a Core ML model to get an image output type (by default the outputs are `MultiArrays`). - -- **[custom_layer_examples.ipynb](custom_layer_examples.ipynb)** - -A few examples to demonstrate the process of adding custom Core ML layers for unsupported TensorFlow ops. 
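A detail that recurs across all of these examples is the renaming of TF tensor names into CoreML feature names. A minimal sketch of the convention (the helper function is hypothetical; the notebooks inline the same `replace` calls): colons and slashes are not legal in the Swift/Obj-C identifiers Xcode generates, so both become double underscores.

```python
def coreml_feature_name(tf_tensor_name):
    # Sketch: ':' and '/' are replaced with '__' in tfcoreml-generated models.
    return tf_tensor_name.replace(':', '__').replace('/', '__')

assert coreml_feature_name('Softmax:0') == 'Softmax__0'
assert (coreml_feature_name('InceptionV1/Logits/Predictions/Softmax:0')
        == 'InceptionV1__Logits__Predictions__Softmax__0')
```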
diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/ssd_example.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/ssd_example.ipynb deleted file mode 100644 index bf4fee9da..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/ssd_example.ipynb +++ /dev/null @@ -1,289 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# MobileNet-SSD Object Detection Example\n", - "This example demonstrates the workflow to convert a publicly available TensorFlow model for object detection into CoreML, and verify its numerical correctness against the TensorFlow model.\n", - "\n", - "We recommend you go through the MNIST example (linear_mnist_example.ipynb) and Inception V3 example before this one, as they contain important documentation for the workflow.\n", - "\n", - "We use a MobileNet + SSD model provided by Google, which can be downloaded at this URL:\n", - "https://storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_android_export.zip\n", - "\n", - "Please refer to the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) for more details.\n", - "\n", - "Also, please refer to [here](https://developer.apple.com/documentation/coreml) for detailed documentation of CoreML." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "import os, sys, zipfile\n", - "from os.path import dirname\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.core.framework import graph_pb2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Download the model and class label package\n", - "mobilenet_ssd_url = 'https://storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_android_export.zip'\n", - "example_dir = '/tmp/tfcoreml_ssd_example/'\n", - "if not os.path.exists(example_dir):\n", - " os.makedirs(example_dir)\n", - "mobilenet_ssd_fpath = example_dir + 'ssd_mobilenet_v1_android_export.zip'\n", - "if sys.version_info[0] < 3:\n", - " import urllib\n", - " urllib.urlretrieve(mobilenet_ssd_url, mobilenet_ssd_fpath)\n", - "else:\n", - " import urllib.request\n", - " urllib.request.urlretrieve(mobilenet_ssd_url, mobilenet_ssd_fpath)\n", - "zip_ref = zipfile.ZipFile(mobilenet_ssd_fpath, 'r')\n", - "zip_ref.extractall(example_dir)\n", - "zip_ref.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the TF graph definition\n", - "tf_model_path = example_dir + 'ssd_mobilenet_v1_android_export.pb'\n", - "with open(tf_model_path, 'rb') as f:\n", - " serialized = f.read()\n", - "tf.reset_default_graph()\n", - "original_gdef = tf.GraphDef()\n", - "original_gdef.ParseFromString(serialized)\n", - "\n", - "with tf.Graph().as_default() as g:\n", - " tf.import_graph_def(original_gdef, name='')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The full MobileNet-SSD TF model contains 4 subgraphs: *Preprocessor*, *FeatureExtractor*, *MultipleGridAnchorGenerator*, and *Postprocessor*. Here we will extract the *FeatureExtractor* from the model and strip off the other subgraphs, as these subgraphs contain structures not currently supported in CoreML. 
The tasks in *Preprocessor*, *MultipleGridAnchorGenerator* and *Postprocessor* subgraphs can be achieved by other means, although they are non-trivial.\n", - "\n", - "By inspecting the TensorFlow GraphDef, it can be found that:\n", - "(1) the input tensor of the MobileNet-SSD Feature Extractor is `Preprocessor/sub:0` of shape `(1,300,300,3)`, which contains the preprocessed image.\n", - "(2) The output tensors are: `concat:0` of shape `(1,1917,4)`, the box coordinate encoding for each of the 1917 anchor boxes; and `concat_1:0` of shape `(1,1917,91)`, the confidence scores (logits) for each of the 91 object classes (including 1 class for background), for each of the 1917 anchor boxes.\n", - "So we extract the feature extractor as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Strip unused subgraphs and save it as another frozen TF model\n", - "from tensorflow.python.tools import strip_unused_lib\n", - "from tensorflow.python.framework import dtypes\n", - "from tensorflow.python.platform import gfile\n", - "input_node_names = ['Preprocessor/sub']\n", - "output_node_names = ['concat', 'concat_1']\n", - "gdef = strip_unused_lib.strip_unused(\n", - " input_graph_def = original_gdef,\n", - " input_node_names = input_node_names,\n", - " output_node_names = output_node_names,\n", - " placeholder_type_enum = dtypes.float32.as_datatype_enum)\n", - "# Save the feature extractor to an output file\n", - "frozen_model_file = example_dir + 'ssd_mobilenet_feature_extractor.pb'\n", - "with gfile.GFile(frozen_model_file, \"wb\") as f:\n", - " f.write(gdef.SerializeToString())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now we have a TF model ready to be converted to CoreML\n", - "import tfcoreml\n", - "# Supply a dictionary of input tensors' name and shape (with batch axis)\n", - "input_tensor_shapes = {\"Preprocessor/sub:0\":[1,300,300,3]} # batch size is 1\n", - "# Output CoreML model path\n", - "coreml_model_file = example_dir + 'ssd_mobilenet_feature_extractor.mlmodel'\n", - "# The TF model's output tensor name\n", - "output_tensor_names = ['concat:0', 'concat_1:0']\n", - "\n", - "# Call the converter. This may take a while\n", - "coreml_model = tfcoreml.convert(\n", - " tf_model_path=frozen_model_file,\n", - " mlmodel_path=coreml_model_file,\n", - " input_name_shape_dict=input_tensor_shapes,\n", - " output_feature_names=output_tensor_names)\n", - "\n", - "# CoreML saved at location: /tmp/tfcoreml_ssd_example/ssd_mobilenet_feature_extractor.mlmodel" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we have converted the model to CoreML, we can test its numerical correctness by comparing it with the TensorFlow model. 
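Before the comparison, a quick sanity check on the stripped graph can save debugging time. A sketch (an addition, assuming the `gdef` produced above): confirm that the stripped `GraphDef` still contains both output nodes and that its input has become a float32 placeholder named `Preprocessor/sub`.

```python
# Sketch: sanity-check the stripped GraphDef before comparing outputs.
node_names = {node.name for node in gdef.node}
assert 'concat' in node_names and 'concat_1' in node_names  # outputs kept

# strip_unused() replaces the original input with a Placeholder node
placeholders = [node.name for node in gdef.node if node.op == 'Placeholder']
print(placeholders)  # expect ['Preprocessor/sub']
```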
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load an image as input\n", - "import PIL.Image\n", - "import requests\n", - "from io import BytesIO\n", - "from matplotlib.pyplot import imshow\n", - "img_url = 'https://upload.wikimedia.org/wikipedia/commons/9/93/Golden_Retriever_Carlos_%2810581910556%29.jpg'\n", - "response = requests.get(img_url)\n", - "%matplotlib inline\n", - "img = PIL.Image.open(BytesIO(response.content))\n", - "imshow(np.asarray(img))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Preprocess the image - normalize to [-1,1]\n", - "img = img.resize([300,300], PIL.Image.ANTIALIAS)\n", - "img_array = np.array(img).astype(np.float32) * 2.0 / 255 - 1\n", - "batch_img_array = img_array[None,:,:,:]\n", - "\n", - "# Evaluate TF\n", - "tf.reset_default_graph()\n", - "g = tf.import_graph_def(gdef)\n", - "\n", - "tf_input_name = 'Preprocessor/sub:0'\n", - "# concat:0 are the bounding-box encodings of the 1917 anchor boxes\n", - "# concat_1:0 are the confidence scores of 91 classes of anchor boxes\n", - "tf_output_names = ['concat:0', 'concat_1:0']\n", - "with tf.Session(graph = g) as sess:\n", - " image_input_tensor = sess.graph.get_tensor_by_name(\"import/\" + tf_input_name)\n", - " tf_output_tensors = [sess.graph.get_tensor_by_name(\"import/\" + output_name)\n", - " for output_name in tf_output_names]\n", - " tf_output_values = sess.run(tf_output_tensors, \n", - " feed_dict={image_input_tensor: batch_img_array})\n", - " tf_box_encodings, tf_scores = tf_output_values\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we evaluate CoreML model and compare result against TensorFlow model.\n", - "CoreML uses 5D arrays to represent rank-1 to rank-5 tensors. The 5 axes are in the order of `(S,B,C,H,W)`, where S is sequence length, B is batch size, C is number of channels, H is height and W is width. This data layout is usually different from TensorFlow's default layout, where a rank-4 tensor for convolutional nets usually uses `(B,H,W,C)` layout. To make a comparison, one of the result should be transposed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import coremltools\n", - "# Input shape should be [1,1,3,300,300]\n", - "mlmodel_path = example_dir + 'ssd_mobilenet_feature_extractor.mlmodel'\n", - "img_array_coreml = np.transpose(img_array, (2,0,1))[None,None,:,:,:]\n", - "mlmodel = coremltools.models.MLModel(mlmodel_path)\n", - "# Pay attention to '__0'. 
We change ':0' to '__0' to make sure MLModel's \n", - "# generated Swift/Obj-C code is semantically correct\n", - "coreml_input_name = tf_input_name.replace(':', '__').replace('/', '__')\n", - "coreml_output_names = [output_name.replace(':', '__').replace('/', '__') \n", - " for output_name in tf_output_names]\n", - "coreml_input = {coreml_input_name: img_array_coreml}\n", - "\n", - "# When useCPUOnly == True, the relative error should be around 0.001\n", - "# When useCPUOnly == False on GPU-enabled devices, relative errors \n", - "# are expected to be larger due to the use of lower-precision arithmetic\n", - "\n", - "coreml_outputs_dict = mlmodel.predict(coreml_input, useCPUOnly=True)\n", - "coreml_outputs = [coreml_outputs_dict[out_name] for out_name in \n", - " coreml_output_names]\n", - "coreml_box_encodings, coreml_scores = coreml_outputs\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now we compare the differences of the two results\n", - "def max_relative_error(x,y):\n", - " den = np.maximum(x,y)\n", - " den = np.maximum(den,1)\n", - " rel_err = (np.abs(x-y))/den\n", - " return np.max(rel_err)\n", - "\n", - "rel_error_box = max_relative_error(coreml_box_encodings.squeeze(), \n", - " np.transpose(tf_box_encodings.squeeze(),(1,0)))\n", - "rel_error_score = max_relative_error(coreml_scores.squeeze(), \n", - " np.transpose(tf_scores.squeeze(),(1,0)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Max relative error on box encoding: %f' %(rel_error_box))\n", - "print('Max relative error on scores: %f' %(rel_error_score))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Up to this point we have converted the MobileNet-SSD feature extractor. The remaining tasks are post-processing tasks, including generating anchor boxes, decoding the bounding boxes, and performing non-maximum suppression. These necessary tasks are not trivial, and CoreML does not contain out-of-the-box support for them at this time, so developers should write their own post-processing code." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/style_transfer_example.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/style_transfer_example.ipynb deleted file mode 100644 index 8977ddbda..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_1/style_transfer_example.ipynb +++ /dev/null @@ -1,401 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Style Transfer Network\n", - "In this notebook we will go through the process of converting the style transfer model (the one linked on the readme page) to CoreML and evaluating it. This model takes in an image and a style index (one of 26 possible styles) and outputs the stylized image. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We first download the TF model (.pb file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Download the model \n", - "from __future__ import print_function\n", - "import coremltools\n", - "import os,sys\n", - "import zipfile\n", - "def download_file_and_unzip(url, dir_path='.'):\n", - " \"\"\"Download the frozen TensorFlow model and unzip it.\n", - " url - The URL address of the frozen file\n", - " dir_path - local directory\n", - " \"\"\"\n", - " if not os.path.exists(dir_path):\n", - " os.makedirs(dir_path)\n", - " k = url.rfind('/')\n", - " fname = url[k+1:]\n", - " fpath = os.path.join(dir_path, fname)\n", - "\n", - " if not os.path.exists(fpath):\n", - " if sys.version_info[0] < 3:\n", - " import urllib\n", - " urllib.urlretrieve(url, fpath)\n", - " else:\n", - " import urllib.request\n", - " urllib.request.urlretrieve(url, fpath)\n", - " zip_ref = zipfile.ZipFile(fpath, 'r')\n", - " zip_ref.extractall(dir_path)\n", - " zip_ref.close() \n", - "\n", - "inception_v1_url = 'https://storage.googleapis.com/download.tensorflow.org/models/stylize_v1.zip'\n", - "download_file_and_unzip(inception_v1_url)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For conversion to CoreML, we need to find the input and output tensor names in the TF graph. This will also be required to run the TF graph for numerical accuracy check. Lets load the TF graph def and try to find the names. Inputs are generally the tensors that are outputs of the \"Placeholder\" op. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the TF graph definition\n", - "import tensorflow as tf\n", - "tf_model_path = './stylize_quantized.pb'\n", - "with open(tf_model_path, 'rb') as f:\n", - " serialized = f.read()\n", - "tf.reset_default_graph()\n", - "original_gdef = tf.GraphDef()\n", - "original_gdef.ParseFromString(serialized)\n", - "\n", - "# Lets get some details about a few ops in the beginning and the end of the graph\n", - "with tf.Graph().as_default() as g:\n", - " tf.import_graph_def(original_gdef, name='')\n", - " ops = g.get_operations()\n", - " N = len(ops)\n", - " for i in range(N):\n", - " if ops[i].type == 'Placeholder':\n", - " for x in ops[i].outputs:\n", - " print(\"output name = {}, shape: {},\".format(x.name, x.get_shape())),\n", - " print('\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are two inputs: the image input named \"input:0\" and the style index input named \"style_num:0\". 
To find the output, let's print some info about the last few ops" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with tf.Graph().as_default() as g:\n", - " tf.import_graph_def(original_gdef, name='')\n", - " ops = g.get_operations()\n", - " N = len(ops)\n", - " for i in range(N-10,N):\n", - " print('\\n\\nop id {} : op type: \"{}\"'.format(str(i), ops[i].type));\n", - " print('input(s):'),\n", - " for x in ops[i].inputs:\n", - " print(\"name = {}, shape: {}, \".format(x.name, x.get_shape())),\n", - " print('\\noutput(s):'),\n", - " for x in ops[i].outputs:\n", - " print(\"name = {}, shape: {},\".format(x.name, x.get_shape())), " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Generally some knowledge about the network may be required to correctly determine the output. In this case the output of the \"Sigmoid\" op is the normalized image (between 0-1) which goes into the \"Mul\" op followed by the \"Squeeze\" op. The final output we are interested in is the tensor \"Squeeze:0\", which is the RGB image with values between 0-255. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's convert the model to CoreML. In this particular model, the TF graph can take an image of any size (it will produce an output image of the same size). However, CoreML requires us to specify the exact size of all its inputs. Hence we choose a fixed size for our image. Let's say 256. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import tfcoreml\n", - "mlmodel = tfcoreml.convert(\n", - " tf_model_path = tf_model_path,\n", - " mlmodel_path = './stylize.mlmodel',\n", - " output_feature_names = ['Squeeze:0'],\n", - " input_name_shape_dict = {'input:0':[1,256,256,3], 'style_num:0':[26]})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We see that the CoreML model expects two inputs: 'style\\_num_\\_0', a multiarray that is a sequence of length 26, and 'input_\\_0', a multiarray of shape (3,256,256) corresponding to the image input. It produces a multiarray output called 'Squeeze_\\_0'." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's now grab an image and, using coremltools, see what the CoreML model predicts. 
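Before that, one side note on the fixed input size: since the TF graph itself is size-agnostic, converting for a different resolution only means declaring a different shape. A sketch (an addition; the 512x512 size and output path are hypothetical):

```python
# Sketch: the same conversion at a different fixed resolution.
mlmodel_512 = tfcoreml.convert(
    tf_model_path = tf_model_path,
    mlmodel_path = './stylize_512.mlmodel',  # hypothetical output path
    output_feature_names = ['Squeeze:0'],
    input_name_shape_dict = {'input:0': [1, 512, 512, 3], 'style_num:0': [26]})
```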
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import PIL\n", - "import requests\n", - "from io import BytesIO\n", - "from matplotlib.pyplot import imshow\n", - "# This is an image of a golden retriever from Wikipedia\n", - "img_url = 'https://upload.wikimedia.org/wikipedia/commons/9/93/Golden_Retriever_Carlos_%2810581910556%29.jpg'\n", - "response = requests.get(img_url)\n", - "%matplotlib inline\n", - "img = PIL.Image.open(BytesIO(response.content))\n", - "img = img.resize([256,256], PIL.Image.ANTIALIAS)\n", - "img_np = np.asarray(img).astype(np.float32)\n", - "print( img_np.shape, img_np.flatten()[:5])\n", - "imshow(img_np/255.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Transpose the image since CoreML requires C,H,W format (3,256,256)\n", - "coreml_image_input = np.transpose(img_np, (2,0,1))\n", - "\n", - "# The style index is a one-hot vector: a vector of zeros of length 26, with 1 in the index whose style we want\n", - "index = np.zeros((26)).astype(np.float32)\n", - "index[0] = 1 #Lets say we want to get style 0\n", - "\n", - "# CoreML Multi array interpreation is (Seq, Batch, C,H,W). Hence the style index input, which is a sequence,\n", - "# must be of shape (26,1,1,1,1)\n", - "coreml_style_index = index[:,np.newaxis,np.newaxis,np.newaxis,np.newaxis]\n", - "\n", - "coreml_input = {'input__0': coreml_image_input, 'style_num__0': coreml_style_index}\n", - "coreml_out = mlmodel.predict(coreml_input, useCPUOnly = True)['Squeeze__0']\n", - "print( coreml_out.shape, coreml_out.flatten()[:5])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Transpose back for visualization with imshow\n", - "coreml_out = np.transpose(np.squeeze(coreml_out), (1,2,0))\n", - "imshow(coreml_out/255.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "That looks cool! Lets try another style. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index = np.zeros((26)).astype(np.float32)\n", - "index[10] = 1 \n", - "coreml_style_index = index[:,np.newaxis,np.newaxis,np.newaxis,np.newaxis]\n", - "coreml_input = {'input__0': coreml_image_input, 'style_num__0': coreml_style_index}\n", - "coreml_out = mlmodel.predict(coreml_input, useCPUOnly = True)['Squeeze__0']\n", - "coreml_out = np.transpose(np.squeeze(coreml_out), (1,2,0))\n", - "imshow(coreml_out/255.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Lets also try to evaluate the same image and style with the TF model to check that the conversion was correct (we should get similar output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tf_img = np.expand_dims(img_np,axis=0)\n", - "tf_input_name_image = 'input:0'\n", - "tf_input_name_style_index = 'style_num:0'\n", - "feed_dict = {tf_input_name_image: tf_img, tf_input_name_style_index: index}\n", - "tf_output_name = 'Squeeze:0'\n", - "with tf.Session(graph = g) as sess:\n", - " tf_out = sess.run(tf_output_name, \n", - " feed_dict=feed_dict)\n", - "imshow(tf_out/255.0) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us look at the input/output description of the CoreML model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(mlmodel)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We see that the input \"input_\\_0\" and the output \"Squeeze_\\_0\" are both multiarrays. Since they represent images, it may be more convenient to make them image types. The input can be made of type image by converting again and passing the \"image_input_names\" argument to the convert function call." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mlmodel = tfcoreml.convert(\n", - " tf_model_path = tf_model_path,\n", - " mlmodel_path = './stylize.mlmodel',\n", - " output_feature_names = ['Squeeze:0'],\n", - " input_name_shape_dict = {'input:0':[1,256,256,3], 'style_num:0':[26]},\n", - " image_input_names = ['input:0'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(mlmodel)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We see that the input is of type image now. To convert the output type to image, we use the fact that the mlmodel is in protobuf format and can be edited directly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "spec = mlmodel.get_spec()\n", - "output = spec.description.output[0]\n", - "print(output.name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2\n", - "output.type.imageType.colorSpace = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('RGB')\n", - "output.type.imageType.width = 256\n", - "output.type.imageType.height = 256\n", - "print(spec.description)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now both the input and output are images. Let's save the spec and call predict again."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "coremltools.models.utils.save_spec(spec, './stylize.mlmodel')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mlmodel = coremltools.models.utils._get_model(spec)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "coreml_input = {'input__0': img, 'style_num__0': coreml_style_index} # now we can pass in the PIL image\n", - "coreml_out = mlmodel.predict(coreml_input, useCPUOnly = True)['Squeeze__0'] # coreml_out is also a PIL image\n", - "imshow(np.asarray(coreml_out).astype(np.float32)/255.0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.15" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/readme.md b/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/readme.md deleted file mode 100644 index d286d71a6..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/readme.md +++ /dev/null @@ -1,12 +0,0 @@ -## Convert TensorFlow 2 models to the Core ML model format - - -- **[tf_keras_fashion_mnist.ipynb](tf_keras_fashion_mnist.ipynb)** - -An example to train, evaluate, and test a simple model using the [fashion MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/) dataset from scratch with `tf.keras` in TensorFlow 2+ and convert it to Core ML format. - -- **[tf_low_level_apis.ipynb](tf_low_level_apis.ipynb)** - -A few examples to define a simple model using low-level APIs or `@tf.function()` in TensorFlow 2+, save the model, and convert it to Core ML format. - - diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/tf_keras_fashion_mnist.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/tf_keras_fashion_mnist.ipynb deleted file mode 100644 index 1a89143d7..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/tf_keras_fashion_mnist.ipynb +++ /dev/null @@ -1,253 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Fashion MNIST with `tf.keras` from Scratch\n", - "\n", - "This example demonstrates the workflow to create, train, and validate a \n", - "TensorFlow `tf.keras` model, save it as an HDF5 `.h5` model, and convert it \n", - "to Core ML `.mlmodel` format using the `tfcoreml` converter. For more\n", - "examples, refer to the `test_tf_2x.py` file.\n", - " \n", - "Note: \n", - "\n", - "- This notebook was tested with the following dependencies:\n", - "\n", - "```\n", - "tensorflow==2.0.0\n", - "coremltools==3.1\n", - "tfcoreml==1.1\n", - "```\n", - "\n", - "- Models from TensorFlow 2.0+ are supported only for `minimum_ios_deployment_target>='13'`.\n", - "You can also use `coremltools.converters.tensorflow.convert()` \n", - "instead of `tfcoreml.convert()` to convert your model."
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W1101 14:00:52.328081 4735601984 __init__.py:74] TensorFlow version 2.0.0 detected. Last version known to be fully compatible is 1.14.0 .\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2.0.0\n" - ] - } - ], - "source": [ - "import tensorflow as tf\n", - "import numpy as np\n", - "import tfcoreml\n", - "\n", - "print(tf.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# prepare the fashion_mnist dataset\n", - "fashion_mnist = tf.keras.datasets.fashion_mnist\n", - "(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()\n", - "\n", - "train_images = train_images / 255.0\n", - "test_images = test_images / 255.0" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# create a simple model using tf.keras\n", - "keras_model = tf.keras.Sequential([\n", - " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10, activation='softmax')\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 60000 samples\n", - "Epoch 1/10\n", - "60000/60000 [==============================] - 3s 46us/sample - loss: 0.4976 - accuracy: 0.8258\n", - "Epoch 2/10\n", - "60000/60000 [==============================] - 2s 39us/sample - loss: 0.3749 - accuracy: 0.8634\n", - "Epoch 3/10\n", - "60000/60000 [==============================] - 2s 39us/sample - loss: 0.3377 - accuracy: 0.8774\n", - "Epoch 4/10\n", - "60000/60000 [==============================] - 2s 38us/sample - loss: 0.3111 - accuracy: 0.8853\n", - "Epoch 5/10\n", - "60000/60000 [==============================] - 2s 38us/sample - loss: 0.2921 - accuracy: 0.8909\n", - "Epoch 6/10\n", - "60000/60000 [==============================] - 2s 39us/sample - loss: 0.2788 - accuracy: 0.8960\n", - "Epoch 7/10\n", - "60000/60000 [==============================] - 2s 39us/sample - loss: 0.2669 - accuracy: 0.9008\n", - "Epoch 8/10\n", - "60000/60000 [==============================] - 2s 40us/sample - loss: 0.2535 - accuracy: 0.9047\n", - "Epoch 9/10\n", - "60000/60000 [==============================] - 2s 40us/sample - loss: 0.2442 - accuracy: 0.9080\n", - "Epoch 10/10\n", - "60000/60000 [==============================] - 2s 40us/sample - loss: 0.2348 - accuracy: 0.9120\n", - "10000/1 - 0s - loss: 0.2264 - accuracy: 0.8833\n", - "\n", - "Test accuracy: 0.8833\n" - ] - } - ], - "source": [ - "# train and evaluate the keras model\n", - "keras_model.compile(optimizer='adam',\n", - " loss='sparse_categorical_crossentropy',\n", - " metrics=['accuracy'])\n", - "\n", - "keras_model.fit(train_images, train_labels, epochs=10)\n", - "test_loss, test_acc = keras_model.evaluate(test_images, test_labels, verbose=2)\n", - "\n", - "print('\\nTest accuracy:', test_acc)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mnist_fashion_model.h5\r\n" - ] - } - ], - "source": [ - "# save the tf.keras model as an .h5 model file\n", - "model_file = './mnist_fashion_model.h5'\n", - 
"keras_model.save(model_file)\n", - "\n", - "!ls mnist_fashion_model.h5" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 assert nodes deleted\n", - "['sequential/dense_1/BiasAdd/ReadVariableOp/resource:0', 'sequential/dense/MatMul/ReadVariableOp:0', 'sequential/dense/BiasAdd/ReadVariableOp:0', 'sequential/flatten/Reshape/shape:0', 'sequential/dense/BiasAdd/ReadVariableOp/resource:0', 'sequential/dense/MatMul/ReadVariableOp/resource:0', 'sequential/dense_1/MatMul/ReadVariableOp/resource:0', 'sequential/dense_1/BiasAdd/ReadVariableOp:0', 'sequential/dense_1/MatMul/ReadVariableOp:0']\n", - "4 nodes deleted\n", - "0 nodes deleted\n", - "0 nodes deleted\n", - "[Op Fusion] fuse_bias_add() deleted 4 nodes.\n", - "2 identity nodes deleted\n", - "2 disconnected nodes deleted\n", - "[SSAConverter] Converting function main ...\n", - "[SSAConverter] [1/7] Converting op type: 'Placeholder', name: 'flatten_input', output_shape: (1, 28, 28).\n", - "[SSAConverter] [2/7] Converting op type: 'Const', name: 'sequential/flatten/Reshape/shape', output_shape: (2,).\n", - "[SSAConverter] [3/7] Converting op type: 'Reshape', name: 'sequential/flatten/Reshape', output_shape: (1, 784).\n", - "[SSAConverter] [4/7] Converting op type: 'MatMul', name: 'sequential/dense/MatMul', output_shape: (1, 128).\n", - "[SSAConverter] [5/7] Converting op type: 'Relu', name: 'sequential/dense/Relu', output_shape: (1, 128).\n", - "[SSAConverter] [6/7] Converting op type: 'MatMul', name: 'sequential/dense_1/MatMul', output_shape: (1, 10).\n", - "[SSAConverter] [7/7] Converting op type: 'Softmax', name: 'Identity', output_shape: (1, 10).\n", - "[Core ML Pass] 1 disconnected constants nodes deleted\n", - "mnist_fashion_model.mlmodel\r\n" - ] - } - ], - "source": [ - "# get input, output node names for the TF graph from the Keras model\n", - "input_name = keras_model.inputs[0].name.split(':')[0]\n", - "keras_output_node_name = keras_model.outputs[0].name.split(':')[0]\n", - "graph_output_node_name = keras_output_node_name.split('/')[-1]\n", - "\n", - "# convert this model to Core ML format\n", - "model = tfcoreml.convert(tf_model_path=model_file,\n", - " input_name_shape_dict={input_name: (1, 28, 28)},\n", - " output_feature_names=[graph_output_node_name],\n", - " minimum_ios_deployment_target='13')\n", - "model.save('./mnist_fashion_model.mlmodel')\n", - "\n", - "!ls mnist_fashion_model.mlmodel" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[1.5719648e-09 1.7905072e-09 5.9817944e-07 8.1820750e-10 9.6943937e-09\n", - " 5.0254831e-20 1.5249961e-07 6.2053448e-17 9.9999928e-01 1.0400648e-15]]\n", - "[[1.57196778e-09 1.79050730e-09 5.98181146e-07 8.18209001e-10\n", - " 9.69441238e-09 5.02548314e-20 1.52499751e-07 6.20534484e-17\n", - " 9.99999285e-01 1.04006487e-15]]\n" - ] - } - ], - "source": [ - "# run predictions with fake image as an input\n", - "fake_image = np.random.rand(1, 28, 28)\n", - "\n", - "keras_predictions = keras_model.predict(fake_image)\n", - "print(keras_predictions[:10])\n", - "\n", - "coreml_predictions = model.predict({'flatten_input': fake_image})['Identity']\n", - "print(coreml_predictions[:10])\n", - "\n", - "assert(np.allclose(keras_predictions, coreml_predictions))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, 
- "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/tf_low_level_apis.ipynb b/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/tf_low_level_apis.ipynb deleted file mode 100644 index e76edf766..000000000 --- a/examples/neural_network_inference/tensorflow_converter/Tensorflow_2/tf_low_level_apis.ipynb +++ /dev/null @@ -1,311 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# TensorFlow 2.0+ Low Level APIs Convert Example\n", - "\n", - "This example demonstrates the workflow to build a model using\n", - "TensorFlow 2.0+ low-level APIs and convert it to Core ML \n", - "`.mlmodel` format using the `coremltools.converters.tensorflow` converter.\n", - "For more example, refer `test_tf_2x.py` file.\n", - "\n", - "Note: \n", - "\n", - "- This notebook was tested with following dependencies:\n", - "\n", - "```\n", - "tensorflow==2.0.0\n", - "coremltools==3.1\n", - "```\n", - "\n", - "- Models from TensorFlow 2.0+ is supported only for `minimum_ios_deployment_target>=13`.\n", - "You can also use `tfcoreml.convert()` instead of \n", - "`coremltools.converters.tensorflow.convert()` to convert your model." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W1101 14:02:33.174557 4762860864 __init__.py:74] TensorFlow version 2.0.0 detected. 
Last version known to be fully compatible is 1.14.0 .\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2.0.0\n", - "3.1\n" - ] - } - ], - "source": [ - "import tensorflow as tf\n", - "import numpy as np\n", - "import coremltools\n", - "\n", - "print(tf.__version__)\n", - "print(coremltools.__version__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using Low-Level APIs" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "W1101 14:02:33.537978 4762860864 deprecation.py:506] From /Volumes/data/venv-py36/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1781: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n" - ] - } - ], - "source": [ - "# construct a toy model with low level APIs\n", - "root = tf.train.Checkpoint()\n", - "root.v1 = tf.Variable(3.)\n", - "root.v2 = tf.Variable(2.)\n", - "root.f = tf.function(lambda x: root.v1 * root.v2 * x)\n", - "\n", - "# save the model\n", - "saved_model_dir = './tf_model'\n", - "input_data = tf.constant(1., shape=[1, 1])\n", - "to_save = root.f.get_concrete_function(input_data)\n", - "tf.saved_model.save(root, saved_model_dir, to_save)\n", - "\n", - "tf_model = tf.saved_model.load(saved_model_dir)\n", - "concrete_func = tf_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 assert nodes deleted\n", - "['Func/StatefulPartitionedCall/input/_2:0', 'StatefulPartitionedCall/mul/ReadVariableOp:0', 'statefulpartitionedcall_args_1:0', 'Func/StatefulPartitionedCall/input/_3:0', 'StatefulPartitionedCall/mul:0', 'StatefulPartitionedCall/ReadVariableOp:0', 'statefulpartitionedcall_args_2:0']\n", - "6 nodes deleted\n", - "0 nodes deleted\n", - "0 nodes deleted\n", - "2 identity nodes deleted\n", - "0 disconnected nodes deleted\n", - "[SSAConverter] Converting function main ...\n", - "[SSAConverter] [1/3] Converting op type: 'Placeholder', name: 'x', output_shape: (1, 1).\n", - "[SSAConverter] [2/3] Converting op type: 'Const', name: 'StatefulPartitionedCall/mul'.\n", - "[SSAConverter] [3/3] Converting op type: 'Mul', name: 'Identity', output_shape: (1, 1).\n" - ] - } - ], - "source": [ - "# convert model into Core ML format\n", - "model = coremltools.converters.tensorflow.convert(\n", - " [concrete_func],\n", - " inputs={'x': (1, 1)},\n", - " outputs=['Identity']\n", - ")\n", - "\n", - "assert isinstance(model, coremltools.models.MLModel)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using Control Flow" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# construct a TensorFlow 2.0+ model with tf.function()\n", - "\n", - "@tf.function(input_signature=[tf.TensorSpec([], tf.float32)])\n", - "def control_flow(x):\n", - " if x <= 0:\n", - " return 0.\n", - " else:\n", - " return x * 3.\n", - "\n", - "to_save = tf.Module()\n", - "to_save.control_flow = control_flow\n", - "\n", - "saved_model_dir = './tf_model'\n", - "tf.saved_model.save(to_save, saved_model_dir)\n", - "tf_model = 
tf.saved_model.load(saved_model_dir)\n", - "concrete_func = tf_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 assert nodes deleted\n", - "['PartitionedCall/cond/then/_2/Identity_1:0', 'PartitionedCall/LessEqual/y:0', 'PartitionedCall/cond/else/_3/mul/y:0', 'Func/PartitionedCall/cond/then/_2/output/_14:0', 'PartitionedCall/cond/then/_2/Const_1:0']\n", - "2 nodes deleted\n", - "Fixing cond at merge location: PartitionedCall/cond/output/_9\n", - "In an IFF node fp32 != tensor[fp32,1]\n", - "0 nodes deleted\n", - "0 nodes deleted\n", - "2 identity nodes deleted\n", - "0 disconnected nodes deleted\n", - "[SSAConverter] Converting function main ...\n", - "[SSAConverter] [1/7] Converting op type: 'Placeholder', name: 'x', output_shape: (1,).\n", - "[SSAConverter] [2/7] Converting op type: 'Const', name: 'PartitionedCall/LessEqual/y'.\n", - "[SSAConverter] [3/7] Converting op type: 'Const', name: 'Func/PartitionedCall/cond/then/_2/output/_14'.\n", - "[SSAConverter] [4/7] Converting op type: 'Const', name: 'PartitionedCall/cond/else/_3/mul/y'.\n", - "[SSAConverter] [5/7] Converting op type: 'LessEqual', name: 'PartitionedCall/LessEqual', output_shape: (1,).\n", - "[SSAConverter] [6/7] Converting op type: 'Mul', name: 'PartitionedCall/cond/else/_3/mul', output_shape: (1,).\n", - "[SSAConverter] [7/7] Converting op type: 'iff', name: 'Identity'.\n" - ] - } - ], - "source": [ - "# convert model into Core ML format\n", - "model = coremltools.converters.tensorflow.convert(\n", - " [concrete_func],\n", - " inputs={'x': (1,)},\n", - " outputs=['Identity']\n", - ")\n", - "\n", - "assert isinstance(model, coremltools.models.MLModel)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# try with some sample inputs\n", - "\n", - "inputs = [-3.7, 6.17, 0.0, 1984., -5.]\n", - "for data in inputs:\n", - " out1 = to_save.control_flow(data).numpy()\n", - " out2 = model.predict({'x': np.array([data])})['Identity']\n", - " np.testing.assert_array_almost_equal(out1, out2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using `tf.keras` Subclassing APIs" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "class MyModel(tf.keras.Model):\n", - " def __init__(self):\n", - " super(MyModel, self).__init__()\n", - " self.dense1 = tf.keras.layers.Dense(4)\n", - " self.dense2 = tf.keras.layers.Dense(5)\n", - "\n", - " @tf.function\n", - " def call(self, input_data):\n", - " return self.dense2(self.dense1(input_data))\n", - "\n", - "keras_model = MyModel()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 assert nodes deleted\n", - "['my_model/StatefulPartitionedCall/args_3:0', 'Func/my_model/StatefulPartitionedCall/input/_2:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/input/_11:0', 'my_model/StatefulPartitionedCall/args_4:0', 'Func/my_model/StatefulPartitionedCall/input/_4:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/input/_12:0', 'my_model/StatefulPartitionedCall/args_2:0', 'my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense_1/StatefulPartitionedCall/MatMul/ReadVariableOp:0', 
'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense_1/StatefulPartitionedCall/input/_25:0', 'Func/my_model/StatefulPartitionedCall/input/_3:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/input/_13:0', 'Func/my_model/StatefulPartitionedCall/input/_5:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/input/_10:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense_1/StatefulPartitionedCall/input/_24:0', 'my_model/StatefulPartitionedCall/args_1:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense/StatefulPartitionedCall/input/_18:0', 'my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense/StatefulPartitionedCall/BiasAdd/ReadVariableOp:0', 'my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense/StatefulPartitionedCall/MatMul/ReadVariableOp:0', 'my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense_1/StatefulPartitionedCall/BiasAdd/ReadVariableOp:0', 'Func/my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense/StatefulPartitionedCall/input/_19:0']\n", - "16 nodes deleted\n", - "0 nodes deleted\n", - "0 nodes deleted\n", - "[Op Fusion] fuse_bias_add() deleted 4 nodes.\n", - "2 identity nodes deleted\n", - "2 disconnected nodes deleted\n", - "[SSAConverter] Converting function main ...\n", - "[SSAConverter] [1/3] Converting op type: 'Placeholder', name: 'input_1', output_shape: (4, 4).\n", - "[SSAConverter] [2/3] Converting op type: 'MatMul', name: 'my_model/StatefulPartitionedCall/StatefulPartitionedCall/dense/StatefulPartitionedCall/MatMul', output_shape: (4, 4).\n", - "[SSAConverter] [3/3] Converting op type: 'MatMul', name: 'Identity', output_shape: (4, 5).\n" - ] - } - ], - "source": [ - "inputs = np.random.rand(4, 4)\n", - "\n", - "# subclassed model can only be saved as SavedModel format\n", - "keras_model._set_inputs(inputs)\n", - "saved_model_dir = './tf_model_subclassing'\n", - "keras_model.save(saved_model_dir, save_format='tf')\n", - "# convert and validate\n", - "model = coremltools.converters.tensorflow.convert(\n", - " saved_model_dir,\n", - " inputs={'input_1': (4, 4)},\n", - " outputs=['Identity']\n", - ")\n", - "assert isinstance(model, coremltools.models.MLModel)\n", - "# verify the prediction matches\n", - "keras_prediction = keras_model.predict(inputs)\n", - "prediction = model.predict({'input_1': inputs})['Identity']\n", - "np.testing.assert_array_equal(keras_prediction.shape, prediction.shape)\n", - "np.testing.assert_almost_equal(keras_prediction.flatten(), prediction.flatten(), decimal=4)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/updatable_models/OnDeviceTraining_API_Usage.md b/examples/updatable_models/OnDeviceTraining_API_Usage.md deleted file mode 100644 index 592032670..000000000 --- a/examples/updatable_models/OnDeviceTraining_API_Usage.md +++ /dev/null @@ -1,135 +0,0 @@ -# On-Device Model Personalization API Usage - -This document explains how one could use CoreML 3's update task APIs to personalize updatable MNIST digit classifier model on-device. 
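Before moving to the Swift APIs, it can be worth confirming from Python that the model on hand really was exported as updatable. Below is a minimal coremltools sketch, assuming the classifier was saved as `UpdatableMNISTDigitClassifier.mlmodel` in the working directory (the path is an assumption):

```python
import coremltools

# Load the spec of the model produced by the notebook (path assumed).
spec = coremltools.utils.load_spec('./UpdatableMNISTDigitClassifier.mlmodel')
print(spec.isUpdatable)  # expected to print True for an updatable model

# Inspect which layers were marked updatable and which loss/optimizer were attached.
builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)
builder.inspect_updatable_layers()
builder.inspect_loss_layers()
builder.inspect_optimizer()
```

If `isUpdatable` prints `False`, revisit the notebook steps referenced below before attempting an on-device update.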
A python notebook describing the steps for creating an updatable neural network model can be found [here](https://github.com/apple/coremltools/blob/master/examples/updatable_models/updatable_mnist.ipynb).
-
-Let us start by creating an inference-only instance of the `MLModel` using the auto-generated `UpdatableMNISTDigitClassifier` class. Xcode generates this class as soon as the CoreML model is added to the project.
-
-```swift
-let digitClassifier = UpdatableMNISTDigitClassifier()
-```
-
-Next, we'll define a utility method that reads sample images from the app bundle and creates training data that conforms to `MLBatchProvider`.
-
-```swift
-func createTrainingBatchProvider() throws -> MLBatchProvider {
-
-    // declared var: we append training samples to this array below
-    var trainingSamples = [MLFeatureProvider]()
-
-    // sample images are assumed to be bundled with the app and named as "<trueClassLabel>_<imageIndex>.png"
-    // we use 10 images per class for the purpose of training the MNIST model
-
-    // iterate over all class labels
-    for trueClassLabel in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] {
-
-        // iterate over 10 images per class
-        for imageIndex in 0 ..< 10 {
-
-            // access image URL from the app bundle
-            let imageURL = Bundle.main.url(forResource: "\(trueClassLabel)_\(imageIndex)",
-                                           withExtension: "png")!
-
-            // create a CVPixelBuffer containing the image used for training
-            let imageBuffer = try MLFeatureValue(imageAt: imageURL,
-                                                 pixelsWide: 28,
-                                                 pixelsHigh: 28,
-                                                 pixelFormatType: kCVPixelFormatType_OneComponent8,
-                                                 options: nil).imageBufferValue!
-
-            // create a training sample as an MLFeatureProvider
-            let trainingSample = UpdatableMNISTDigitClassifierTrainingInput(image: imageBuffer,
-                                                                            digit: trueClassLabel)
-
-            // and, hold on to the training sample
-            trainingSamples.append(trainingSample)
-        }
-    }
-
-    // return training samples as an MLBatchProvider
-    return MLArrayBatchProvider(array: trainingSamples)
-}
-```
-
-We'll define a progress handler block that gets invoked during training for all specified events. If necessary, we could update the `digitClassifier` model to use the updated model before running predictions to compute the accuracy of the updated model so far.
-
-```swift
-func progressHandler(_ context: MLUpdateContext) {
-
-    // replace the underlying MLModel instance with the updated model from the context
-    digitClassifier.model = context.model
-
-    // the updated digitClassifier can now be used to monitor training by computing prediction accuracy on a test set (usually different from the training set)
-}
-```
-
-We'll define a completion handler block which gets invoked when the training is successful or when it fails with an error. Similar to the progress handler, we could compute prediction accuracy with the updated model from the completion handler. We could also save this model to disk for later use. 
-
-```swift
-func completionHandler(_ context: MLUpdateContext) {
-
-    if (context.task.error != nil) {
-        // handle the error and return from completionHandler
-        return
-    }
-
-    // replace the underlying MLModel instance with the updated model from the context
-    digitClassifier.model = context.model
-
-    // the updated digitClassifier can now be used to compute final prediction accuracy on a validation set (usually different from the training set)
-
-    do {
-        // obtain a URL to a writable location on disk to save the updated compiled model (.mlmodelc)
-        let updatedModelURL = URL(fileURLWithPath: "")
-
-        // save the updated model to disk
-        try context.model.write(to: updatedModelURL)
-    }
-    catch {
-        // handle the error while trying to save the model to disk
-        return
-    }
-}
-```
-
-Once the handlers have been defined, we'll collect the necessary information in order to kick off the training process.
-
-* Get the updatable model URL from the app bundle:
-
-```swift
-let updatableModelURL = Bundle.main.url(forResource: "UpdatableMNISTDigitClassifier",
-                                        withExtension: "mlmodelc")!
-```
-
-* Create the training data from the sample images that were bundled with the app:
-
-```swift
-let trainingData = try createTrainingBatchProvider()
-```
-
-* Create an instance of `MLModelConfiguration` and set any parameters (if required):
-
-```swift
-let configuration = MLModelConfiguration()
-
-// request that the training loop run for 10 epochs and use a 0.02 learning rate
-configuration.parameters = [MLParameterKey.epochs : 10, MLParameterKey.learningRate : 0.02]
-```
-
-* Set up progressHandlers by specifying epoch end as an event of interest:
-
-```swift
-let progressHandlers = MLUpdateProgressHandlers(forEvents: .epochEnd,
-                                                progressHandler: progressHandler,
-                                                completionHandler: completionHandler)
-```
-
-* Lastly, set up an update task to update the model at location `updatableModelURL` with `trainingData` and the model `configuration`, which specifies the number of epochs as 10 and the learning rate as 0.02. `trainingData` contains all samples in the training set. It conforms to `MLBatchProvider` and can be built using an array of `UpdatableMNISTDigitClassifierTrainingInput` instances.
-
-```swift
-let updateTask = try MLUpdateTask(forModelAt: updatableModelURL,
-                                  trainingData: trainingData,
-                                  configuration: configuration,
-                                  progressHandlers: progressHandlers)
-
-// start the update process
-updateTask.resume()
-```
diff --git a/examples/updatable_models/README.md b/examples/updatable_models/README.md
deleted file mode 100644
index 97b9ecbc5..000000000
--- a/examples/updatable_models/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-Updatable Models Examples
-=========================
-
-In this set of notebook examples, we show how to create different types of updatable models using coremltools.
-
-- [Updatable Neural Network Classifier on MNIST Dataset](https://github.com/apple/coremltools/tree/master/examples/updatable_models/updatable_mnist.ipynb)
-This notebook demonstrates the process of creating a simple convolutional model on the MNIST dataset with Keras, converting it to a Core ML model, and making it updatable.
-The updatable model has 2 updatable layers and uses Categorical Cross Entropy Loss and SGD Optimizer.
-
-- [Updatable Tiny Drawing Classifier - Pipeline Model](https://github.com/apple/coremltools/tree/master/examples/updatable_models/updatable_tiny_drawing_classifier.ipynb)
-This notebook creates a model which can be used to train a simple drawing / sketch classifier based on user examples. 
-The model is a pipeline composed of a drawing embedding model and an updatable nearest neighbor classifier.
-
-- [Updatable Tiny Drawing Classifier - Linked Pipeline Model](https://github.com/apple/coremltools/tree/master/examples/updatable_models/updatable_linked_model.ipynb)
-This notebook creates a model which can be used to train a simple drawing / sketch classifier based on user examples.
-The model is a 'linked' pipeline composed of a 'linked' drawing embedding model and an updatable nearest neighbor classifier.
-
-- [Updatable Nearest Neighbor Classifier](https://github.com/apple/coremltools/tree/master/examples/updatable_models/updatable_nearest_neighbor_classifier.ipynb)
-This notebook makes an empty updatable nearest neighbor classifier. Before updating with training examples, it predicts 'defaultLabel' for all input.
-
-In addition to the above examples, a short document on CoreML 3.0 Update Task API usage is provided [here](https://github.com/apple/coremltools/tree/master/examples/updatable_models/OnDeviceTraining_API_Usage.md).
diff --git a/examples/updatable_models/images/five28x28.png b/examples/updatable_models/images/five28x28.png
deleted file mode 100644
index b03667dce..000000000
Binary files a/examples/updatable_models/images/five28x28.png and /dev/null differ
diff --git a/examples/updatable_models/images/heart28x28.png b/examples/updatable_models/images/heart28x28.png
deleted file mode 100644
index 6255bad63..000000000
Binary files a/examples/updatable_models/images/heart28x28.png and /dev/null differ
diff --git a/examples/updatable_models/images/star28x28.png b/examples/updatable_models/images/star28x28.png
deleted file mode 100644
index 62a2242cb..000000000
Binary files a/examples/updatable_models/images/star28x28.png and /dev/null differ
diff --git a/examples/updatable_models/models/TinyDrawingEmbedding.mlmodel b/examples/updatable_models/models/TinyDrawingEmbedding.mlmodel
deleted file mode 100644
index b9214af7c..000000000
Binary files a/examples/updatable_models/models/TinyDrawingEmbedding.mlmodel and /dev/null differ
diff --git a/examples/updatable_models/updatable_linked_model.ipynb b/examples/updatable_models/updatable_linked_model.ipynb
deleted file mode 100644
index b8e9f2c5d..000000000
--- a/examples/updatable_models/updatable_linked_model.ipynb
+++ /dev/null
@@ -1,241 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Updatable Linked Model - Tiny Drawing Classifier\n",
- "\n",
- "This notebook creates a model which can be used to train a simple drawing / sketch classifier based on user examples. The model is a 'linked' pipeline composed of a 'linked' drawing embedding model and a nearest neighbor classifier. \n",
- "\n",
- "The model is updatable and starts off 'empty' in that the nearest neighbor classifier has no examples or labels. Before updating with training examples, it predicts 'unknown' for all input.\n",
- "\n",
- "The input to the model is a 28 x 28 grayscale drawing. The background is expected to be black (0) while the strokes of the drawing should be rendered as white (255). For example:\n",
- "\n",
- "| Drawing of a Star | Drawing of a Heart | Drawing of 5 |\n",
- "| ----------- | ----------- | ----------- |\n",
- "| ![Star Example](images/star28x28.png) | ![Heart Example](images/heart28x28.png) | ![Five Example](images/five28x28.png) |\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Embedding\n",
- "\n",
- "Let's start by getting the first part of the model. It is the drawing embedding model, which will be used as a feature extractor."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "input {\n",
- " name: \"drawing\"\n",
- " shortDescription: \"Input sketch image with black background and white strokes\"\n",
- " type {\n",
- " imageType {\n",
- " width: 28\n",
- " height: 28\n",
- " colorSpace: GRAYSCALE\n",
- " }\n",
- " }\n",
- "}\n",
- "output {\n",
- " name: \"embedding\"\n",
- " shortDescription: \"Vector embedding of sketch in 128 dimensional space\"\n",
- " type {\n",
- " multiArrayType {\n",
- " shape: 128\n",
- " dataType: FLOAT32\n",
- " }\n",
- " }\n",
- "}\n",
- "metadata {\n",
- " shortDescription: \"Embeds a 28 x 28 grayscale image of a sketch into 128 dimensional space. The model was created by removing the last layer of a simple convolution based neural network classifier trained on the Quick, Draw! dataset (https://github.com/googlecreativelab/quickdraw-dataset).\"\n",
- " author: \"Core ML Tools Example\"\n",
- " license: \"MIT\"\n",
- "}\n",
- "\n"
- ]
- }
- ],
- "source": [
- "import coremltools\n",
- "from coremltools.models import MLModel\n",
- "\n",
- "embedding_path = './models/TinyDrawingEmbedding.mlmodel'\n",
- "embedding_model = MLModel(embedding_path)\n",
- "\n",
- "embedding_spec = embedding_model.get_spec()\n",
- "print(embedding_spec.description)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can see from the description above that the embedding model takes in a 28x28 grayscale image and outputs a 128 dimensional float vector."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Linked Model\n",
- "Let's create a linked model that points to the neural network feature extractor we just loaded. It requires the model file name and a search path."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [],
- "source": [
- "import coremltools\n",
- "linked_model_spec = coremltools.proto.Model_pb2.Model()\n",
- "linked_model_spec.specificationVersion = coremltools._MINIMUM_LINKED_MODELS_SPEC_VERSION\n",
- "linked_model_spec.description.metadata.shortDescription = 'Linked model which points to the TinyDrawingEmbedding model.'\n",
- "\n",
- "# Input and output are the same as the model it is pointing to.\n",
- "linked_model_spec.description.input.extend(embedding_spec.description.input[:])\n",
- "linked_model_spec.description.output.extend(embedding_spec.description.output[:])\n",
- "\n",
- "fileName = coremltools.proto.Parameters_pb2.StringParameter()\n",
- "fileName.defaultValue = 'TinyDrawingEmbedding.mlmodelc'\n",
- "linked_model_spec.linkedModel.linkedModelFile.linkedModelFileName.CopyFrom(fileName)\n",
- "\n",
- "# Search path to find the linked model file.\n",
- "# Multiple paths can be searched using the unix-style path separator \":\".\n",
- "# Each path can be relative (to this model) or absolute.\n",
- "#\n",
- "# An empty string is the same as the relative search path \".\",\n",
- "# which searches in the same location as this model file.\n",
- "#\n",
- "# There are some special paths which start with $:\n",
- "# $BUNDLE_MAIN - indicates to look in the main bundle\n",
- "# $BUNDLE_IDENTIFIER(identifier) - looks in the bundle with the given identifier\n",
- "searchPath = coremltools.proto.Parameters_pb2.StringParameter()\n",
- "searchPath.defaultValue = '.:$BUNDLE_MAIN'\n",
- "linked_model_spec.linkedModel.linkedModelFile.linkedModelSearchPath.CopyFrom(searchPath)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Nearest Neighbor Classifier\n",
- "\n",
- "Now that the feature extractor is in place, let's create the second model of our pipeline model.\n",
- "It is a nearest neighbor classifier operating on the embedding."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "from coremltools.models.nearest_neighbors import KNearestNeighborsClassifierBuilder\n",
- "import coremltools.models.datatypes as datatypes\n",
- "\n",
- "knn_builder = KNearestNeighborsClassifierBuilder(input_name='embedding',\n",
- " output_name='label',\n",
- " number_of_dimensions=128,\n",
- " default_class_label='unknown',\n",
- " number_of_neighbors=3,\n",
- " weighting_scheme='inverse_distance',\n",
- " index_type='linear')\n",
- "\n",
- "knn_builder.author = 'Core ML Tools Example'\n",
- "knn_builder.license = 'MIT'\n",
- "knn_builder.description = 'Classifies 128 dimension vector based on 3 nearest neighbors'\n",
- "\n",
- "knn_spec = knn_builder.spec\n",
- "knn_spec.specificationVersion = coremltools._MINIMUM_NEAREST_NEIGHBORS_SPEC_VERSION\n",
- "knn_spec.description.input[0].shortDescription = 'Input vector to classify'\n",
- "knn_spec.description.output[0].shortDescription = 'Predicted label. Defaults to \\'unknown\\''\n",
- "knn_spec.description.output[1].shortDescription = 'Probabilities / score for each possible label.'"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Updatable Pipeline\n",
- "\n",
- "The last step is to create the pipeline model and insert the linked model and the nearest neighbor classifier. The model will be set to be updatable."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "pipeline_spec = coremltools.proto.Model_pb2.Model()\n",
- "pipeline_spec.specificationVersion = coremltools._MINIMUM_UPDATABLE_SPEC_VERSION\n",
- "pipeline_spec.isUpdatable = True\n",
- "\n",
- "# Inputs are the inputs from the linked model\n",
- "pipeline_spec.description.input.extend(linked_model_spec.description.input[:])\n",
- "\n",
- "# Outputs are the outputs from the classification model\n",
- "pipeline_spec.description.output.extend(knn_spec.description.output[:])\n",
- "pipeline_spec.description.predictedFeatureName = knn_spec.description.predictedFeatureName\n",
- "pipeline_spec.description.predictedProbabilitiesName = knn_spec.description.predictedProbabilitiesName\n",
- "\n",
- "# Training inputs\n",
- "pipeline_spec.description.trainingInput.extend([linked_model_spec.description.input[0]])\n",
- "pipeline_spec.description.trainingInput[0].shortDescription = 'Example sketch'\n",
- "pipeline_spec.description.trainingInput.extend([knn_spec.description.output[0]])\n",
- "pipeline_spec.description.trainingInput[1].shortDescription = 'Associated true label of example sketch'\n",
- "\n",
- "# Provide metadata\n",
- "pipeline_spec.description.metadata.author = 'Core ML Tools'\n",
- "pipeline_spec.description.metadata.license = 'MIT'\n",
- "pipeline_spec.description.metadata.shortDescription = ('An updatable model which can be used to train a tiny 28 x 28 drawing classifier based on user examples.'\n",
- " ' It uses a drawing embedding trained on the Quick, Draw! dataset (https://github.com/googlecreativelab/quickdraw-dataset)')\n",
- "\n",
- "# Construct pipeline by adding the embedding and then the nearest neighbor classifier\n",
- "pipeline_spec.pipelineClassifier.pipeline.models.add().CopyFrom(linked_model_spec)\n",
- "pipeline_spec.pipelineClassifier.pipeline.models.add().CopyFrom(knn_spec)\n",
- "\n",
- "# Save the updated spec.\n",
- "# Note that to use this \"linked\" pipeline, both LinkedUpdatableTinyDrawingClassifier.mlmodel and TinyDrawingEmbedding.mlmodel must be imported into the project.\n",
- "from coremltools.models import MLModel\n",
- "mlmodel = MLModel(pipeline_spec)\n",
- "\n",
- "output_path = './LinkedUpdatableTinyDrawingClassifier.mlmodel'\n",
- "from coremltools.models.utils import save_spec\n",
- "mlmodel.save(output_path)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/updatable_models/updatable_mnist.ipynb b/examples/updatable_models/updatable_mnist.ipynb
deleted file mode 100644
index d6c638cfb..000000000
--- a/examples/updatable_models/updatable_mnist.ipynb
+++ /dev/null
@@ -1,308 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Updatable Neural Network Classifier\n",
- "This notebook demonstrates the process of creating a simple convolutional neural network classifier model that can be used for training on an MNIST-like dataset. The model is created using Keras and then converted to the Core ML format using keras_converter. Once in Core ML format, we mark the last two fully connected layers as updatable. Lastly, we attach a categorical cross entropy loss layer to the last layer and use SGD as the optimizer."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def create_keras_base_model(url):\n",
- " \"\"\"This method creates a convolutional neural network model using Keras.\n",
- " url - The URL where the Keras model will be saved as an h5 file.\n",
- " \"\"\"\n",
- " \n",
- " import keras\n",
- " from keras.models import Sequential\n",
- " from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D\n",
- " \n",
- " keras.backend.clear_session()\n",
- " model = Sequential()\n",
- " model.add(Conv2D(32, kernel_size=(3, 3),\n",
- " activation='relu',\n",
- " input_shape=(28, 28, 1)))\n",
- " model.add(Conv2D(64, (3, 3), activation='relu'))\n",
- " model.add(MaxPooling2D(pool_size=(2, 2)))\n",
- " model.add(Dropout(0.25))\n",
- " model.add(Flatten())\n",
- " model.add(Dense(128, activation='relu'))\n",
- " model.add(Dropout(0.5))\n",
- " model.add(Dense(10, activation='softmax'))\n",
- "\n",
- " model.compile(loss=keras.losses.categorical_crossentropy,\n",
- " optimizer=keras.optimizers.SGD(lr=0.01),\n",
- " metrics=['accuracy'])\n",
- "\n",
- " model.save(url)\n",
- "\n",
- "keras_model_path = './KerasMnist.h5'\n",
- "create_keras_base_model(keras_model_path)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def convert_keras_to_mlmodel(keras_url, mlmodel_url):\n",
- " \"\"\"This method simply converts the Keras model to an mlmodel using coremltools.\n",
- " keras_url - The URL from which the Keras model will be loaded.\n",
- " mlmodel_url - The URL where the Core ML model will be saved.\n",
- " \"\"\"\n",
- " from keras.models import load_model\n",
- " keras_model = load_model(keras_url)\n",
- " \n",
- " from coremltools.converters import keras as keras_converter\n",
- " class_labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']\n",
- " mlmodel = keras_converter.convert(keras_model, input_names=['image'],\n",
- " output_names=['digitProbabilities'],\n",
- " class_labels=class_labels,\n",
- " predicted_feature_name='digit')\n",
- " \n",
- " mlmodel.save(mlmodel_url)\n",
- " \n",
- "coreml_model_path = './MNISTDigitClassifier.mlmodel'\n",
- "convert_keras_to_mlmodel(keras_model_path, coreml_model_path)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[Id: 9], Name: dense_2__activation__ (Type: softmax)\n",
- " Updatable: False\n",
- " Input blobs: [u'dense_2_output']\n",
- " Output blobs: [u'digitProbabilities']\n",
- "[Id: 8], Name: dense_2 (Type: innerProduct)\n",
- " Updatable: False\n",
- " Input blobs: [u'dense_1__activation___output']\n",
- " Output blobs: [u'dense_2_output']\n",
- "[Id: 7], Name: dense_1__activation__ (Type: activation)\n",
- " Updatable: False\n",
- " Input blobs: [u'dense_1_output']\n",
- " Output blobs: [u'dense_1__activation___output']\n"
- ]
- }
- ],
- "source": [
- "# let's inspect the last few layers of this model\n",
- "import coremltools\n",
- "spec = coremltools.utils.load_spec(coreml_model_path)\n",
- "builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)\n",
- "builder.inspect_layers(last=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[Id: 0] Name: image\n",
- " Type: multiArrayType {\n",
{\n", - " shape: 1\n", - " shape: 28\n", - " shape: 28\n", - " dataType: DOUBLE\n", - "}\n", - "\n", - "[Id: 0] Name: image\n", - " Type: imageType {\n", - " width: 28\n", - " height: 28\n", - " colorSpace: GRAYSCALE\n", - "}\n", - "\n" - ] - } - ], - "source": [ - "# let's inspect the input of the model as we need this information later on the make_updatable method\n", - "builder.inspect_input_features()\n", - "\n", - "neuralnetwork_spec = builder.spec\n", - "\n", - "# change the input so the model can accept 28x28 grayscale images\n", - "neuralnetwork_spec.description.input[0].type.imageType.width = 28\n", - "neuralnetwork_spec.description.input[0].type.imageType.height = 28\n", - "\n", - "from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2\n", - "grayscale = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('GRAYSCALE')\n", - "neuralnetwork_spec.description.input[0].type.imageType.colorSpace = grayscale\n", - "\n", - "# let's inspect the input again to confirm the change in input type\n", - "builder.inspect_input_features()\n", - "\n", - "# Set input and output description\n", - "neuralnetwork_spec.description.input[0].shortDescription = 'Input image of the handwriten digit to classify'\n", - "neuralnetwork_spec.description.output[0].shortDescription = 'Probabilities / score for each possible digit'\n", - "neuralnetwork_spec.description.output[1].shortDescription = 'Predicted digit'\n", - "\n", - "# Provide metadata\n", - "neuralnetwork_spec.description.metadata.author = 'Core ML Tools'\n", - "neuralnetwork_spec.description.metadata.license = 'MIT'\n", - "neuralnetwork_spec.description.metadata.shortDescription = (\n", - " 'An updatable hand-written digit classifier setup to train or be fine-tuned on MNIST like data.')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Now adding input digitProbabilities_true as target for categorical cross-entropy loss layer.\n" - ] - } - ], - "source": [ - "def make_updatable(builder, mlmodel_url, mlmodel_updatable_path):\n", - " \"\"\"This method makes an existing non-updatable mlmodel updatable.\n", - " mlmodel_url - the path the Core ML model is stored.\n", - " mlmodel_updatable_path - the path the updatable Core ML model will be saved.\n", - " \"\"\"\n", - " import coremltools\n", - " model_spec = builder.spec\n", - "\n", - " # make_updatable method is used to make a layer updatable. It requires a list of layer names.\n", - " # dense_1 and dense_2 are two innerProduct layer in this example and we make them updatable.\n", - " builder.make_updatable(['dense_1', 'dense_2'])\n", - "\n", - " # Categorical Cross Entropy or Mean Squared Error can be chosen for the loss layer.\n", - " # Categorical Cross Entropy is used on this example. CCE requires two inputs: 'name' and 'input'.\n", - " # name must be a string and will be the name associated with the loss layer\n", - " # input must be the output of a softmax layer in the case of CCE. \n", - " # The loss's target will be provided automatically as a part of the model's training inputs.\n", - " builder.set_categorical_cross_entropy_loss(name='lossLayer', input='digitProbabilities')\n", - "\n", - " # in addition of the loss layer, an optimizer must also be defined. SGD and Adam optimizers are supported.\n", - " # SGD has been used for this example. 
- " from coremltools.models.neural_network import SgdParams\n",
- " builder.set_sgd_optimizer(SgdParams(lr=0.01, batch=32))\n",
- "\n",
- " # Finally, the number of epochs must be set as follows.\n",
- " builder.set_epochs(10)\n",
- " \n",
- " # Set training input descriptions\n",
- " model_spec.description.trainingInput[0].shortDescription = 'Example image of handwritten digit'\n",
- " model_spec.description.trainingInput[1].shortDescription = 'Associated true label (digit) of example image'\n",
- "\n",
- " # save the updated spec\n",
- " from coremltools.models import MLModel\n",
- " mlmodel_updatable = MLModel(model_spec)\n",
- " mlmodel_updatable.save(mlmodel_updatable_path)\n",
- "\n",
- "coreml_updatable_model_path = './UpdatableMNISTDigitClassifier.mlmodel'\n",
- "make_updatable(builder, coreml_model_path, coreml_updatable_model_path)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[Id: 0], Name: lossLayer (Type: categoricalCrossEntropyLossLayer)\n",
- " Loss Input: digitProbabilities\n",
- " Loss Target: digitProbabilities_true\n"
- ]
- }
- ],
- "source": [
- "# let's inspect the loss layer of the Core ML model\n",
- "import coremltools\n",
- "spec = coremltools.utils.load_spec(coreml_updatable_model_path)\n",
- "builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)\n",
- "\n",
- "builder.inspect_loss_layers()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer Type: sgdOptimizer\n",
- "lr: 0.01, min: 0.0, max: 1.0\n",
- "batch: 32, allowed_set: [32L]\n",
- "momentum: 0.0, min: 0.0, max: 1.0\n"
- ]
- }
- ],
- "source": [
- "# let's inspect the optimizer of the Core ML model\n",
- "builder.inspect_optimizer()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Name: dense_2 (Type: innerProduct)\n",
- " Input blobs: [u'dense_1__activation___output']\n",
- " Output blobs: [u'dense_2_output']\n",
- "Name: dense_1 (Type: innerProduct)\n",
- " Input blobs: [u'flatten_1_output']\n",
- " Output blobs: [u'dense_1_output']\n"
- ]
- }
- ],
- "source": [
- "# let's see which layers are updatable\n",
- "builder.inspect_updatable_layers()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.16"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/updatable_models/updatable_nearest_neighbor_classifier.ipynb b/examples/updatable_models/updatable_nearest_neighbor_classifier.ipynb
deleted file mode 100644
index 6f525143d..000000000
--- a/examples/updatable_models/updatable_nearest_neighbor_classifier.ipynb
+++ /dev/null
@@ -1,303 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Updatable Nearest Neighbor Classifier\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This notebook demonstrates the process of creating an updatable empty 
k-nearest neighbor model using coremltools." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "number_of_dimensions = 128\n", - "\n", - "from coremltools.models.nearest_neighbors import KNearestNeighborsClassifierBuilder\n", - "builder = KNearestNeighborsClassifierBuilder(input_name='input',\n", - " output_name='output',\n", - " number_of_dimensions=number_of_dimensions,\n", - " default_class_label='defaultLabel',\n", - " number_of_neighbors=3,\n", - " weighting_scheme='inverse_distance',\n", - " index_type='linear')\n", - "\n", - "builder.author = 'Core ML Tools Example'\n", - "builder.license = 'MIT'\n", - "builder.description = 'Classifies {} dimension vector based on 3 nearest neighbors'.format(number_of_dimensions)\n", - "\n", - "builder.spec.description.input[0].shortDescription = 'Input vector to classify'\n", - "builder.spec.description.output[0].shortDescription = 'Predicted label. Defaults to \\'defaultLabel\\''\n", - "builder.spec.description.output[1].shortDescription = 'Probabilities / score for each possible label.'\n", - "\n", - "builder.spec.description.trainingInput[0].shortDescription = 'Example input vector'\n", - "builder.spec.description.trainingInput[1].shortDescription = 'Associated true label of each example vector'" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# By default an empty knn model is updatable\n", - "builder.is_updatable" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "128" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's confirm the number of dimension is set correctly\n", - "builder.number_of_dimensions" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's check what the value of 'numberOfNeighbors' is\n", - "builder.number_of_neighbors" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1, 1000)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# The number of neighbors is bounded by the default range...\n", - "builder.number_of_neighbors_allowed_range()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "number_of_neighbors is not within range bounds", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# If we try to set the number of neighbors to a value outside of this range\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumber_of_neighbors\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;36m1001\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/eng/sources/coreml/coremltools/coremltools/models/nearest_neighbors/builder.py\u001b[0m in \u001b[0;36mnumber_of_neighbors\u001b[0;34m(self, number_of_neighbors)\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkNearestNeighborsClassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumberOfNeighbors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefaultValue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumber_of_neighbors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 314\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'number_of_neighbors is not within range bounds'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 315\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[0mspec_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkNearestNeighborsClassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumberOfNeighbors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: number_of_neighbors is not within range bounds" - ] - } - ], - "source": [ - "# If we try to set the number of neighbors to a value outside of this range\n", - "builder.number_of_neighbors = 1001" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# Instead of a range, you can a set individual values that are valid for the numberOfNeighbors parameter.\n", - "builder.set_number_of_neighbors_with_bounds(3, allowed_set={ 1, 3, 5 })" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{1, 3, 5}" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Check out the results of the previous operation\n", - "builder.number_of_neighbors_allowed_set()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "number_of_neighbors is not valid", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# And now if you attempt to set it to an invalid value...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumber_of_neighbors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/eng/sources/coreml/coremltools/coremltools/models/nearest_neighbors/builder.py\u001b[0m in \u001b[0;36mnumber_of_neighbors\u001b[0;34m(self, number_of_neighbors)\u001b[0m\n\u001b[1;32m 320\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkNearestNeighborsClassifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumberOfNeighbors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefaultValue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumber_of_neighbors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'number_of_neighbors is not an allowed value'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mset_number_of_neighbors_with_bounds\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumber_of_neighbors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallowed_range\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallowed_set\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: number_of_neighbors is not valid" - ] - } - ], - "source": [ - "# And now if you attempt to set it to an invalid value...\n", - "builder.number_of_neighbors = 4" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# And of course you can go back to a valid range\n", - "builder.set_number_of_neighbors_with_bounds(3, allowed_range=(1, 30))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'linear'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's see what the index type is\n", - "builder.index_type" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'kd_tree'" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's set the index to kd_tree with leaf size of 30\n", - "builder.set_index_type('kd_tree', 30)\n", - "builder.index_type" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "mlmodel_updatable_path = './UpdatableKNN.mlmodel'\n", - "\n", - "# Save the updated spec\n", - "from coremltools.models import MLModel\n", - "mlmodel_updatable = MLModel(builder.spec)\n", - "mlmodel_updatable.save(mlmodel_updatable_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/updatable_models/updatable_tiny_drawing_classifier.ipynb b/examples/updatable_models/updatable_tiny_drawing_classifier.ipynb deleted file mode 100644 index b70b3e214..000000000 --- 
a/examples/updatable_models/updatable_tiny_drawing_classifier.ipynb
+++ /dev/null
@@ -1,201 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Updatable Tiny Drawing Classifier\n",
- "\n",
- "This notebook creates a model which can be used to train a simple drawing / sketch classifier based on user examples. The model is a pipeline composed of a drawing embedding model and a nearest neighbor classifier. \n",
- "\n",
- "The model is updatable and starts off 'empty' in that the nearest neighbor classifier has no examples or labels. Before updating with training examples, it predicts 'unknown' for all input.\n",
- "\n",
- "The input to the model is a 28 x 28 grayscale drawing. The background is expected to be black (0) while the strokes of the drawing should be rendered as white (255). For example:\n",
- "\n",
- "| Drawing of a Star | Drawing of a Heart | Drawing of 5 |\n",
- "| ----------- | ----------- | ----------- |\n",
- "| ![Star Example](images/star28x28.png) | ![Heart Example](images/heart28x28.png) | ![Five Example](images/five28x28.png) |\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Embedding\n",
- "\n",
- "Let's start by getting the first part of the model. It is the drawing embedding model, which will be used as a feature extractor."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "tf.estimator package not installed.\n",
- "tf.estimator package not installed.\n",
- "input {\n",
- " name: \"drawing\"\n",
- " shortDescription: \"Input sketch image with black background and white strokes\"\n",
- " type {\n",
- " imageType {\n",
- " width: 28\n",
- " height: 28\n",
- " colorSpace: GRAYSCALE\n",
- " }\n",
- " }\n",
- "}\n",
- "output {\n",
- " name: \"embedding\"\n",
- " shortDescription: \"Vector embedding of sketch in 128 dimensional space\"\n",
- " type {\n",
- " multiArrayType {\n",
- " shape: 128\n",
- " dataType: FLOAT32\n",
- " }\n",
- " }\n",
- "}\n",
- "metadata {\n",
- " shortDescription: \"Embeds a 28 x 28 grayscale image of a sketch into 128 dimensional space. The model was created by removing the last layer of a simple convolution based neural network classifier trained on the Quick, Draw! dataset (https://github.com/googlecreativelab/quickdraw-dataset).\"\n",
- " author: \"Core ML Tools Example\"\n",
- " license: \"MIT\"\n",
- "}\n",
- "\n"
- ]
- }
- ],
- "source": [
- "import coremltools\n",
- "from coremltools.models import MLModel\n",
- "\n",
- "embedding_path = './models/TinyDrawingEmbedding.mlmodel'\n",
- "embedding_model = MLModel(embedding_path)\n",
- "\n",
- "embedding_spec = embedding_model.get_spec()\n",
- "print(embedding_spec.description)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can see from the description above that the embedding model takes in a 28x28 grayscale image and outputs a 128 dimensional float vector."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Nearest Neighbor Classifier\n",
- "\n",
- "Now that the feature extractor is in place, let's create the second model of our pipeline model.\n",
- "It is a nearest neighbor classifier operating on the embedding."
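The builder cell below selects `weighting_scheme='inverse_distance'`. As a rough illustration of what that scheme means (a toy sketch of the semantics, not Core ML's implementation), each of the k nearest neighbors votes for its label with a weight proportional to the inverse of its distance:

```python
import numpy as np

def inverse_distance_vote(distances, labels):
    """Toy sketch of inverse-distance weighting for k-NN voting."""
    weights = 1.0 / np.maximum(distances, 1e-12)  # guard against zero distances
    scores = {}
    for w, label in zip(weights, labels):
        scores[label] = scores.get(label, 0.0) + w
    total = sum(scores.values())
    return {label: s / total for label, s in scores.items()}

# Example: three neighbors, two labeled 'star' and one labeled 'heart';
# the closer 'star' neighbors dominate the vote.
print(inverse_distance_vote(np.array([0.5, 1.0, 2.0]), ['star', 'star', 'heart']))
```

Under this scheme a very close neighbor can outweigh several distant ones, which is why it is a common default for small k.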
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from coremltools.models.nearest_neighbors import KNearestNeighborsClassifierBuilder\n", - "import coremltools.models.datatypes as datatypes\n", - "\n", - "knn_builder = KNearestNeighborsClassifierBuilder(input_name='embedding',\n", - " output_name='label',\n", - " number_of_dimensions=128,\n", - " default_class_label='unknown',\n", - " k=3,\n", - " weighting_scheme='inverse_distance',\n", - " index_type='linear')\n", - "\n", - "knn_builder.author = 'Core ML Tools Example'\n", - "knn_builder.license = 'MIT'\n", - "knn_builder.description = 'Classifies 128 dimension vector based on 3 nearest neighbors'\n", - "\n", - "knn_spec = knn_builder.spec\n", - "knn_spec.description.input[0].shortDescription = 'Input vector to classify'\n", - "knn_spec.description.output[0].shortDescription = 'Predicted label. Defaults to \\'unknown\\''\n", - "knn_spec.description.output[1].shortDescription = 'Probabilities / score for each possible label.'\n", - "\n", - "# print knn_spec.description\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Updatable Pipeline\n", - "\n", - "Last step is to create the pipeline model and insert the feature extractor and the nearest neighbor classifier. The model will be set to be updatable." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_spec = coremltools.proto.Model_pb2.Model()\n", - "pipeline_spec.specificationVersion = coremltools._MINIMUM_UPDATABLE_SPEC_VERSION\n", - "pipeline_spec.isUpdatable = True\n", - "\n", - "# Inputs are the inputs from the embedding model\n", - "pipeline_spec.description.input.extend(embedding_spec.description.input[:])\n", - "\n", - "# Outputs are the outputs from the classification model\n", - "pipeline_spec.description.output.extend(knn_spec.description.output[:])\n", - "pipeline_spec.description.predictedFeatureName = knn_spec.description.predictedFeatureName\n", - "pipeline_spec.description.predictedProbabilitiesName = knn_spec.description.predictedProbabilitiesName\n", - "\n", - "# Training inputs\n", - "pipeline_spec.description.trainingInput.extend([embedding_spec.description.input[0]])\n", - "pipeline_spec.description.trainingInput[0].shortDescription = 'Example sketch'\n", - "pipeline_spec.description.trainingInput.extend([knn_spec.description.output[0]])\n", - "pipeline_spec.description.trainingInput[1].shortDescription = 'Associated true label of example sketch'\n", - "\n", - "# Provide metadata\n", - "pipeline_spec.description.metadata.author = 'Core ML Tools'\n", - "pipeline_spec.description.metadata.license = 'MIT'\n", - "pipeline_spec.description.metadata.shortDescription = ('An updatable model which can be used to train a tiny 28 x 28 drawing classifier based on user examples.'\n", - " ' It uses a drawing embedding trained on the Quick, Draw! 
dataset (https://github.com/googlecreativelab/quickdraw-dataset)')\n", - "\n", - "# Construct pipeline by adding the embedding and then the nearest neighbor classifier\n", - "pipeline_spec.pipelineClassifier.pipeline.models.add().CopyFrom(embedding_spec)\n", - "pipeline_spec.pipelineClassifier.pipeline.models.add().CopyFrom(knn_spec)\n", - "\n", - "# Save the updated spec.\n", - "from coremltools.models import MLModel\n", - "mlmodel = MLModel(pipeline_spec)\n", - "\n", - "output_path = './TinyDrawingClassifier.mlmodel'\n", - "from coremltools.models.utils import save_spec\n", - "mlmodel.save(output_path)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/feedback/release-feedback-3.0 b/feedback/release-feedback-3.0 deleted file mode 100644 index 83bb37dc5..000000000 --- a/feedback/release-feedback-3.0 +++ /dev/null @@ -1,48 +0,0 @@ - -coremltools 3.0 - Release Feedback -======================= - -Please feel free to give us feedback on the release! We would love to hear from you. -To vote on each question, simply click on the option you would like to vote for and your response will be automatically recorded. - -**Question 1: Which feature from this release do you find the most useful?** - -

-[poll voting links removed: not recoverable from this copy]
-
-**Question 2: In which area would you like more focus to be placed?**
-
-**Question 3: Which aspect of the Core ML community is the most useful for you?**
-
-**Question 4: Which converters do you use the most?**
-
-**Question 5: Which version of Core ML do you use?**
-
-**Question 6: Would you participate in creating a sample app or sample code for Core ML?**
-

diff --git a/mlmodel/CMakeLists.txt b/mlmodel/CMakeLists.txt index 8d7904f58..a79664070 100644 --- a/mlmodel/CMakeLists.txt +++ b/mlmodel/CMakeLists.txt @@ -1,64 +1,60 @@ -cmake_minimum_required(VERSION 3.0.0) -project(mlmodel) +include("${CMAKE_SOURCE_DIR}/cmake/coreml-utils.cmake") include_directories( .. ../deps/protobuf/src + src ) -set(CMAKE_CXX_FLAGS " \ - ${CMAKE_CXX_FLAGS} \ - --std=c++14 \ - -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER \ -") +add_definitions(-DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER) + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CMAKE_CXX_FLAGS " \ ${CMAKE_CXX_FLAGS} \ -Wglobal-constructors \ ") endif() -set(CMAKE_EXE_LINKER_FLAGS " \ - ${CMAKE_EXE_LINKER_FLAGS} \ - --std=c++14 \ -") add_library(mlmodel_test STATIC - tests/LinearModelTests.cpp - tests/TreeEnsembleTests.cpp - tests/OneHotEncoderTests.cpp + tests/BayesianProbitRegressionValidatorTests.cpp tests/InterfaceTests.cpp - tests/UtilsTests.cpp + tests/KNNValidatorTests.cpp + tests/LinearModelTests.cpp + tests/ModelContainerTests.cpp + tests/ModelCreationUtils.cpp tests/NNShaperTest.cpp tests/NNShapeTests.cpp tests/NNValidatorTests.cpp - tests/ModelContainerTests.cpp - tests/SaveLoadTests.cpp - tests/BayesianProbitRegressionValidatorTests.cpp - tests/NNShapeTests.cpp - tests/NNShaperTest.cpp - tests/KNNValidatorTests.cpp - tests/UpdatableModelValidatorTests.cpp tests/ParameterTests.cpp - tests/ModelCreationUtils.cpp + tests/OneHotEncoderTests.cpp + tests/SaveLoadTests.cpp tests/SoundAnalysisPreprocessingValidatorTests.cpp + tests/VisionFeaturePrintValidatorTests.cpp + tests/TreeEnsembleTests.cpp + tests/UpdatableModelValidatorTests.cpp + tests/UtilsTests.cpp ) set_property(TARGET mlmodel_test PROPERTY POSITION_INDEPENDENT_CODE ON ) +target_include_directories(mlmodel_test + PRIVATE tests +) + target_link_libraries(mlmodel_test - mlmodel + mlmodel ) add_executable(mlmodel_test_runner - test_runner.cpp + test_runner.cpp ) target_link_libraries(mlmodel_test_runner - mlmodel_test + mlmodel_test ) add_library(mlmodel @@ -99,42 +95,13 @@ add_library(mlmodel src/Comparison.cpp src/DataType.cpp + src/ItemSimilarityRecommenderCommon.cpp + src/LayerShapeConstraints.cpp src/Model.cpp + src/NeuralNetworkBuffer.cpp src/Result.cpp - src/Utils.cpp - - src/ArrayFeatureExtractorValidator.cpp - src/BayesianProbitRegressionValidator.cpp - src/CategoricalMappingValidator.cpp - src/CustomModelValidator.cpp - src/DictVectorizerValidator.cpp - src/FeatureVectorizerValidator.cpp - src/GazetteerValidator.cpp - src/IdentityValidator.cpp - src/ImputerValidator.cpp - src/InterfaceValidators.cpp - src/LinearModelValidator.cpp - src/NeuralNetwork/NeuralNetworkValidator.cpp - src/NeuralNetwork/NeuralNetworkLayerValidator.cpp - src/NonMaximumSuppressionValidator.cpp - src/NormalizerValidator.cpp - src/OneHotEncoderValidator.cpp - src/PipelineValidator.cpp - src/ScalarValidator.cpp - src/SVMValidator.cpp - src/TreeEnsembleValidator.cpp - src/TextClassifierValidator.cpp - src/VisionFeaturePrintValidator.cpp - src/WordEmbeddingValidator.cpp - src/WordTaggerValidator.cpp - src/QuantizationValidationUtils.cpp - src/KNearestNeighborsClassifierValidator.cpp - src/ParameterValidator.cpp - src/ItemSimilarityRecommenderValidator.cpp - src/ItemSimilarityRecommenderCommon.cpp src/TreeEnsembleCommon.cpp - src/SoundAnalysisPreprocessingValidator.cpp - src/LinkedModelValidator.cpp + src/Utils.cpp src/transforms/LinearModel.cpp src/transforms/Pipeline.cpp @@ -145,10 +112,38 @@ add_library(mlmodel src/transforms/OneHotEncoder.cpp 
src/transforms/ItemSimilarityRecommender.cpp - src/LayerShapeConstraints.cpp - src/NeuralNetwork/NeuralNetworkShapes.cpp - src/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp - src/QuantizationValidationUtils.cpp + src/Validation/ArrayFeatureExtractorValidator.cpp + src/Validation/BayesianProbitRegressionValidator.cpp + src/Validation/CategoricalMappingValidator.cpp + src/Validation/CustomModelValidator.cpp + src/Validation/DictVectorizerValidator.cpp + src/Validation/FeatureVectorizerValidator.cpp + src/Validation/GazetteerValidator.cpp + src/Validation/IdentityValidator.cpp + src/Validation/ImputerValidator.cpp + src/Validation/InterfaceValidators.cpp + src/Validation/ItemSimilarityRecommenderValidator.cpp + src/Validation/KNearestNeighborsClassifierValidator.cpp + src/Validation/LinearModelValidator.cpp + src/Validation/LinkedModelValidator.cpp + src/Validation/NeuralNetwork/NeuralNetworkLayerValidator.cpp + src/Validation/NeuralNetwork/NeuralNetworkShapes.cpp + src/Validation/NeuralNetwork/NeuralNetworkValidator.cpp + src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp + src/Validation/NonMaximumSuppressionValidator.cpp + src/Validation/NormalizerValidator.cpp + src/Validation/OneHotEncoderValidator.cpp + src/Validation/ParameterValidator.cpp + src/Validation/PipelineValidator.cpp + src/Validation/QuantizationValidationUtils.cpp + src/Validation/ScalarValidator.cpp + src/Validation/SoundAnalysisPreprocessingValidator.cpp + src/Validation/SVMValidator.cpp + src/Validation/TextClassifierValidator.cpp + src/Validation/TreeEnsembleValidator.cpp + src/Validation/VisionFeaturePrintValidator.cpp + src/Validation/WordEmbeddingValidator.cpp + src/Validation/WordTaggerValidator.cpp ) set_property(TARGET mlmodel @@ -208,90 +203,7 @@ option(OVERWRITE_PB_SOURCE # This will get filled with the dependencies for the 'protosrc' target. set(proto_depends) foreach(proto_fn IN ITEMS ${proto_files}) - add_custom_command( - OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/format/${proto_fn}.pb.cc - ${CMAKE_CURRENT_BINARY_DIR}/format/${proto_fn}.pb.h - COMMENT "Generating proto files for ${proto_fn}..." - COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc - --cpp_out=${CMAKE_CURRENT_BINARY_DIR}/format/ - -I${CMAKE_CURRENT_SOURCE_DIR}/format - ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto - DEPENDS protoc - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) - add_custom_command( - OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/format/${proto_fn}_enum.h - COMMENT "Generating enums for ${proto_fn}..." - COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc - --plugin=protoc-gen-enum=mlmodel/enumgen - --enum_out=${CMAKE_CURRENT_BINARY_DIR}/format/ - -I${CMAKE_CURRENT_SOURCE_DIR}/format/ - ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto - DEPENDS enumgen protoc - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) - add_custom_command( - OUTPUT - ${CMAKE_BINARY_DIR}/coremltools/proto/${proto_fn}_pb2.py - COMMENT "Generating Python for ${proto_fn}..." - COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc - --python_out=${CMAKE_BINARY_DIR}/coremltools/proto - -I${CMAKE_CURRENT_SOURCE_DIR}/format/ - ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto - COMMAND python - -m lib2to3 - -wn - --no-diff - -f import - ${CMAKE_BINARY_DIR}/coremltools/${proto_fn}_pb2.py - DEPENDS protoc - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) - # For the CoreML framework we read the source file locations for these, and - # so it can be useful to update the source tree in addition. So we repeat - # all of the above with different outputs. 
- if(OVERWRITE_PB_SOURCE) - add_custom_target(tgt_${proto_fn}_source ALL - COMMENT "Generating (source) proto files for ${proto_fn}..." - COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc - --cpp_out=${CMAKE_CURRENT_SOURCE_DIR}/build/format/ - -I${CMAKE_CURRENT_SOURCE_DIR}/format - ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto - DEPENDS protoc - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) - add_custom_target(tgt_${proto_fn}_enums ALL - COMMENT "Generating (source) enums for ${proto_fn}..." - COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc - --plugin=protoc-gen-enum=mlmodel/enumgen - --enum_out=${CMAKE_CURRENT_SOURCE_DIR}/build/format/ - -I${CMAKE_CURRENT_SOURCE_DIR}/format/ - ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto - DEPENDS enumgen protoc - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) - add_custom_target(tgt_${proto_fn}_python ALL - COMMENT "Generating (source) Python for ${proto_fn}..." - COMMAND ${CMAKE_BINARY_DIR}/deps/protobuf/cmake/protoc - --python_out=${CMAKE_SOURCE_DIR}/coremltools/proto - -I${CMAKE_CURRENT_SOURCE_DIR}/format/ - ${CMAKE_CURRENT_SOURCE_DIR}/format/${proto_fn}.proto - COMMAND python - -m lib2to3 - -wn - --no-diff - -f import - ${CMAKE_SOURCE_DIR}/coremltools/proto/${proto_fn}_pb2.py - DEPENDS protoc - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - ) - # Record dependencies for 'protosrc' target. - list(APPEND proto_depends tgt_${proto_fn}_source) - list(APPEND proto_depends tgt_${proto_fn}_enums) - list(APPEND proto_depends tgt_${proto_fn}_python) - endif() + coreml_add_build_proto(${proto_fn} "") endforeach() # Add a custom 'protosrc' target that only builds the source objects from proto # files. diff --git a/mlmodel/README.md b/mlmodel/README.md deleted file mode 100644 index 8b1378917..000000000 --- a/mlmodel/README.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mlmodel/Requirements.txt b/mlmodel/Requirements.txt deleted file mode 100644 index 481fa0b83..000000000 --- a/mlmodel/Requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -alabaster==0.7.10 -Babel==2.4.0 -docutils==0.13.1 -imagesize==0.7.1 -inflection==0.3.1 -MarkupSafe==1.0 -Pygments==2.2.0 -pytz==2017.2 -six==1.10.0 -snowballstemmer==1.2.1 -Sphinx==1.6.1 -sphinx-rtd-theme==0.2.4 -sphinxcontrib-websupport==1.0.1 -typing==3.6.1 diff --git a/mlmodel/build/format/FeatureTypes.pb.cc b/mlmodel/build/format/FeatureTypes.pb.cc index a577336ed..aba83e033 100644 --- a/mlmodel/build/format/FeatureTypes.pb.cc +++ b/mlmodel/build/format/FeatureTypes.pb.cc @@ -45,6 +45,9 @@ class ArrayFeatureTypeDefaultTypeInternal : public ::google::protobuf::internal: public: const ::CoreML::Specification::ArrayFeatureType_EnumeratedShapes* enumeratedshapes_; const ::CoreML::Specification::ArrayFeatureType_ShapeRange* shaperange_; + ::google::protobuf::int32 intdefaultvalue_; + float floatdefaultvalue_; + double doubledefaultvalue_; } _ArrayFeatureType_default_instance_; class DictionaryFeatureTypeDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed { public: @@ -2816,6 +2819,9 @@ const int ArrayFeatureType::kShapeFieldNumber; const int ArrayFeatureType::kDataTypeFieldNumber; const int ArrayFeatureType::kEnumeratedShapesFieldNumber; const int ArrayFeatureType::kShapeRangeFieldNumber; +const int ArrayFeatureType::kIntDefaultValueFieldNumber; +const int ArrayFeatureType::kFloatDefaultValueFieldNumber; +const int ArrayFeatureType::kDoubleDefaultValueFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 ArrayFeatureType::ArrayFeatureType() @@ -2847,12 +2853,31 @@ 
ArrayFeatureType::ArrayFeatureType(const ArrayFeatureType& from) break; } } + clear_has_defaultOptionalValue(); + switch (from.defaultOptionalValue_case()) { + case kIntDefaultValue: { + set_intdefaultvalue(from.intdefaultvalue()); + break; + } + case kFloatDefaultValue: { + set_floatdefaultvalue(from.floatdefaultvalue()); + break; + } + case kDoubleDefaultValue: { + set_doubledefaultvalue(from.doubledefaultvalue()); + break; + } + case DEFAULTOPTIONALVALUE_NOT_SET: { + break; + } + } // @@protoc_insertion_point(copy_constructor:CoreML.Specification.ArrayFeatureType) } void ArrayFeatureType::SharedCtor() { datatype_ = 0; clear_has_ShapeFlexibility(); + clear_has_defaultOptionalValue(); _cached_size_ = 0; } @@ -2865,6 +2890,9 @@ void ArrayFeatureType::SharedDtor() { if (has_ShapeFlexibility()) { clear_ShapeFlexibility(); } + if (has_defaultOptionalValue()) { + clear_defaultOptionalValue(); + } } void ArrayFeatureType::SetCachedSize(int size) const { @@ -2903,12 +2931,35 @@ void ArrayFeatureType::clear_ShapeFlexibility() { _oneof_case_[0] = SHAPEFLEXIBILITY_NOT_SET; } +void ArrayFeatureType::clear_defaultOptionalValue() { +// @@protoc_insertion_point(one_of_clear_start:CoreML.Specification.ArrayFeatureType) + switch (defaultOptionalValue_case()) { + case kIntDefaultValue: { + // No need to clear + break; + } + case kFloatDefaultValue: { + // No need to clear + break; + } + case kDoubleDefaultValue: { + // No need to clear + break; + } + case DEFAULTOPTIONALVALUE_NOT_SET: { + break; + } + } + _oneof_case_[1] = DEFAULTOPTIONALVALUE_NOT_SET; +} + void ArrayFeatureType::Clear() { // @@protoc_insertion_point(message_clear_start:CoreML.Specification.ArrayFeatureType) shape_.Clear(); datatype_ = 0; clear_ShapeFlexibility(); + clear_defaultOptionalValue(); } bool ArrayFeatureType::MergePartialFromCodedStream( @@ -2978,6 +3029,51 @@ bool ArrayFeatureType::MergePartialFromCodedStream( break; } + // int32 intDefaultValue = 41; + case 41: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(328u)) { + clear_defaultOptionalValue(); + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &defaultOptionalValue_.intdefaultvalue_))); + set_has_intdefaultvalue(); + } else { + goto handle_unusual; + } + break; + } + + // float floatDefaultValue = 51; + case 51: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(413u)) { + clear_defaultOptionalValue(); + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &defaultOptionalValue_.floatdefaultvalue_))); + set_has_floatdefaultvalue(); + } else { + goto handle_unusual; + } + break; + } + + // double doubleDefaultValue = 61; + case 61: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(489u)) { + clear_defaultOptionalValue(); + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + double, ::google::protobuf::internal::WireFormatLite::TYPE_DOUBLE>( + input, &defaultOptionalValue_.doubledefaultvalue_))); + set_has_doubledefaultvalue(); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -3033,6 +3129,21 @@ void ArrayFeatureType::SerializeWithCachedSizes( 31, *ShapeFlexibility_.shaperange_, output); } + // int32 intDefaultValue = 41; + if (has_intdefaultvalue()) { + 
::google::protobuf::internal::WireFormatLite::WriteInt32(41, this->intdefaultvalue(), output); + } + + // float floatDefaultValue = 51; + if (has_floatdefaultvalue()) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(51, this->floatdefaultvalue(), output); + } + + // double doubleDefaultValue = 61; + if (has_doubledefaultvalue()) { + ::google::protobuf::internal::WireFormatLite::WriteDouble(61, this->doubledefaultvalue(), output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.ArrayFeatureType) } @@ -3080,6 +3191,28 @@ size_t ArrayFeatureType::ByteSizeLong() const { break; } } + switch (defaultOptionalValue_case()) { + // int32 intDefaultValue = 41; + case kIntDefaultValue: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->intdefaultvalue()); + break; + } + // float floatDefaultValue = 51; + case kFloatDefaultValue: { + total_size += 2 + 4; + break; + } + // double doubleDefaultValue = 61; + case kDoubleDefaultValue: { + total_size += 2 + 8; + break; + } + case DEFAULTOPTIONALVALUE_NOT_SET: { + break; + } + } int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -3116,6 +3249,23 @@ void ArrayFeatureType::MergeFrom(const ArrayFeatureType& from) { break; } } + switch (from.defaultOptionalValue_case()) { + case kIntDefaultValue: { + set_intdefaultvalue(from.intdefaultvalue()); + break; + } + case kFloatDefaultValue: { + set_floatdefaultvalue(from.floatdefaultvalue()); + break; + } + case kDoubleDefaultValue: { + set_doubledefaultvalue(from.doubledefaultvalue()); + break; + } + case DEFAULTOPTIONALVALUE_NOT_SET: { + break; + } + } } void ArrayFeatureType::CopyFrom(const ArrayFeatureType& from) { @@ -3138,6 +3288,8 @@ void ArrayFeatureType::InternalSwap(ArrayFeatureType* other) { std::swap(datatype_, other->datatype_); std::swap(ShapeFlexibility_, other->ShapeFlexibility_); std::swap(_oneof_case_[0], other->_oneof_case_[0]); + std::swap(defaultOptionalValue_, other->defaultOptionalValue_); + std::swap(_oneof_case_[1], other->_oneof_case_[1]); std::swap(_cached_size_, other->_cached_size_); } @@ -3288,15 +3440,111 @@ void ArrayFeatureType::set_allocated_shaperange(::CoreML::Specification::ArrayFe // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.ArrayFeatureType.shapeRange) } +// int32 intDefaultValue = 41; +bool ArrayFeatureType::has_intdefaultvalue() const { + return defaultOptionalValue_case() == kIntDefaultValue; +} +void ArrayFeatureType::set_has_intdefaultvalue() { + _oneof_case_[1] = kIntDefaultValue; +} +void ArrayFeatureType::clear_intdefaultvalue() { + if (has_intdefaultvalue()) { + defaultOptionalValue_.intdefaultvalue_ = 0; + clear_has_defaultOptionalValue(); + } +} +::google::protobuf::int32 ArrayFeatureType::intdefaultvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArrayFeatureType.intDefaultValue) + if (has_intdefaultvalue()) { + return defaultOptionalValue_.intdefaultvalue_; + } + return 0; +} +void ArrayFeatureType::set_intdefaultvalue(::google::protobuf::int32 value) { + if (!has_intdefaultvalue()) { + clear_defaultOptionalValue(); + set_has_intdefaultvalue(); + } + defaultOptionalValue_.intdefaultvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArrayFeatureType.intDefaultValue) +} + +// float floatDefaultValue = 51; +bool ArrayFeatureType::has_floatdefaultvalue() const { + return defaultOptionalValue_case() == kFloatDefaultValue; +} +void 
ArrayFeatureType::set_has_floatdefaultvalue() { + _oneof_case_[1] = kFloatDefaultValue; +} +void ArrayFeatureType::clear_floatdefaultvalue() { + if (has_floatdefaultvalue()) { + defaultOptionalValue_.floatdefaultvalue_ = 0; + clear_has_defaultOptionalValue(); + } +} +float ArrayFeatureType::floatdefaultvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArrayFeatureType.floatDefaultValue) + if (has_floatdefaultvalue()) { + return defaultOptionalValue_.floatdefaultvalue_; + } + return 0; +} +void ArrayFeatureType::set_floatdefaultvalue(float value) { + if (!has_floatdefaultvalue()) { + clear_defaultOptionalValue(); + set_has_floatdefaultvalue(); + } + defaultOptionalValue_.floatdefaultvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArrayFeatureType.floatDefaultValue) +} + +// double doubleDefaultValue = 61; +bool ArrayFeatureType::has_doubledefaultvalue() const { + return defaultOptionalValue_case() == kDoubleDefaultValue; +} +void ArrayFeatureType::set_has_doubledefaultvalue() { + _oneof_case_[1] = kDoubleDefaultValue; +} +void ArrayFeatureType::clear_doubledefaultvalue() { + if (has_doubledefaultvalue()) { + defaultOptionalValue_.doubledefaultvalue_ = 0; + clear_has_defaultOptionalValue(); + } +} +double ArrayFeatureType::doubledefaultvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArrayFeatureType.doubleDefaultValue) + if (has_doubledefaultvalue()) { + return defaultOptionalValue_.doubledefaultvalue_; + } + return 0; +} +void ArrayFeatureType::set_doubledefaultvalue(double value) { + if (!has_doubledefaultvalue()) { + clear_defaultOptionalValue(); + set_has_doubledefaultvalue(); + } + defaultOptionalValue_.doubledefaultvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArrayFeatureType.doubleDefaultValue) +} + bool ArrayFeatureType::has_ShapeFlexibility() const { return ShapeFlexibility_case() != SHAPEFLEXIBILITY_NOT_SET; } void ArrayFeatureType::clear_has_ShapeFlexibility() { _oneof_case_[0] = SHAPEFLEXIBILITY_NOT_SET; } +bool ArrayFeatureType::has_defaultOptionalValue() const { + return defaultOptionalValue_case() != DEFAULTOPTIONALVALUE_NOT_SET; +} +void ArrayFeatureType::clear_has_defaultOptionalValue() { + _oneof_case_[1] = DEFAULTOPTIONALVALUE_NOT_SET; +} ArrayFeatureType::ShapeFlexibilityCase ArrayFeatureType::ShapeFlexibility_case() const { return ArrayFeatureType::ShapeFlexibilityCase(_oneof_case_[0]); } +ArrayFeatureType::DefaultOptionalValueCase ArrayFeatureType::defaultOptionalValue_case() const { + return ArrayFeatureType::DefaultOptionalValueCase(_oneof_case_[1]); +} #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== diff --git a/mlmodel/build/format/FeatureTypes.pb.h b/mlmodel/build/format/FeatureTypes.pb.h index 45e2d5c56..7eaf7d6c5 100644 --- a/mlmodel/build/format/FeatureTypes.pb.h +++ b/mlmodel/build/format/FeatureTypes.pb.h @@ -1109,6 +1109,13 @@ class ArrayFeatureType : public ::google::protobuf::MessageLite /* @@protoc_inse SHAPEFLEXIBILITY_NOT_SET = 0, }; + enum DefaultOptionalValueCase { + kIntDefaultValue = 41, + kFloatDefaultValue = 51, + kDoubleDefaultValue = 61, + DEFAULTOPTIONALVALUE_NOT_SET = 0, + }; + static inline const ArrayFeatureType* internal_default_instance() { return reinterpret_cast( &_ArrayFeatureType_default_instance_); @@ -1216,16 +1223,51 @@ class ArrayFeatureType : public ::google::protobuf::MessageLite /* @@protoc_inse ::CoreML::Specification::ArrayFeatureType_ShapeRange* 
release_shaperange(); void set_allocated_shaperange(::CoreML::Specification::ArrayFeatureType_ShapeRange* shaperange); + // int32 intDefaultValue = 41; + private: + bool has_intdefaultvalue() const; + public: + void clear_intdefaultvalue(); + static const int kIntDefaultValueFieldNumber = 41; + ::google::protobuf::int32 intdefaultvalue() const; + void set_intdefaultvalue(::google::protobuf::int32 value); + + // float floatDefaultValue = 51; + private: + bool has_floatdefaultvalue() const; + public: + void clear_floatdefaultvalue(); + static const int kFloatDefaultValueFieldNumber = 51; + float floatdefaultvalue() const; + void set_floatdefaultvalue(float value); + + // double doubleDefaultValue = 61; + private: + bool has_doubledefaultvalue() const; + public: + void clear_doubledefaultvalue(); + static const int kDoubleDefaultValueFieldNumber = 61; + double doubledefaultvalue() const; + void set_doubledefaultvalue(double value); + ShapeFlexibilityCase ShapeFlexibility_case() const; + DefaultOptionalValueCase defaultOptionalValue_case() const; // @@protoc_insertion_point(class_scope:CoreML.Specification.ArrayFeatureType) private: void set_has_enumeratedshapes(); void set_has_shaperange(); + void set_has_intdefaultvalue(); + void set_has_floatdefaultvalue(); + void set_has_doubledefaultvalue(); inline bool has_ShapeFlexibility() const; void clear_ShapeFlexibility(); inline void clear_has_ShapeFlexibility(); + inline bool has_defaultOptionalValue() const; + void clear_defaultOptionalValue(); + inline void clear_has_defaultOptionalValue(); + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; ::google::protobuf::RepeatedField< ::google::protobuf::int64 > shape_; mutable int _shape_cached_byte_size_; @@ -1235,8 +1277,14 @@ class ArrayFeatureType : public ::google::protobuf::MessageLite /* @@protoc_inse ::CoreML::Specification::ArrayFeatureType_EnumeratedShapes* enumeratedshapes_; ::CoreML::Specification::ArrayFeatureType_ShapeRange* shaperange_; } ShapeFlexibility_; + union DefaultOptionalValueUnion { + DefaultOptionalValueUnion() {} + ::google::protobuf::int32 intdefaultvalue_; + float floatdefaultvalue_; + double doubledefaultvalue_; + } defaultOptionalValue_; mutable int _cached_size_; - ::google::protobuf::uint32 _oneof_case_[1]; + ::google::protobuf::uint32 _oneof_case_[2]; friend struct protobuf_FeatureTypes_2eproto::TableStruct; }; @@ -2237,15 +2285,111 @@ inline void ArrayFeatureType::set_allocated_shaperange(::CoreML::Specification:: // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.ArrayFeatureType.shapeRange) } +// int32 intDefaultValue = 41; +inline bool ArrayFeatureType::has_intdefaultvalue() const { + return defaultOptionalValue_case() == kIntDefaultValue; +} +inline void ArrayFeatureType::set_has_intdefaultvalue() { + _oneof_case_[1] = kIntDefaultValue; +} +inline void ArrayFeatureType::clear_intdefaultvalue() { + if (has_intdefaultvalue()) { + defaultOptionalValue_.intdefaultvalue_ = 0; + clear_has_defaultOptionalValue(); + } +} +inline ::google::protobuf::int32 ArrayFeatureType::intdefaultvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArrayFeatureType.intDefaultValue) + if (has_intdefaultvalue()) { + return defaultOptionalValue_.intdefaultvalue_; + } + return 0; +} +inline void ArrayFeatureType::set_intdefaultvalue(::google::protobuf::int32 value) { + if (!has_intdefaultvalue()) { + clear_defaultOptionalValue(); + set_has_intdefaultvalue(); + } + defaultOptionalValue_.intdefaultvalue_ = value; + // 
@@protoc_insertion_point(field_set:CoreML.Specification.ArrayFeatureType.intDefaultValue) +} + +// float floatDefaultValue = 51; +inline bool ArrayFeatureType::has_floatdefaultvalue() const { + return defaultOptionalValue_case() == kFloatDefaultValue; +} +inline void ArrayFeatureType::set_has_floatdefaultvalue() { + _oneof_case_[1] = kFloatDefaultValue; +} +inline void ArrayFeatureType::clear_floatdefaultvalue() { + if (has_floatdefaultvalue()) { + defaultOptionalValue_.floatdefaultvalue_ = 0; + clear_has_defaultOptionalValue(); + } +} +inline float ArrayFeatureType::floatdefaultvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArrayFeatureType.floatDefaultValue) + if (has_floatdefaultvalue()) { + return defaultOptionalValue_.floatdefaultvalue_; + } + return 0; +} +inline void ArrayFeatureType::set_floatdefaultvalue(float value) { + if (!has_floatdefaultvalue()) { + clear_defaultOptionalValue(); + set_has_floatdefaultvalue(); + } + defaultOptionalValue_.floatdefaultvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArrayFeatureType.floatDefaultValue) +} + +// double doubleDefaultValue = 61; +inline bool ArrayFeatureType::has_doubledefaultvalue() const { + return defaultOptionalValue_case() == kDoubleDefaultValue; +} +inline void ArrayFeatureType::set_has_doubledefaultvalue() { + _oneof_case_[1] = kDoubleDefaultValue; +} +inline void ArrayFeatureType::clear_doubledefaultvalue() { + if (has_doubledefaultvalue()) { + defaultOptionalValue_.doubledefaultvalue_ = 0; + clear_has_defaultOptionalValue(); + } +} +inline double ArrayFeatureType::doubledefaultvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArrayFeatureType.doubleDefaultValue) + if (has_doubledefaultvalue()) { + return defaultOptionalValue_.doubledefaultvalue_; + } + return 0; +} +inline void ArrayFeatureType::set_doubledefaultvalue(double value) { + if (!has_doubledefaultvalue()) { + clear_defaultOptionalValue(); + set_has_doubledefaultvalue(); + } + defaultOptionalValue_.doubledefaultvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArrayFeatureType.doubleDefaultValue) +} + inline bool ArrayFeatureType::has_ShapeFlexibility() const { return ShapeFlexibility_case() != SHAPEFLEXIBILITY_NOT_SET; } inline void ArrayFeatureType::clear_has_ShapeFlexibility() { _oneof_case_[0] = SHAPEFLEXIBILITY_NOT_SET; } +inline bool ArrayFeatureType::has_defaultOptionalValue() const { + return defaultOptionalValue_case() != DEFAULTOPTIONALVALUE_NOT_SET; +} +inline void ArrayFeatureType::clear_has_defaultOptionalValue() { + _oneof_case_[1] = DEFAULTOPTIONALVALUE_NOT_SET; +} inline ArrayFeatureType::ShapeFlexibilityCase ArrayFeatureType::ShapeFlexibility_case() const { return ArrayFeatureType::ShapeFlexibilityCase(_oneof_case_[0]); } +inline ArrayFeatureType::DefaultOptionalValueCase ArrayFeatureType::defaultOptionalValue_case() const { + return ArrayFeatureType::DefaultOptionalValueCase(_oneof_case_[1]); +} // ------------------------------------------------------------------- // DictionaryFeatureType diff --git a/mlmodel/build/format/FeatureTypes_enums.h b/mlmodel/build/format/FeatureTypes_enums.h index 705b905f7..5ec789c67 100644 --- a/mlmodel/build/format/FeatureTypes_enums.h +++ b/mlmodel/build/format/FeatureTypes_enums.h @@ -52,6 +52,28 @@ static const char * MLArrayFeatureTypeShapeFlexibility_Name(MLArrayFeatureTypeSh return "INVALID"; } +enum MLArrayFeatureTypedefaultOptionalValue: int { + 
MLArrayFeatureTypedefaultOptionalValue_intDefaultValue = 41, + MLArrayFeatureTypedefaultOptionalValue_floatDefaultValue = 51, + MLArrayFeatureTypedefaultOptionalValue_doubleDefaultValue = 61, + MLArrayFeatureTypedefaultOptionalValue_NOT_SET = 0, +}; + +__attribute__((__unused__)) +static const char * MLArrayFeatureTypedefaultOptionalValue_Name(MLArrayFeatureTypedefaultOptionalValue x) { + switch (x) { + case MLArrayFeatureTypedefaultOptionalValue_intDefaultValue: + return "MLArrayFeatureTypedefaultOptionalValue_intDefaultValue"; + case MLArrayFeatureTypedefaultOptionalValue_floatDefaultValue: + return "MLArrayFeatureTypedefaultOptionalValue_floatDefaultValue"; + case MLArrayFeatureTypedefaultOptionalValue_doubleDefaultValue: + return "MLArrayFeatureTypedefaultOptionalValue_doubleDefaultValue"; + case MLArrayFeatureTypedefaultOptionalValue_NOT_SET: + return "INVALID"; + } + return "INVALID"; +} + enum MLDictionaryFeatureTypeKeyType: int { MLDictionaryFeatureTypeKeyType_int64KeyType = 1, MLDictionaryFeatureTypeKeyType_stringKeyType = 2, diff --git a/mlmodel/build/format/Model.pb.cc b/mlmodel/build/format/Model.pb.cc index 32990b212..e564ecc93 100644 --- a/mlmodel/build/format/Model.pb.cc +++ b/mlmodel/build/format/Model.pb.cc @@ -30,6 +30,8 @@ class MetadataDefaultTypeInternal : public ::google::protobuf::internal::Explici } _Metadata_default_instance_; class ModelDescriptionDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed { } _ModelDescription_default_instance_; +class SerializedModelDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed { +} _SerializedModel_default_instance_; class ModelDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed { public: const ::CoreML::Specification::PipelineClassifier* pipelineclassifier_; @@ -65,6 +67,7 @@ class ModelDefaultTypeInternal : public ::google::protobuf::internal::Explicitly const ::CoreML::Specification::CoreMLModels::SoundAnalysisPreprocessing* soundanalysispreprocessing_; const ::CoreML::Specification::CoreMLModels::Gazetteer* gazetteer_; const ::CoreML::Specification::CoreMLModels::WordEmbedding* wordembedding_; + const ::CoreML::Specification::SerializedModel* serializedmodel_; } _Model_default_instance_; namespace protobuf_Model_2eproto { @@ -88,6 +91,7 @@ PROTOBUF_CONSTEXPR_VAR ::google::protobuf::internal::ParseTable const { NULL, NULL, 0, -1, -1, false }, { NULL, NULL, 0, -1, -1, false }, { NULL, NULL, 0, -1, -1, false }, + { NULL, NULL, 0, -1, -1, false }, }; @@ -98,6 +102,7 @@ void TableStruct::Shutdown() { _FeatureDescription_default_instance_.Shutdown(); _Metadata_default_instance_.Shutdown(); _ModelDescription_default_instance_.Shutdown(); + _SerializedModel_default_instance_.Shutdown(); _Model_default_instance_.Shutdown(); } @@ -140,6 +145,7 @@ void TableStruct::InitDefaultsImpl() { _Metadata_UserDefinedEntry_default_instance_.DefaultConstruct(); _Metadata_default_instance_.DefaultConstruct(); _ModelDescription_default_instance_.DefaultConstruct(); + _SerializedModel_default_instance_.DefaultConstruct(); _Model_default_instance_.DefaultConstruct(); _PipelineClassifier_default_instance_.get_mutable()->pipeline_ = const_cast< ::CoreML::Specification::Pipeline*>( ::CoreML::Specification::Pipeline::internal_default_instance()); @@ -2705,6 +2711,344 @@ void ModelDescription::set_allocated_metadata(::CoreML::Specification::Metadata* // =================================================================== +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const 
int SerializedModel::kIdentifierFieldNumber; +const int SerializedModel::kModelFieldNumber; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +SerializedModel::SerializedModel() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_Model_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.SerializedModel) +} +SerializedModel::SerializedModel(const SerializedModel& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + identifier_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + if (from.identifier().size() > 0) { + identifier_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.identifier_); + } + model_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + if (from.model().size() > 0) { + model_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.model_); + } + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.SerializedModel) +} + +void SerializedModel::SharedCtor() { + identifier_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + model_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _cached_size_ = 0; +} + +SerializedModel::~SerializedModel() { + // @@protoc_insertion_point(destructor:CoreML.Specification.SerializedModel) + SharedDtor(); +} + +void SerializedModel::SharedDtor() { + identifier_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + model_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} + +void SerializedModel::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const SerializedModel& SerializedModel::default_instance() { + protobuf_Model_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +SerializedModel* SerializedModel::New(::google::protobuf::Arena* arena) const { + SerializedModel* n = new SerializedModel; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void SerializedModel::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.SerializedModel) + identifier_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + model_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} + +bool SerializedModel::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.SerializedModel) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // string identifier = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(10u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadString( + input, this->mutable_identifier())); + DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( + this->identifier().data(), 
this->identifier().length(), + ::google::protobuf::internal::WireFormatLite::PARSE, + "CoreML.Specification.SerializedModel.identifier")); + } else { + goto handle_unusual; + } + break; + } + + // bytes model = 2; + case 2: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(18u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadBytes( + input, this->mutable_model())); + } else { + goto handle_unusual; + } + break; + } + + default: { + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } + } + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.SerializedModel) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.SerializedModel) + return false; +#undef DO_ +} + +void SerializedModel::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.SerializedModel) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // string identifier = 1; + if (this->identifier().size() > 0) { + ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( + this->identifier().data(), this->identifier().length(), + ::google::protobuf::internal::WireFormatLite::SERIALIZE, + "CoreML.Specification.SerializedModel.identifier"); + ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( + 1, this->identifier(), output); + } + + // bytes model = 2; + if (this->model().size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteBytesMaybeAliased( + 2, this->model(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.SerializedModel) +} + +size_t SerializedModel::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.SerializedModel) + size_t total_size = 0; + + // string identifier = 1; + if (this->identifier().size() > 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::StringSize( + this->identifier()); + } + + // bytes model = 2; + if (this->model().size() > 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::BytesSize( + this->model()); + } + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void SerializedModel::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast(&from)); +} + +void SerializedModel::MergeFrom(const SerializedModel& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.SerializedModel) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + if (from.identifier().size() > 0) { + + identifier_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.identifier_); + } + if (from.model().size() > 0) { + + model_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.model_); + } +} + +void SerializedModel::CopyFrom(const SerializedModel& from) { +// 
@@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.SerializedModel) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool SerializedModel::IsInitialized() const { + return true; +} + +void SerializedModel::Swap(SerializedModel* other) { + if (other == this) return; + InternalSwap(other); +} +void SerializedModel::InternalSwap(SerializedModel* other) { + identifier_.Swap(&other->identifier_); + model_.Swap(&other->model_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string SerializedModel::GetTypeName() const { + return "CoreML.Specification.SerializedModel"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// SerializedModel + +// string identifier = 1; +void SerializedModel::clear_identifier() { + identifier_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +const ::std::string& SerializedModel::identifier() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SerializedModel.identifier) + return identifier_.GetNoArena(); +} +void SerializedModel::set_identifier(const ::std::string& value) { + + identifier_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SerializedModel.identifier) +} +#if LANG_CXX11 +void SerializedModel::set_identifier(::std::string&& value) { + + identifier_.SetNoArena( + &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); + // @@protoc_insertion_point(field_set_rvalue:CoreML.Specification.SerializedModel.identifier) +} +#endif +void SerializedModel::set_identifier(const char* value) { + GOOGLE_DCHECK(value != NULL); + + identifier_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + // @@protoc_insertion_point(field_set_char:CoreML.Specification.SerializedModel.identifier) +} +void SerializedModel::set_identifier(const char* value, size_t size) { + + identifier_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), + ::std::string(reinterpret_cast(value), size)); + // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.SerializedModel.identifier) +} +::std::string* SerializedModel::mutable_identifier() { + + // @@protoc_insertion_point(field_mutable:CoreML.Specification.SerializedModel.identifier) + return identifier_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +::std::string* SerializedModel::release_identifier() { + // @@protoc_insertion_point(field_release:CoreML.Specification.SerializedModel.identifier) + + return identifier_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +void SerializedModel::set_allocated_identifier(::std::string* identifier) { + if (identifier != NULL) { + + } else { + + } + identifier_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), identifier); + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.SerializedModel.identifier) +} + +// bytes model = 2; +void SerializedModel::clear_model() { + model_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +const ::std::string& SerializedModel::model() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SerializedModel.model) + return model_.GetNoArena(); +} +void SerializedModel::set_model(const ::std::string& value) { + + model_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); + // 
@@protoc_insertion_point(field_set:CoreML.Specification.SerializedModel.model) +} +#if LANG_CXX11 +void SerializedModel::set_model(::std::string&& value) { + + model_.SetNoArena( + &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); + // @@protoc_insertion_point(field_set_rvalue:CoreML.Specification.SerializedModel.model) +} +#endif +void SerializedModel::set_model(const char* value) { + GOOGLE_DCHECK(value != NULL); + + model_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + // @@protoc_insertion_point(field_set_char:CoreML.Specification.SerializedModel.model) +} +void SerializedModel::set_model(const void* value, size_t size) { + + model_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), + ::std::string(reinterpret_cast(value), size)); + // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.SerializedModel.model) +} +::std::string* SerializedModel::mutable_model() { + + // @@protoc_insertion_point(field_mutable:CoreML.Specification.SerializedModel.model) + return model_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +::std::string* SerializedModel::release_model() { + // @@protoc_insertion_point(field_release:CoreML.Specification.SerializedModel.model) + + return model_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +void SerializedModel::set_allocated_model(::std::string* model) { + if (model != NULL) { + + } else { + + } + model_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), model); + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.SerializedModel.model) +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + #if !defined(_MSC_VER) || _MSC_VER >= 1900 const int Model::kSpecificationVersionFieldNumber; const int Model::kDescriptionFieldNumber; @@ -2742,6 +3086,7 @@ const int Model::kVisionFeaturePrintFieldNumber; const int Model::kSoundAnalysisPreprocessingFieldNumber; const int Model::kGazetteerFieldNumber; const int Model::kWordEmbeddingFieldNumber; +const int Model::kSerializedModelFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 Model::Model() @@ -2899,6 +3244,10 @@ Model::Model(const Model& from) mutable_wordembedding()->::CoreML::Specification::CoreMLModels::WordEmbedding::MergeFrom(from.wordembedding()); break; } + case kSerializedModel: { + mutable_serializedmodel()->::CoreML::Specification::SerializedModel::MergeFrom(from.serializedmodel()); + break; + } case TYPE_NOT_SET: { break; } @@ -3080,6 +3429,10 @@ void Model::clear_Type() { delete Type_.wordembedding_; break; } + case kSerializedModel: { + delete Type_.serializedmodel_; + break; + } case TYPE_NOT_SET: { break; } @@ -3105,7 +3458,7 @@ bool Model::MergePartialFromCodedStream( ::google::protobuf::uint32 tag; // @@protoc_insertion_point(parse_start:CoreML.Specification.Model) for (;;) { - ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(24002u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { @@ -3545,6 +3898,18 @@ bool Model::MergePartialFromCodedStream( break; } + // .CoreML.Specification.SerializedModel serializedModel = 3000; + case 3000: { + if (static_cast< ::google::protobuf::uint8>(tag) == 
+ static_cast< ::google::protobuf::uint8>(24002u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_serializedmodel())); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -3786,6 +4151,12 @@ void Model::SerializeWithCachedSizes( 2005, *Type_.wordembedding_, output); } + // .CoreML.Specification.SerializedModel serializedModel = 3000; + if (has_serializedmodel()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 3000, *Type_.serializedmodel_, output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.Model) } @@ -4044,6 +4415,13 @@ size_t Model::ByteSizeLong() const { *Type_.wordembedding_); break; } + // .CoreML.Specification.SerializedModel serializedModel = 3000; + case kSerializedModel: { + total_size += 3 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *Type_.serializedmodel_); + break; + } case TYPE_NOT_SET: { break; } @@ -4209,6 +4587,10 @@ void Model::MergeFrom(const Model& from) { mutable_wordembedding()->::CoreML::Specification::CoreMLModels::WordEmbedding::MergeFrom(from.wordembedding()); break; } + case kSerializedModel: { + mutable_serializedmodel()->::CoreML::Specification::SerializedModel::MergeFrom(from.serializedmodel()); + break; + } case TYPE_NOT_SET: { break; } @@ -5897,6 +6279,54 @@ void Model::set_allocated_wordembedding(::CoreML::Specification::CoreMLModels::W // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Model.wordEmbedding) } +// .CoreML.Specification.SerializedModel serializedModel = 3000; +bool Model::has_serializedmodel() const { + return Type_case() == kSerializedModel; +} +void Model::set_has_serializedmodel() { + _oneof_case_[0] = kSerializedModel; +} +void Model::clear_serializedmodel() { + if (has_serializedmodel()) { + delete Type_.serializedmodel_; + clear_has_Type(); + } +} + const ::CoreML::Specification::SerializedModel& Model::serializedmodel() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Model.serializedModel) + return has_serializedmodel() + ? 
*Type_.serializedmodel_ + : ::CoreML::Specification::SerializedModel::default_instance(); +} +::CoreML::Specification::SerializedModel* Model::mutable_serializedmodel() { + if (!has_serializedmodel()) { + clear_Type(); + set_has_serializedmodel(); + Type_.serializedmodel_ = new ::CoreML::Specification::SerializedModel; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.Model.serializedModel) + return Type_.serializedmodel_; +} +::CoreML::Specification::SerializedModel* Model::release_serializedmodel() { + // @@protoc_insertion_point(field_release:CoreML.Specification.Model.serializedModel) + if (has_serializedmodel()) { + clear_has_Type(); + ::CoreML::Specification::SerializedModel* temp = Type_.serializedmodel_; + Type_.serializedmodel_ = NULL; + return temp; + } else { + return NULL; + } +} +void Model::set_allocated_serializedmodel(::CoreML::Specification::SerializedModel* serializedmodel) { + clear_Type(); + if (serializedmodel) { + set_has_serializedmodel(); + Type_.serializedmodel_ = serializedmodel; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Model.serializedModel) +} + bool Model::has_Type() const { return Type_case() != TYPE_NOT_SET; } diff --git a/mlmodel/build/format/Model.pb.h b/mlmodel/build/format/Model.pb.h index f07527c4e..50b345c50 100644 --- a/mlmodel/build/format/Model.pb.h +++ b/mlmodel/build/format/Model.pb.h @@ -124,6 +124,9 @@ extern ArgMaxLayerParamsDefaultTypeInternal _ArgMaxLayerParams_default_instance_ class ArgMinLayerParams; class ArgMinLayerParamsDefaultTypeInternal; extern ArgMinLayerParamsDefaultTypeInternal _ArgMinLayerParams_default_instance_; +class ArgSortLayerParams; +class ArgSortLayerParamsDefaultTypeInternal; +extern ArgSortLayerParamsDefaultTypeInternal _ArgSortLayerParams_default_instance_; class ArrayFeatureExtractor; class ArrayFeatureExtractorDefaultTypeInternal; extern ArrayFeatureExtractorDefaultTypeInternal _ArrayFeatureExtractor_default_instance_; @@ -214,6 +217,9 @@ extern CategoricalMappingDefaultTypeInternal _CategoricalMapping_default_instanc class CeilLayerParams; class CeilLayerParamsDefaultTypeInternal; extern CeilLayerParamsDefaultTypeInternal _CeilLayerParams_default_instance_; +class ClampedReLULayerParams; +class ClampedReLULayerParamsDefaultTypeInternal; +extern ClampedReLULayerParamsDefaultTypeInternal _ClampedReLULayerParams_default_instance_; class ClipLayerParams; class ClipLayerParamsDefaultTypeInternal; extern ClipLayerParamsDefaultTypeInternal _ClipLayerParams_default_instance_; @@ -229,6 +235,9 @@ extern ConcatNDLayerParamsDefaultTypeInternal _ConcatNDLayerParams_default_insta class ConstantPaddingLayerParams; class ConstantPaddingLayerParamsDefaultTypeInternal; extern ConstantPaddingLayerParamsDefaultTypeInternal _ConstantPaddingLayerParams_default_instance_; +class Convolution3DLayerParams; +class Convolution3DLayerParamsDefaultTypeInternal; +extern Convolution3DLayerParamsDefaultTypeInternal _Convolution3DLayerParams_default_instance_; class ConvolutionLayerParams; class ConvolutionLayerParamsDefaultTypeInternal; extern ConvolutionLayerParamsDefaultTypeInternal _ConvolutionLayerParams_default_instance_; @@ -247,6 +256,9 @@ extern CropLayerParamsDefaultTypeInternal _CropLayerParams_default_instance_; class CropResizeLayerParams; class CropResizeLayerParamsDefaultTypeInternal; extern CropResizeLayerParamsDefaultTypeInternal _CropResizeLayerParams_default_instance_; +class CumSumLayerParams; +class CumSumLayerParamsDefaultTypeInternal; +extern 
CumSumLayerParamsDefaultTypeInternal _CumSumLayerParams_default_instance_; class CustomLayerParams; class CustomLayerParamsDefaultTypeInternal; extern CustomLayerParamsDefaultTypeInternal _CustomLayerParams_default_instance_; @@ -379,6 +391,9 @@ extern GeluLayerParamsDefaultTypeInternal _GeluLayerParams_default_instance_; class GetShapeLayerParams; class GetShapeLayerParamsDefaultTypeInternal; extern GetShapeLayerParamsDefaultTypeInternal _GetShapeLayerParams_default_instance_; +class GlobalPooling3DLayerParams; +class GlobalPooling3DLayerParamsDefaultTypeInternal; +extern GlobalPooling3DLayerParamsDefaultTypeInternal _GlobalPooling3DLayerParams_default_instance_; class GreaterEqualLayerParams; class GreaterEqualLayerParamsDefaultTypeInternal; extern GreaterEqualLayerParamsDefaultTypeInternal _GreaterEqualLayerParams_default_instance_; @@ -610,6 +625,9 @@ extern NotEqualLayerParamsDefaultTypeInternal _NotEqualLayerParams_default_insta class OneHotEncoder; class OneHotEncoderDefaultTypeInternal; extern OneHotEncoderDefaultTypeInternal _OneHotEncoder_default_instance_; +class OneHotLayerParams; +class OneHotLayerParamsDefaultTypeInternal; +extern OneHotLayerParamsDefaultTypeInternal _OneHotLayerParams_default_instance_; class Optimizer; class OptimizerDefaultTypeInternal; extern OptimizerDefaultTypeInternal _Optimizer_default_instance_; @@ -640,6 +658,9 @@ extern PipelineRegressorDefaultTypeInternal _PipelineRegressor_default_instance_ class PolyKernel; class PolyKernelDefaultTypeInternal; extern PolyKernelDefaultTypeInternal _PolyKernel_default_instance_; +class Pooling3DLayerParams; +class Pooling3DLayerParamsDefaultTypeInternal; +extern Pooling3DLayerParamsDefaultTypeInternal _Pooling3DLayerParams_default_instance_; class PoolingLayerParams; class PoolingLayerParamsDefaultTypeInternal; extern PoolingLayerParamsDefaultTypeInternal _PoolingLayerParams_default_instance_; @@ -781,6 +802,9 @@ extern SequenceFeatureTypeDefaultTypeInternal _SequenceFeatureType_default_insta class SequenceRepeatLayerParams; class SequenceRepeatLayerParamsDefaultTypeInternal; extern SequenceRepeatLayerParamsDefaultTypeInternal _SequenceRepeatLayerParams_default_instance_; +class SerializedModel; +class SerializedModelDefaultTypeInternal; +extern SerializedModelDefaultTypeInternal _SerializedModel_default_instance_; class SigmoidKernel; class SigmoidKernelDefaultTypeInternal; extern SigmoidKernelDefaultTypeInternal _SigmoidKernel_default_instance_; @@ -802,6 +826,9 @@ extern SinhLayerParamsDefaultTypeInternal _SinhLayerParams_default_instance_; class SizeRange; class SizeRangeDefaultTypeInternal; extern SizeRangeDefaultTypeInternal _SizeRange_default_instance_; +class SliceBySizeLayerParams; +class SliceBySizeLayerParamsDefaultTypeInternal; +extern SliceBySizeLayerParamsDefaultTypeInternal _SliceBySizeLayerParams_default_instance_; class SliceDynamicLayerParams; class SliceDynamicLayerParamsDefaultTypeInternal; extern SliceDynamicLayerParamsDefaultTypeInternal _SliceDynamicLayerParams_default_instance_; @@ -950,6 +977,9 @@ extern TextClassifierDefaultTypeInternal _TextClassifier_default_instance_; class VisionFeaturePrint; class VisionFeaturePrintDefaultTypeInternal; extern VisionFeaturePrintDefaultTypeInternal _VisionFeaturePrint_default_instance_; +class VisionFeaturePrint_Object; +class VisionFeaturePrint_ObjectDefaultTypeInternal; +extern VisionFeaturePrint_ObjectDefaultTypeInternal _VisionFeaturePrint_Object_default_instance_; class VisionFeaturePrint_Scene; class VisionFeaturePrint_SceneDefaultTypeInternal; 
extern VisionFeaturePrint_SceneDefaultTypeInternal _VisionFeaturePrint_Scene_default_instance_; @@ -1670,6 +1700,107 @@ class ModelDescription : public ::google::protobuf::MessageLite /* @@protoc_inse }; // ------------------------------------------------------------------- +class SerializedModel : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.SerializedModel) */ { + public: + SerializedModel(); + virtual ~SerializedModel(); + + SerializedModel(const SerializedModel& from); + + inline SerializedModel& operator=(const SerializedModel& from) { + CopyFrom(from); + return *this; + } + + static const SerializedModel& default_instance(); + + static inline const SerializedModel* internal_default_instance() { + return reinterpret_cast( + &_SerializedModel_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 7; + + void Swap(SerializedModel* other); + + // implements Message ---------------------------------------------- + + inline SerializedModel* New() const PROTOBUF_FINAL { return New(NULL); } + + SerializedModel* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const SerializedModel& from); + void MergeFrom(const SerializedModel& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(SerializedModel* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // string identifier = 1; + void clear_identifier(); + static const int kIdentifierFieldNumber = 1; + const ::std::string& identifier() const; + void set_identifier(const ::std::string& value); + #if LANG_CXX11 + void set_identifier(::std::string&& value); + #endif + void set_identifier(const char* value); + void set_identifier(const char* value, size_t size); + ::std::string* mutable_identifier(); + ::std::string* release_identifier(); + void set_allocated_identifier(::std::string* identifier); + + // bytes model = 2; + void clear_model(); + static const int kModelFieldNumber = 2; + const ::std::string& model() const; + void set_model(const ::std::string& value); + #if LANG_CXX11 + void set_model(::std::string&& value); + #endif + void set_model(const char* value); + void set_model(const void* value, size_t size); + ::std::string* mutable_model(); + ::std::string* release_model(); + void set_allocated_model(::std::string* model); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.SerializedModel) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::internal::ArenaStringPtr identifier_; + ::google::protobuf::internal::ArenaStringPtr model_; + mutable int 
_cached_size_; + friend struct protobuf_Model_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + class Model : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.Model) */ { public: Model(); @@ -1718,6 +1849,7 @@ class Model : public ::google::protobuf::MessageLite /* @@protoc_insertion_point kSoundAnalysisPreprocessing = 2003, kGazetteer = 2004, kWordEmbedding = 2005, + kSerializedModel = 3000, TYPE_NOT_SET = 0, }; @@ -1726,7 +1858,7 @@ class Model : public ::google::protobuf::MessageLite /* @@protoc_insertion_point &_Model_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 7; + 8; void Swap(Model* other); @@ -2087,6 +2219,15 @@ class Model : public ::google::protobuf::MessageLite /* @@protoc_insertion_point ::CoreML::Specification::CoreMLModels::WordEmbedding* release_wordembedding(); void set_allocated_wordembedding(::CoreML::Specification::CoreMLModels::WordEmbedding* wordembedding); + // .CoreML.Specification.SerializedModel serializedModel = 3000; + bool has_serializedmodel() const; + void clear_serializedmodel(); + static const int kSerializedModelFieldNumber = 3000; + const ::CoreML::Specification::SerializedModel& serializedmodel() const; + ::CoreML::Specification::SerializedModel* mutable_serializedmodel(); + ::CoreML::Specification::SerializedModel* release_serializedmodel(); + void set_allocated_serializedmodel(::CoreML::Specification::SerializedModel* serializedmodel); + TypeCase Type_case() const; // @@protoc_insertion_point(class_scope:CoreML.Specification.Model) private: @@ -2123,6 +2264,7 @@ class Model : public ::google::protobuf::MessageLite /* @@protoc_insertion_point void set_has_soundanalysispreprocessing(); void set_has_gazetteer(); void set_has_wordembedding(); + void set_has_serializedmodel(); inline bool has_Type() const; void clear_Type(); @@ -2167,6 +2309,7 @@ class Model : public ::google::protobuf::MessageLite /* @@protoc_insertion_point ::CoreML::Specification::CoreMLModels::SoundAnalysisPreprocessing* soundanalysispreprocessing_; ::CoreML::Specification::CoreMLModels::Gazetteer* gazetteer_; ::CoreML::Specification::CoreMLModels::WordEmbedding* wordembedding_; + ::CoreML::Specification::SerializedModel* serializedmodel_; } Type_; mutable int _cached_size_; ::google::protobuf::uint32 _oneof_case_[1]; @@ -2992,6 +3135,116 @@ inline void ModelDescription::set_allocated_metadata(::CoreML::Specification::Me // ------------------------------------------------------------------- +// SerializedModel + +// string identifier = 1; +inline void SerializedModel::clear_identifier() { + identifier_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +inline const ::std::string& SerializedModel::identifier() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SerializedModel.identifier) + return identifier_.GetNoArena(); +} +inline void SerializedModel::set_identifier(const ::std::string& value) { + + identifier_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SerializedModel.identifier) +} +#if LANG_CXX11 +inline void SerializedModel::set_identifier(::std::string&& value) { + + identifier_.SetNoArena( + &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); + // @@protoc_insertion_point(field_set_rvalue:CoreML.Specification.SerializedModel.identifier) +} +#endif +inline 
void SerializedModel::set_identifier(const char* value) {
+  GOOGLE_DCHECK(value != NULL);
+
+  identifier_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value));
+  // @@protoc_insertion_point(field_set_char:CoreML.Specification.SerializedModel.identifier)
+}
+inline void SerializedModel::set_identifier(const char* value, size_t size) {
+
+  identifier_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
+      ::std::string(reinterpret_cast<const char*>(value), size));
+  // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.SerializedModel.identifier)
+}
+inline ::std::string* SerializedModel::mutable_identifier() {
+
+  // @@protoc_insertion_point(field_mutable:CoreML.Specification.SerializedModel.identifier)
+  return identifier_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline ::std::string* SerializedModel::release_identifier() {
+  // @@protoc_insertion_point(field_release:CoreML.Specification.SerializedModel.identifier)
+
+  return identifier_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline void SerializedModel::set_allocated_identifier(::std::string* identifier) {
+  if (identifier != NULL) {
+
+  } else {
+
+  }
+  identifier_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), identifier);
+  // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.SerializedModel.identifier)
+}
+
+// bytes model = 2;
+inline void SerializedModel::clear_model() {
+  model_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline const ::std::string& SerializedModel::model() const {
+  // @@protoc_insertion_point(field_get:CoreML.Specification.SerializedModel.model)
+  return model_.GetNoArena();
+}
+inline void SerializedModel::set_model(const ::std::string& value) {
+
+  model_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value);
+  // @@protoc_insertion_point(field_set:CoreML.Specification.SerializedModel.model)
+}
+#if LANG_CXX11
+inline void SerializedModel::set_model(::std::string&& value) {
+
+  model_.SetNoArena(
+    &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value));
+  // @@protoc_insertion_point(field_set_rvalue:CoreML.Specification.SerializedModel.model)
+}
+#endif
+inline void SerializedModel::set_model(const char* value) {
+  GOOGLE_DCHECK(value != NULL);
+
+  model_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value));
+  // @@protoc_insertion_point(field_set_char:CoreML.Specification.SerializedModel.model)
+}
+inline void SerializedModel::set_model(const void* value, size_t size) {
+
+  model_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
+      ::std::string(reinterpret_cast<const char*>(value), size));
+  // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.SerializedModel.model)
+}
+inline ::std::string* SerializedModel::mutable_model() {
+
+  // @@protoc_insertion_point(field_mutable:CoreML.Specification.SerializedModel.model)
+  return model_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline ::std::string* SerializedModel::release_model() {
+  // @@protoc_insertion_point(field_release:CoreML.Specification.SerializedModel.model)
+
+  return model_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline void SerializedModel::set_allocated_model(::std::string* model) {
+  if (model != NULL) {
+
+  } else {
+
+  }
+  model_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), model);
+  // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.SerializedModel.model)
+}
+
+// -------------------------------------------------------------------
+
 // Model
 
 // int32 specificationVersion = 1;
@@ -4645,6 +4898,54 @@ inline void Model::set_allocated_wordembedding(::CoreML::Specification::CoreMLMo
   // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Model.wordEmbedding)
 }
 
+// .CoreML.Specification.SerializedModel serializedModel = 3000;
+inline bool Model::has_serializedmodel() const {
+  return Type_case() == kSerializedModel;
+}
+inline void Model::set_has_serializedmodel() {
+  _oneof_case_[0] = kSerializedModel;
+}
+inline void Model::clear_serializedmodel() {
+  if (has_serializedmodel()) {
+    delete Type_.serializedmodel_;
+    clear_has_Type();
+  }
+}
+inline const ::CoreML::Specification::SerializedModel& Model::serializedmodel() const {
+  // @@protoc_insertion_point(field_get:CoreML.Specification.Model.serializedModel)
+  return has_serializedmodel()
+      ? *Type_.serializedmodel_
+      : ::CoreML::Specification::SerializedModel::default_instance();
+}
+inline ::CoreML::Specification::SerializedModel* Model::mutable_serializedmodel() {
+  if (!has_serializedmodel()) {
+    clear_Type();
+    set_has_serializedmodel();
+    Type_.serializedmodel_ = new ::CoreML::Specification::SerializedModel;
+  }
+  // @@protoc_insertion_point(field_mutable:CoreML.Specification.Model.serializedModel)
+  return Type_.serializedmodel_;
+}
+inline ::CoreML::Specification::SerializedModel* Model::release_serializedmodel() {
+  // @@protoc_insertion_point(field_release:CoreML.Specification.Model.serializedModel)
+  if (has_serializedmodel()) {
+    clear_has_Type();
+    ::CoreML::Specification::SerializedModel* temp = Type_.serializedmodel_;
+    Type_.serializedmodel_ = NULL;
+    return temp;
+  } else {
+    return NULL;
+  }
+}
+inline void Model::set_allocated_serializedmodel(::CoreML::Specification::SerializedModel* serializedmodel) {
+  clear_Type();
+  if (serializedmodel) {
+    set_has_serializedmodel();
+    Type_.serializedmodel_ = serializedmodel;
+  }
+  // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Model.serializedModel)
+}
+
 inline bool Model::has_Type() const {
   return Type_case() != TYPE_NOT_SET;
 }
@@ -4669,6 +4970,8 @@ inline Model::TypeCase Model::Type_case() const {
 // -------------------------------------------------------------------
 
+// -------------------------------------------------------------------
+
 // @@protoc_insertion_point(namespace_scope)
diff --git a/mlmodel/build/format/Model_enums.h b/mlmodel/build/format/Model_enums.h
index a70124e68..d8d8f3e28 100644
--- a/mlmodel/build/format/Model_enums.h
+++ b/mlmodel/build/format/Model_enums.h
@@ -34,6 +34,7 @@ enum MLModelType: int {
     MLModelType_soundAnalysisPreprocessing = 2003,
     MLModelType_gazetteer = 2004,
     MLModelType_wordEmbedding = 2005,
+    MLModelType_serializedModel = 3000,
     MLModelType_NOT_SET = 0,
 };
@@ -106,6 +107,8 @@ static const char * MLModelType_Name(MLModelType x) {
             return "MLModelType_gazetteer";
         case MLModelType_wordEmbedding:
             return "MLModelType_wordEmbedding";
+        case MLModelType_serializedModel:
+            return "MLModelType_serializedModel";
         case MLModelType_NOT_SET:
             return "INVALID";
     }
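For orientation, the new `SerializedModel` message and the `serializedModel` oneof case on `Model` are exercised through the generated accessors shown above. A minimal usage sketch (assuming the generated `Model.pb.h` is on the include path; the identifier string and specification version below are illustrative, not mandated by the spec):

```cpp
#include <cassert>
#include <string>

#include "Model.pb.h"  // generated from Model.proto

// Wrap an opaque payload in the new SerializedModel oneof case.
CoreML::Specification::Model WrapPayload(const std::string& payload) {
  CoreML::Specification::Model model;
  model.set_specificationversion(5);  // illustrative version number

  CoreML::Specification::SerializedModel* sm = model.mutable_serializedmodel();
  sm->set_identifier("com.example.payload");      // hypothetical identifier
  sm->set_model(payload.data(), payload.size());  // bytes field; may contain '\0'

  // Selecting the oneof member sets the new case added at field number 3000.
  assert(model.Type_case() == CoreML::Specification::Model::kSerializedModel);
  return model;
}
```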
diff --git a/mlmodel/build/format/NeuralNetwork.pb.cc b/mlmodel/build/format/NeuralNetwork.pb.cc
index 901416436..b1147b971 100644
--- a/mlmodel/build/format/NeuralNetwork.pb.cc
+++ b/mlmodel/build/format/NeuralNetwork.pb.cc
@@ -223,6 +223,14 @@ class NeuralNetworkLayerDefaultTypeInternal : public ::google::protobuf::interna
   const ::CoreML::Specification::WhereBroadcastableLayerParams* wherebroadcastable_;
   const ::CoreML::Specification::LayerNormalizationLayerParams* layernormalization_;
   const ::CoreML::Specification::NonMaximumSuppressionLayerParams* nonmaximumsuppression_;
+  const ::CoreML::Specification::OneHotLayerParams* onehot_;
+  const ::CoreML::Specification::CumSumLayerParams* cumsum_;
+  const ::CoreML::Specification::ClampedReLULayerParams* clampedrelu_;
+  const ::CoreML::Specification::ArgSortLayerParams* argsort_;
+  const ::CoreML::Specification::Pooling3DLayerParams* pooling3d_;
+  const ::CoreML::Specification::GlobalPooling3DLayerParams* globalpooling3d_;
+  const ::CoreML::Specification::SliceBySizeLayerParams* slicebysize_;
+  const ::CoreML::Specification::Convolution3DLayerParams* convolution3d_;
 } _NeuralNetworkLayer_default_instance_;
 class BranchLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<BranchLayerParams> {
 } _BranchLayerParams_default_instance_;
@@ -282,6 +290,8 @@ class ConvolutionLayerParamsDefaultTypeInternal : public ::google::protobuf::int
   const ::CoreML::Specification::ValidPadding* valid_;
   const ::CoreML::Specification::SamePadding* same_;
 } _ConvolutionLayerParams_default_instance_;
+class Convolution3DLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<Convolution3DLayerParams> {
+} _Convolution3DLayerParams_default_instance_;
 class InnerProductLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<InnerProductLayerParams> {
 } _InnerProductLayerParams_default_instance_;
 class EmbeddingLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<EmbeddingLayerParams> {
@@ -298,6 +308,10 @@ class PoolingLayerParamsDefaultTypeInternal : public ::google::protobuf::interna
   const ::CoreML::Specification::SamePadding* same_;
   const ::CoreML::Specification::PoolingLayerParams_ValidCompletePadding* includelastpixel_;
 } _PoolingLayerParams_default_instance_;
+class Pooling3DLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<Pooling3DLayerParams> {
+} _Pooling3DLayerParams_default_instance_;
+class GlobalPooling3DLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<GlobalPooling3DLayerParams> {
+} _GlobalPooling3DLayerParams_default_instance_;
 class PaddingLayerParams_PaddingConstantDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<PaddingLayerParams_PaddingConstant> {
 } _PaddingLayerParams_PaddingConstant_default_instance_;
 class PaddingLayerParams_PaddingReflectionDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<PaddingLayerParams_PaddingReflection> {
@@ -578,11 +592,21 @@ class LayerNormalizationLayerParamsDefaultTypeInternal : public ::google::protob
 } _LayerNormalizationLayerParams_default_instance_;
 class NonMaximumSuppressionLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<NonMaximumSuppressionLayerParams> {
 } _NonMaximumSuppressionLayerParams_default_instance_;
+class ClampedReLULayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<ClampedReLULayerParams> {
+} _ClampedReLULayerParams_default_instance_;
+class ArgSortLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<ArgSortLayerParams> {
+} _ArgSortLayerParams_default_instance_;
+class SliceBySizeLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<SliceBySizeLayerParams> {
+} _SliceBySizeLayerParams_default_instance_;
 class NeuralNetworkClassifierDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<NeuralNetworkClassifier> {
  public:
  const ::CoreML::Specification::StringVector* stringclasslabels_;
  const ::CoreML::Specification::Int64Vector* int64classlabels_;
 } _NeuralNetworkClassifier_default_instance_;
+class OneHotLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<OneHotLayerParams> {
+} _OneHotLayerParams_default_instance_;
+class CumSumLayerParamsDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<CumSumLayerParams> {
+} _CumSumLayerParams_default_instance_;
 class NeuralNetworkRegressorDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<NeuralNetworkRegressor> {
 } _NeuralNetworkRegressor_default_instance_;
 class NetworkUpdateParametersDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<NetworkUpdateParameters> {
@@ -815,6 +839,14 @@ PROTOBUF_CONSTEXPR_VAR ::google::protobuf::internal::ParseTable const
   { NULL, NULL, 0, -1, -1, false },
   { NULL, NULL, 0, -1, -1, false },
   { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
+  { NULL, NULL, 0, -1, -1, false },
 };
@@ -865,12 +897,15 @@ void TableStruct::Shutdown() {
   _LinearQuantizationParams_default_instance_.Shutdown();
   _LookUpTableQuantizationParams_default_instance_.Shutdown();
   _ConvolutionLayerParams_default_instance_.Shutdown();
+  _Convolution3DLayerParams_default_instance_.Shutdown();
   _InnerProductLayerParams_default_instance_.Shutdown();
   _EmbeddingLayerParams_default_instance_.Shutdown();
   _EmbeddingNDLayerParams_default_instance_.Shutdown();
   _BatchnormLayerParams_default_instance_.Shutdown();
   _PoolingLayerParams_ValidCompletePadding_default_instance_.Shutdown();
   _PoolingLayerParams_default_instance_.Shutdown();
+  _Pooling3DLayerParams_default_instance_.Shutdown();
+  _GlobalPooling3DLayerParams_default_instance_.Shutdown();
   _PaddingLayerParams_PaddingConstant_default_instance_.Shutdown();
   _PaddingLayerParams_PaddingReflection_default_instance_.Shutdown();
   _PaddingLayerParams_PaddingReplication_default_instance_.Shutdown();
@@ -1005,7 +1040,12 @@ void TableStruct::Shutdown() {
   _SlidingWindowsLayerParams_default_instance_.Shutdown();
   _LayerNormalizationLayerParams_default_instance_.Shutdown();
   _NonMaximumSuppressionLayerParams_default_instance_.Shutdown();
+  _ClampedReLULayerParams_default_instance_.Shutdown();
+  _ArgSortLayerParams_default_instance_.Shutdown();
+  _SliceBySizeLayerParams_default_instance_.Shutdown();
   _NeuralNetworkClassifier_default_instance_.Shutdown();
+  _OneHotLayerParams_default_instance_.Shutdown();
+  _CumSumLayerParams_default_instance_.Shutdown();
   _NeuralNetworkRegressor_default_instance_.Shutdown();
   _NetworkUpdateParameters_default_instance_.Shutdown();
   _LossLayer_default_instance_.Shutdown();
@@ -1068,12 +1108,15 @@ void TableStruct::InitDefaultsImpl() {
   _LinearQuantizationParams_default_instance_.DefaultConstruct();
   _LookUpTableQuantizationParams_default_instance_.DefaultConstruct();
   _ConvolutionLayerParams_default_instance_.DefaultConstruct();
+  _Convolution3DLayerParams_default_instance_.DefaultConstruct();
   _InnerProductLayerParams_default_instance_.DefaultConstruct();
   _EmbeddingLayerParams_default_instance_.DefaultConstruct();
   _EmbeddingNDLayerParams_default_instance_.DefaultConstruct();
   _BatchnormLayerParams_default_instance_.DefaultConstruct();
_PoolingLayerParams_ValidCompletePadding_default_instance_.DefaultConstruct(); _PoolingLayerParams_default_instance_.DefaultConstruct(); + _Pooling3DLayerParams_default_instance_.DefaultConstruct(); + _GlobalPooling3DLayerParams_default_instance_.DefaultConstruct(); _PaddingLayerParams_PaddingConstant_default_instance_.DefaultConstruct(); _PaddingLayerParams_PaddingReflection_default_instance_.DefaultConstruct(); _PaddingLayerParams_PaddingReplication_default_instance_.DefaultConstruct(); @@ -1209,7 +1252,12 @@ void TableStruct::InitDefaultsImpl() { _SlidingWindowsLayerParams_default_instance_.DefaultConstruct(); _LayerNormalizationLayerParams_default_instance_.DefaultConstruct(); _NonMaximumSuppressionLayerParams_default_instance_.DefaultConstruct(); + _ClampedReLULayerParams_default_instance_.DefaultConstruct(); + _ArgSortLayerParams_default_instance_.DefaultConstruct(); + _SliceBySizeLayerParams_default_instance_.DefaultConstruct(); _NeuralNetworkClassifier_default_instance_.DefaultConstruct(); + _OneHotLayerParams_default_instance_.DefaultConstruct(); + _CumSumLayerParams_default_instance_.DefaultConstruct(); _NeuralNetworkRegressor_default_instance_.DefaultConstruct(); _NetworkUpdateParameters_default_instance_.DefaultConstruct(); _LossLayer_default_instance_.DefaultConstruct(); @@ -1242,6 +1290,10 @@ void TableStruct::InitDefaultsImpl() { ::CoreML::Specification::WeightParams::internal_default_instance()); _ConvolutionLayerParams_default_instance_.get_mutable()->bias_ = const_cast< ::CoreML::Specification::WeightParams*>( ::CoreML::Specification::WeightParams::internal_default_instance()); + _Convolution3DLayerParams_default_instance_.get_mutable()->weights_ = const_cast< ::CoreML::Specification::WeightParams*>( + ::CoreML::Specification::WeightParams::internal_default_instance()); + _Convolution3DLayerParams_default_instance_.get_mutable()->bias_ = const_cast< ::CoreML::Specification::WeightParams*>( + ::CoreML::Specification::WeightParams::internal_default_instance()); _InnerProductLayerParams_default_instance_.get_mutable()->weights_ = const_cast< ::CoreML::Specification::WeightParams*>( ::CoreML::Specification::WeightParams::internal_default_instance()); _InnerProductLayerParams_default_instance_.get_mutable()->bias_ = const_cast< ::CoreML::Specification::WeightParams*>( @@ -1469,6 +1521,25 @@ const BoxCoordinatesMode_Coordinates BoxCoordinatesMode::Coordinates_MIN; const BoxCoordinatesMode_Coordinates BoxCoordinatesMode::Coordinates_MAX; const int BoxCoordinatesMode::Coordinates_ARRAYSIZE; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +bool Convolution3DLayerParams_PaddingType_IsValid(int value) { + switch (value) { + case 0: + case 1: + case 2: + return true; + default: + return false; + } +} + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams::CUSTOM; +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams::VALID; +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams::SAME; +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams::PaddingType_MIN; +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams::PaddingType_MAX; +const int Convolution3DLayerParams::PaddingType_ARRAYSIZE; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 bool PoolingLayerParams_PoolingType_IsValid(int value) { switch (value) { case 0: @@ -1488,6 +1559,59 @@ const PoolingLayerParams_PoolingType PoolingLayerParams::PoolingType_MIN; const PoolingLayerParams_PoolingType 
PoolingLayerParams::PoolingType_MAX; const int PoolingLayerParams::PoolingType_ARRAYSIZE; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +bool Pooling3DLayerParams_PoolingType3D_IsValid(int value) { + switch (value) { + case 0: + case 1: + return true; + default: + return false; + } +} + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams::MAX; +const Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams::AVERAGE; +const Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams::PoolingType3D_MIN; +const Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams::PoolingType3D_MAX; +const int Pooling3DLayerParams::PoolingType3D_ARRAYSIZE; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +bool Pooling3DLayerParams_Pooling3DPaddingType_IsValid(int value) { + switch (value) { + case 0: + case 1: + case 2: + return true; + default: + return false; + } +} + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::CUSTOM; +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::VALID; +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::SAME; +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::Pooling3DPaddingType_MIN; +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::Pooling3DPaddingType_MAX; +const int Pooling3DLayerParams::Pooling3DPaddingType_ARRAYSIZE; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +bool GlobalPooling3DLayerParams_GlobalPoolingType3D_IsValid(int value) { + switch (value) { + case 0: + case 1: + return true; + default: + return false; + } +} + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams::MAX; +const GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams::AVERAGE; +const GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams::GlobalPoolingType3D_MIN; +const GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams::GlobalPoolingType3D_MAX; +const int GlobalPooling3DLayerParams::GlobalPoolingType3D_ARRAYSIZE; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 bool UnaryFunctionLayerParams_Operation_IsValid(int value) { switch (value) { case 0: @@ -1534,6 +1658,25 @@ const UpsampleLayerParams_InterpolationMode UpsampleLayerParams::InterpolationMo const UpsampleLayerParams_InterpolationMode UpsampleLayerParams::InterpolationMode_MAX; const int UpsampleLayerParams::InterpolationMode_ARRAYSIZE; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +bool UpsampleLayerParams_LinearUpsampleMode_IsValid(int value) { + switch (value) { + case 0: + case 1: + case 2: + return true; + default: + return false; + } +} + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::DEFAULT; +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::ALIGN_CORNERS_TRUE; +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::ALIGN_CORNERS_FALSE; +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::LinearUpsampleMode_MIN; +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::LinearUpsampleMode_MAX; +const int UpsampleLayerParams::LinearUpsampleMode_ARRAYSIZE; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 bool FlattenLayerParams_FlattenOrder_IsValid(int value) { switch (value) { case 0: @@ -1572,6 +1715,7 @@ bool ReorganizeDataLayerParams_ReorganizationType_IsValid(int value) { switch 
(value) { case 0: case 1: + case 2: return true; default: return false; @@ -1581,6 +1725,7 @@ bool ReorganizeDataLayerParams_ReorganizationType_IsValid(int value) { #if !defined(_MSC_VER) || _MSC_VER >= 1900 const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams::SPACE_TO_DEPTH; const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams::DEPTH_TO_SPACE; +const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams::PIXEL_SHUFFLE; const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams::ReorganizationType_MIN; const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams::ReorganizationType_MAX; const int ReorganizeDataLayerParams::ReorganizationType_ARRAYSIZE; @@ -7531,6 +7676,14 @@ const int NeuralNetworkLayer::kUpperTriangularFieldNumber; const int NeuralNetworkLayer::kWhereBroadcastableFieldNumber; const int NeuralNetworkLayer::kLayerNormalizationFieldNumber; const int NeuralNetworkLayer::kNonMaximumSuppressionFieldNumber; +const int NeuralNetworkLayer::kOneHotFieldNumber; +const int NeuralNetworkLayer::kCumSumFieldNumber; +const int NeuralNetworkLayer::kClampedReLUFieldNumber; +const int NeuralNetworkLayer::kArgSortFieldNumber; +const int NeuralNetworkLayer::kPooling3DFieldNumber; +const int NeuralNetworkLayer::kGlobalPooling3DFieldNumber; +const int NeuralNetworkLayer::kSliceBySizeFieldNumber; +const int NeuralNetworkLayer::kConvolution3DFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 NeuralNetworkLayer::NeuralNetworkLayer() @@ -8157,6 +8310,38 @@ NeuralNetworkLayer::NeuralNetworkLayer(const NeuralNetworkLayer& from) mutable_nonmaximumsuppression()->::CoreML::Specification::NonMaximumSuppressionLayerParams::MergeFrom(from.nonmaximumsuppression()); break; } + case kOneHot: { + mutable_onehot()->::CoreML::Specification::OneHotLayerParams::MergeFrom(from.onehot()); + break; + } + case kCumSum: { + mutable_cumsum()->::CoreML::Specification::CumSumLayerParams::MergeFrom(from.cumsum()); + break; + } + case kClampedReLU: { + mutable_clampedrelu()->::CoreML::Specification::ClampedReLULayerParams::MergeFrom(from.clampedrelu()); + break; + } + case kArgSort: { + mutable_argsort()->::CoreML::Specification::ArgSortLayerParams::MergeFrom(from.argsort()); + break; + } + case kPooling3D: { + mutable_pooling3d()->::CoreML::Specification::Pooling3DLayerParams::MergeFrom(from.pooling3d()); + break; + } + case kGlobalPooling3D: { + mutable_globalpooling3d()->::CoreML::Specification::GlobalPooling3DLayerParams::MergeFrom(from.globalpooling3d()); + break; + } + case kSliceBySize: { + mutable_slicebysize()->::CoreML::Specification::SliceBySizeLayerParams::MergeFrom(from.slicebysize()); + break; + } + case kConvolution3D: { + mutable_convolution3d()->::CoreML::Specification::Convolution3DLayerParams::MergeFrom(from.convolution3d()); + break; + } case LAYER_NOT_SET: { break; } @@ -8804,6 +8989,38 @@ void NeuralNetworkLayer::clear_layer() { delete layer_.nonmaximumsuppression_; break; } + case kOneHot: { + delete layer_.onehot_; + break; + } + case kCumSum: { + delete layer_.cumsum_; + break; + } + case kClampedReLU: { + delete layer_.clampedrelu_; + break; + } + case kArgSort: { + delete layer_.argsort_; + break; + } + case kPooling3D: { + delete layer_.pooling3d_; + break; + } + case kGlobalPooling3D: { + delete layer_.globalpooling3d_; + break; + } + case kSliceBySize: { + delete layer_.slicebysize_; + break; + } + case kConvolution3D: { + delete layer_.convolution3d_; + break; + } case LAYER_NOT_SET: 
{ break; } @@ -10721,6 +10938,102 @@ bool NeuralNetworkLayer::MergePartialFromCodedStream( break; } + // .CoreML.Specification.OneHotLayerParams oneHot = 1450; + case 1450: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11602u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_onehot())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.CumSumLayerParams cumSum = 1455; + case 1455: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11642u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_cumsum())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.ClampedReLULayerParams clampedReLU = 1460; + case 1460: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11682u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_clampedrelu())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.ArgSortLayerParams argSort = 1461; + case 1461: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11690u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_argsort())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.Pooling3DLayerParams pooling3d = 1465; + case 1465: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11722u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_pooling3d())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.GlobalPooling3DLayerParams globalPooling3d = 1466; + case 1466: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11730u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_globalpooling3d())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.SliceBySizeLayerParams sliceBySize = 1470; + case 1470: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11762u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_slicebysize())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.Convolution3DLayerParams convolution3d = 1471; + case 1471: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(11770u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_convolution3d())); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -11695,6 +12008,54 @@ void NeuralNetworkLayer::SerializeWithCachedSizes( 1400, *layer_.nonmaximumsuppression_, output); } + // .CoreML.Specification.OneHotLayerParams oneHot = 1450; + if (has_onehot()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1450, *layer_.onehot_, output); + } + + // .CoreML.Specification.CumSumLayerParams cumSum = 1455; + if (has_cumsum()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1455, *layer_.cumsum_, output); + } + + // .CoreML.Specification.ClampedReLULayerParams clampedReLU = 1460; + if (has_clampedrelu()) { + 
::google::protobuf::internal::WireFormatLite::WriteMessage( + 1460, *layer_.clampedrelu_, output); + } + + // .CoreML.Specification.ArgSortLayerParams argSort = 1461; + if (has_argsort()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1461, *layer_.argsort_, output); + } + + // .CoreML.Specification.Pooling3DLayerParams pooling3d = 1465; + if (has_pooling3d()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1465, *layer_.pooling3d_, output); + } + + // .CoreML.Specification.GlobalPooling3DLayerParams globalPooling3d = 1466; + if (has_globalpooling3d()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1466, *layer_.globalpooling3d_, output); + } + + // .CoreML.Specification.SliceBySizeLayerParams sliceBySize = 1470; + if (has_slicebysize()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1470, *layer_.slicebysize_, output); + } + + // .CoreML.Specification.Convolution3DLayerParams convolution3d = 1471; + if (has_convolution3d()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1471, *layer_.convolution3d_, output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.NeuralNetworkLayer) } @@ -12803,6 +13164,62 @@ size_t NeuralNetworkLayer::ByteSizeLong() const { *layer_.nonmaximumsuppression_); break; } + // .CoreML.Specification.OneHotLayerParams oneHot = 1450; + case kOneHot: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.onehot_); + break; + } + // .CoreML.Specification.CumSumLayerParams cumSum = 1455; + case kCumSum: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.cumsum_); + break; + } + // .CoreML.Specification.ClampedReLULayerParams clampedReLU = 1460; + case kClampedReLU: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.clampedrelu_); + break; + } + // .CoreML.Specification.ArgSortLayerParams argSort = 1461; + case kArgSort: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.argsort_); + break; + } + // .CoreML.Specification.Pooling3DLayerParams pooling3d = 1465; + case kPooling3D: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.pooling3d_); + break; + } + // .CoreML.Specification.GlobalPooling3DLayerParams globalPooling3d = 1466; + case kGlobalPooling3D: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.globalpooling3d_); + break; + } + // .CoreML.Specification.SliceBySizeLayerParams sliceBySize = 1470; + case kSliceBySize: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.slicebysize_); + break; + } + // .CoreML.Specification.Convolution3DLayerParams convolution3d = 1471; + case kConvolution3D: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *layer_.convolution3d_); + break; + } case LAYER_NOT_SET: { break; } @@ -13438,6 +13855,38 @@ void NeuralNetworkLayer::MergeFrom(const NeuralNetworkLayer& from) { mutable_nonmaximumsuppression()->::CoreML::Specification::NonMaximumSuppressionLayerParams::MergeFrom(from.nonmaximumsuppression()); break; } + case kOneHot: { + mutable_onehot()->::CoreML::Specification::OneHotLayerParams::MergeFrom(from.onehot()); + break; + } + case kCumSum: { + mutable_cumsum()->::CoreML::Specification::CumSumLayerParams::MergeFrom(from.cumsum()); + 
break; + } + case kClampedReLU: { + mutable_clampedrelu()->::CoreML::Specification::ClampedReLULayerParams::MergeFrom(from.clampedrelu()); + break; + } + case kArgSort: { + mutable_argsort()->::CoreML::Specification::ArgSortLayerParams::MergeFrom(from.argsort()); + break; + } + case kPooling3D: { + mutable_pooling3d()->::CoreML::Specification::Pooling3DLayerParams::MergeFrom(from.pooling3d()); + break; + } + case kGlobalPooling3D: { + mutable_globalpooling3d()->::CoreML::Specification::GlobalPooling3DLayerParams::MergeFrom(from.globalpooling3d()); + break; + } + case kSliceBySize: { + mutable_slicebysize()->::CoreML::Specification::SliceBySizeLayerParams::MergeFrom(from.slicebysize()); + break; + } + case kConvolution3D: { + mutable_convolution3d()->::CoreML::Specification::Convolution3DLayerParams::MergeFrom(from.convolution3d()); + break; + } case LAYER_NOT_SET: { break; } @@ -20943,6 +21392,390 @@ void NeuralNetworkLayer::set_allocated_nonmaximumsuppression(::CoreML::Specifica // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.NonMaximumSuppression) } +// .CoreML.Specification.OneHotLayerParams oneHot = 1450; +bool NeuralNetworkLayer::has_onehot() const { + return layer_case() == kOneHot; +} +void NeuralNetworkLayer::set_has_onehot() { + _oneof_case_[0] = kOneHot; +} +void NeuralNetworkLayer::clear_onehot() { + if (has_onehot()) { + delete layer_.onehot_; + clear_has_layer(); + } +} + const ::CoreML::Specification::OneHotLayerParams& NeuralNetworkLayer::onehot() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.oneHot) + return has_onehot() + ? *layer_.onehot_ + : ::CoreML::Specification::OneHotLayerParams::default_instance(); +} +::CoreML::Specification::OneHotLayerParams* NeuralNetworkLayer::mutable_onehot() { + if (!has_onehot()) { + clear_layer(); + set_has_onehot(); + layer_.onehot_ = new ::CoreML::Specification::OneHotLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.oneHot) + return layer_.onehot_; +} +::CoreML::Specification::OneHotLayerParams* NeuralNetworkLayer::release_onehot() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.oneHot) + if (has_onehot()) { + clear_has_layer(); + ::CoreML::Specification::OneHotLayerParams* temp = layer_.onehot_; + layer_.onehot_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_onehot(::CoreML::Specification::OneHotLayerParams* onehot) { + clear_layer(); + if (onehot) { + set_has_onehot(); + layer_.onehot_ = onehot; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.oneHot) +} + +// .CoreML.Specification.CumSumLayerParams cumSum = 1455; +bool NeuralNetworkLayer::has_cumsum() const { + return layer_case() == kCumSum; +} +void NeuralNetworkLayer::set_has_cumsum() { + _oneof_case_[0] = kCumSum; +} +void NeuralNetworkLayer::clear_cumsum() { + if (has_cumsum()) { + delete layer_.cumsum_; + clear_has_layer(); + } +} + const ::CoreML::Specification::CumSumLayerParams& NeuralNetworkLayer::cumsum() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.cumSum) + return has_cumsum() + ? 
*layer_.cumsum_ + : ::CoreML::Specification::CumSumLayerParams::default_instance(); +} +::CoreML::Specification::CumSumLayerParams* NeuralNetworkLayer::mutable_cumsum() { + if (!has_cumsum()) { + clear_layer(); + set_has_cumsum(); + layer_.cumsum_ = new ::CoreML::Specification::CumSumLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.cumSum) + return layer_.cumsum_; +} +::CoreML::Specification::CumSumLayerParams* NeuralNetworkLayer::release_cumsum() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.cumSum) + if (has_cumsum()) { + clear_has_layer(); + ::CoreML::Specification::CumSumLayerParams* temp = layer_.cumsum_; + layer_.cumsum_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_cumsum(::CoreML::Specification::CumSumLayerParams* cumsum) { + clear_layer(); + if (cumsum) { + set_has_cumsum(); + layer_.cumsum_ = cumsum; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.cumSum) +} + +// .CoreML.Specification.ClampedReLULayerParams clampedReLU = 1460; +bool NeuralNetworkLayer::has_clampedrelu() const { + return layer_case() == kClampedReLU; +} +void NeuralNetworkLayer::set_has_clampedrelu() { + _oneof_case_[0] = kClampedReLU; +} +void NeuralNetworkLayer::clear_clampedrelu() { + if (has_clampedrelu()) { + delete layer_.clampedrelu_; + clear_has_layer(); + } +} + const ::CoreML::Specification::ClampedReLULayerParams& NeuralNetworkLayer::clampedrelu() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.clampedReLU) + return has_clampedrelu() + ? *layer_.clampedrelu_ + : ::CoreML::Specification::ClampedReLULayerParams::default_instance(); +} +::CoreML::Specification::ClampedReLULayerParams* NeuralNetworkLayer::mutable_clampedrelu() { + if (!has_clampedrelu()) { + clear_layer(); + set_has_clampedrelu(); + layer_.clampedrelu_ = new ::CoreML::Specification::ClampedReLULayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.clampedReLU) + return layer_.clampedrelu_; +} +::CoreML::Specification::ClampedReLULayerParams* NeuralNetworkLayer::release_clampedrelu() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.clampedReLU) + if (has_clampedrelu()) { + clear_has_layer(); + ::CoreML::Specification::ClampedReLULayerParams* temp = layer_.clampedrelu_; + layer_.clampedrelu_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_clampedrelu(::CoreML::Specification::ClampedReLULayerParams* clampedrelu) { + clear_layer(); + if (clampedrelu) { + set_has_clampedrelu(); + layer_.clampedrelu_ = clampedrelu; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.clampedReLU) +} + +// .CoreML.Specification.ArgSortLayerParams argSort = 1461; +bool NeuralNetworkLayer::has_argsort() const { + return layer_case() == kArgSort; +} +void NeuralNetworkLayer::set_has_argsort() { + _oneof_case_[0] = kArgSort; +} +void NeuralNetworkLayer::clear_argsort() { + if (has_argsort()) { + delete layer_.argsort_; + clear_has_layer(); + } +} + const ::CoreML::Specification::ArgSortLayerParams& NeuralNetworkLayer::argsort() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.argSort) + return has_argsort() + ? 
*layer_.argsort_ + : ::CoreML::Specification::ArgSortLayerParams::default_instance(); +} +::CoreML::Specification::ArgSortLayerParams* NeuralNetworkLayer::mutable_argsort() { + if (!has_argsort()) { + clear_layer(); + set_has_argsort(); + layer_.argsort_ = new ::CoreML::Specification::ArgSortLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.argSort) + return layer_.argsort_; +} +::CoreML::Specification::ArgSortLayerParams* NeuralNetworkLayer::release_argsort() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.argSort) + if (has_argsort()) { + clear_has_layer(); + ::CoreML::Specification::ArgSortLayerParams* temp = layer_.argsort_; + layer_.argsort_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_argsort(::CoreML::Specification::ArgSortLayerParams* argsort) { + clear_layer(); + if (argsort) { + set_has_argsort(); + layer_.argsort_ = argsort; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.argSort) +} + +// .CoreML.Specification.Pooling3DLayerParams pooling3d = 1465; +bool NeuralNetworkLayer::has_pooling3d() const { + return layer_case() == kPooling3D; +} +void NeuralNetworkLayer::set_has_pooling3d() { + _oneof_case_[0] = kPooling3D; +} +void NeuralNetworkLayer::clear_pooling3d() { + if (has_pooling3d()) { + delete layer_.pooling3d_; + clear_has_layer(); + } +} + const ::CoreML::Specification::Pooling3DLayerParams& NeuralNetworkLayer::pooling3d() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.pooling3d) + return has_pooling3d() + ? *layer_.pooling3d_ + : ::CoreML::Specification::Pooling3DLayerParams::default_instance(); +} +::CoreML::Specification::Pooling3DLayerParams* NeuralNetworkLayer::mutable_pooling3d() { + if (!has_pooling3d()) { + clear_layer(); + set_has_pooling3d(); + layer_.pooling3d_ = new ::CoreML::Specification::Pooling3DLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.pooling3d) + return layer_.pooling3d_; +} +::CoreML::Specification::Pooling3DLayerParams* NeuralNetworkLayer::release_pooling3d() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.pooling3d) + if (has_pooling3d()) { + clear_has_layer(); + ::CoreML::Specification::Pooling3DLayerParams* temp = layer_.pooling3d_; + layer_.pooling3d_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_pooling3d(::CoreML::Specification::Pooling3DLayerParams* pooling3d) { + clear_layer(); + if (pooling3d) { + set_has_pooling3d(); + layer_.pooling3d_ = pooling3d; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.pooling3d) +} + +// .CoreML.Specification.GlobalPooling3DLayerParams globalPooling3d = 1466; +bool NeuralNetworkLayer::has_globalpooling3d() const { + return layer_case() == kGlobalPooling3D; +} +void NeuralNetworkLayer::set_has_globalpooling3d() { + _oneof_case_[0] = kGlobalPooling3D; +} +void NeuralNetworkLayer::clear_globalpooling3d() { + if (has_globalpooling3d()) { + delete layer_.globalpooling3d_; + clear_has_layer(); + } +} + const ::CoreML::Specification::GlobalPooling3DLayerParams& NeuralNetworkLayer::globalpooling3d() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) + return has_globalpooling3d() + ? 
*layer_.globalpooling3d_ + : ::CoreML::Specification::GlobalPooling3DLayerParams::default_instance(); +} +::CoreML::Specification::GlobalPooling3DLayerParams* NeuralNetworkLayer::mutable_globalpooling3d() { + if (!has_globalpooling3d()) { + clear_layer(); + set_has_globalpooling3d(); + layer_.globalpooling3d_ = new ::CoreML::Specification::GlobalPooling3DLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) + return layer_.globalpooling3d_; +} +::CoreML::Specification::GlobalPooling3DLayerParams* NeuralNetworkLayer::release_globalpooling3d() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) + if (has_globalpooling3d()) { + clear_has_layer(); + ::CoreML::Specification::GlobalPooling3DLayerParams* temp = layer_.globalpooling3d_; + layer_.globalpooling3d_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_globalpooling3d(::CoreML::Specification::GlobalPooling3DLayerParams* globalpooling3d) { + clear_layer(); + if (globalpooling3d) { + set_has_globalpooling3d(); + layer_.globalpooling3d_ = globalpooling3d; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) +} + +// .CoreML.Specification.SliceBySizeLayerParams sliceBySize = 1470; +bool NeuralNetworkLayer::has_slicebysize() const { + return layer_case() == kSliceBySize; +} +void NeuralNetworkLayer::set_has_slicebysize() { + _oneof_case_[0] = kSliceBySize; +} +void NeuralNetworkLayer::clear_slicebysize() { + if (has_slicebysize()) { + delete layer_.slicebysize_; + clear_has_layer(); + } +} + const ::CoreML::Specification::SliceBySizeLayerParams& NeuralNetworkLayer::slicebysize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.sliceBySize) + return has_slicebysize() + ? 
*layer_.slicebysize_ + : ::CoreML::Specification::SliceBySizeLayerParams::default_instance(); +} +::CoreML::Specification::SliceBySizeLayerParams* NeuralNetworkLayer::mutable_slicebysize() { + if (!has_slicebysize()) { + clear_layer(); + set_has_slicebysize(); + layer_.slicebysize_ = new ::CoreML::Specification::SliceBySizeLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.sliceBySize) + return layer_.slicebysize_; +} +::CoreML::Specification::SliceBySizeLayerParams* NeuralNetworkLayer::release_slicebysize() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.sliceBySize) + if (has_slicebysize()) { + clear_has_layer(); + ::CoreML::Specification::SliceBySizeLayerParams* temp = layer_.slicebysize_; + layer_.slicebysize_ = NULL; + return temp; + } else { + return NULL; + } +} +void NeuralNetworkLayer::set_allocated_slicebysize(::CoreML::Specification::SliceBySizeLayerParams* slicebysize) { + clear_layer(); + if (slicebysize) { + set_has_slicebysize(); + layer_.slicebysize_ = slicebysize; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.sliceBySize) +} + +// .CoreML.Specification.Convolution3DLayerParams convolution3d = 1471; +bool NeuralNetworkLayer::has_convolution3d() const { + return layer_case() == kConvolution3D; +} +void NeuralNetworkLayer::set_has_convolution3d() { + _oneof_case_[0] = kConvolution3D; +} +void NeuralNetworkLayer::clear_convolution3d() { + if (has_convolution3d()) { + delete layer_.convolution3d_; + clear_has_layer(); + } +} + const ::CoreML::Specification::Convolution3DLayerParams& NeuralNetworkLayer::convolution3d() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.convolution3d) + return has_convolution3d() + ? 
*layer_.convolution3d_
+      : ::CoreML::Specification::Convolution3DLayerParams::default_instance();
+}
+::CoreML::Specification::Convolution3DLayerParams* NeuralNetworkLayer::mutable_convolution3d() {
+  if (!has_convolution3d()) {
+    clear_layer();
+    set_has_convolution3d();
+    layer_.convolution3d_ = new ::CoreML::Specification::Convolution3DLayerParams;
+  }
+  // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.convolution3d)
+  return layer_.convolution3d_;
+}
+::CoreML::Specification::Convolution3DLayerParams* NeuralNetworkLayer::release_convolution3d() {
+  // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.convolution3d)
+  if (has_convolution3d()) {
+    clear_has_layer();
+    ::CoreML::Specification::Convolution3DLayerParams* temp = layer_.convolution3d_;
+    layer_.convolution3d_ = NULL;
+    return temp;
+  } else {
+    return NULL;
+  }
+}
+void NeuralNetworkLayer::set_allocated_convolution3d(::CoreML::Specification::Convolution3DLayerParams* convolution3d) {
+  clear_layer();
+  if (convolution3d) {
+    set_has_convolution3d();
+    layer_.convolution3d_ = convolution3d;
+  }
+  // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.convolution3d)
+}
+
 bool NeuralNetworkLayer::has_layer() const {
   return layer_case() != LAYER_NOT_SET;
 }
@@ -25173,6 +26006,7 @@ void BoxCoordinatesMode::set_boxmode(::CoreML::Specification::BoxCoordinatesMode
 const int WeightParams::kFloatValueFieldNumber;
 const int WeightParams::kFloat16ValueFieldNumber;
 const int WeightParams::kRawValueFieldNumber;
+const int WeightParams::kInt8RawValueFieldNumber;
 const int WeightParams::kQuantizationFieldNumber;
 const int WeightParams::kIsUpdatableFieldNumber;
 #endif // !defined(_MSC_VER) || _MSC_VER >= 1900
@@ -25199,6 +26033,10 @@ WeightParams::WeightParams(const WeightParams& from)
   if (from.rawvalue().size() > 0) {
     rawvalue_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.rawvalue_);
   }
+  int8rawvalue_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+  if (from.int8rawvalue().size() > 0) {
+    int8rawvalue_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.int8rawvalue_);
+  }
   if (from.has_quantization()) {
     quantization_ = new ::CoreML::Specification::QuantizationParams(*from.quantization_);
   } else {
@@ -25211,6 +26049,7 @@ WeightParams::WeightParams(const WeightParams& from)
 void WeightParams::SharedCtor() {
   float16value_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
   rawvalue_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+  int8rawvalue_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
   ::memset(&quantization_, 0, reinterpret_cast<char*>(&isupdatable_) -
     reinterpret_cast<char*>(&quantization_) + sizeof(isupdatable_));
   _cached_size_ = 0;
@@ -25224,6 +26063,7 @@ WeightParams::~WeightParams() {
 void WeightParams::SharedDtor() {
   float16value_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
   rawvalue_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+  int8rawvalue_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
   if (this != internal_default_instance()) {
     delete quantization_;
   }
@@ -25252,6 +26092,7 @@ void WeightParams::Clear() {
   floatvalue_.Clear();
   float16value_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
rawvalue_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + int8rawvalue_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); if (GetArenaNoVirtual() == NULL && quantization_ != NULL) { delete quantization_; } @@ -25311,6 +26152,18 @@ bool WeightParams::MergePartialFromCodedStream( break; } + // bytes int8RawValue = 31; + case 31: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(250u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadBytes( + input, this->mutable_int8rawvalue())); + } else { + goto handle_unusual; + } + break; + } + // .CoreML.Specification.QuantizationParams quantization = 40; case 40: { if (static_cast< ::google::protobuf::uint8>(tag) == @@ -25384,6 +26237,12 @@ void WeightParams::SerializeWithCachedSizes( 30, this->rawvalue(), output); } + // bytes int8RawValue = 31; + if (this->int8rawvalue().size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteBytesMaybeAliased( + 31, this->int8rawvalue(), output); + } + // .CoreML.Specification.QuantizationParams quantization = 40; if (this->has_quantization()) { ::google::protobuf::internal::WireFormatLite::WriteMessage( @@ -25431,6 +26290,13 @@ size_t WeightParams::ByteSizeLong() const { this->rawvalue()); } + // bytes int8RawValue = 31; + if (this->int8rawvalue().size() > 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::BytesSize( + this->int8rawvalue()); + } + // .CoreML.Specification.QuantizationParams quantization = 40; if (this->has_quantization()) { total_size += 2 + @@ -25471,6 +26337,10 @@ void WeightParams::MergeFrom(const WeightParams& from) { rawvalue_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.rawvalue_); } + if (from.int8rawvalue().size() > 0) { + + int8rawvalue_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.int8rawvalue_); + } if (from.has_quantization()) { mutable_quantization()->::CoreML::Specification::QuantizationParams::MergeFrom(from.quantization()); } @@ -25498,6 +26368,7 @@ void WeightParams::InternalSwap(WeightParams* other) { floatvalue_.InternalSwap(&other->floatvalue_); float16value_.Swap(&other->float16value_); rawvalue_.Swap(&other->rawvalue_); + int8rawvalue_.Swap(&other->int8rawvalue_); std::swap(quantization_, other->quantization_); std::swap(isupdatable_, other->isupdatable_); std::swap(_cached_size_, other->_cached_size_); @@ -25646,6 +26517,59 @@ void WeightParams::set_allocated_rawvalue(::std::string* rawvalue) { // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.WeightParams.rawValue) } +// bytes int8RawValue = 31; +void WeightParams::clear_int8rawvalue() { + int8rawvalue_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +const ::std::string& WeightParams::int8rawvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.WeightParams.int8RawValue) + return int8rawvalue_.GetNoArena(); +} +void WeightParams::set_int8rawvalue(const ::std::string& value) { + + int8rawvalue_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); + // @@protoc_insertion_point(field_set:CoreML.Specification.WeightParams.int8RawValue) +} +#if LANG_CXX11 +void WeightParams::set_int8rawvalue(::std::string&& value) { + + int8rawvalue_.SetNoArena( + &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); + // 
@@protoc_insertion_point(field_set_rvalue:CoreML.Specification.WeightParams.int8RawValue) +} +#endif +void WeightParams::set_int8rawvalue(const char* value) { + GOOGLE_DCHECK(value != NULL); + + int8rawvalue_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + // @@protoc_insertion_point(field_set_char:CoreML.Specification.WeightParams.int8RawValue) +} +void WeightParams::set_int8rawvalue(const void* value, size_t size) { + + int8rawvalue_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), + ::std::string(reinterpret_cast(value), size)); + // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.WeightParams.int8RawValue) +} +::std::string* WeightParams::mutable_int8rawvalue() { + + // @@protoc_insertion_point(field_mutable:CoreML.Specification.WeightParams.int8RawValue) + return int8rawvalue_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +::std::string* WeightParams::release_int8rawvalue() { + // @@protoc_insertion_point(field_release:CoreML.Specification.WeightParams.int8RawValue) + + return int8rawvalue_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +void WeightParams::set_allocated_int8rawvalue(::std::string* int8rawvalue) { + if (int8rawvalue != NULL) { + + } else { + + } + int8rawvalue_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), int8rawvalue); + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.WeightParams.int8RawValue) +} + // .CoreML.Specification.QuantizationParams quantization = 40; bool WeightParams::has_quantization() const { return this != internal_default_instance() && quantization_ != NULL; @@ -27704,24 +28628,44 @@ ConvolutionLayerParams::ConvolutionPaddingTypeCase ConvolutionLayerParams::Convo // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int InnerProductLayerParams::kInputChannelsFieldNumber; -const int InnerProductLayerParams::kOutputChannelsFieldNumber; -const int InnerProductLayerParams::kHasBiasFieldNumber; -const int InnerProductLayerParams::kWeightsFieldNumber; -const int InnerProductLayerParams::kBiasFieldNumber; +const int Convolution3DLayerParams::kOutputChannelsFieldNumber; +const int Convolution3DLayerParams::kInputChannelsFieldNumber; +const int Convolution3DLayerParams::kNGroupsFieldNumber; +const int Convolution3DLayerParams::kKernelDepthFieldNumber; +const int Convolution3DLayerParams::kKernelHeightFieldNumber; +const int Convolution3DLayerParams::kKernelWidthFieldNumber; +const int Convolution3DLayerParams::kStrideDepthFieldNumber; +const int Convolution3DLayerParams::kStrideHeightFieldNumber; +const int Convolution3DLayerParams::kStrideWidthFieldNumber; +const int Convolution3DLayerParams::kDilationDepthFieldNumber; +const int Convolution3DLayerParams::kDilationHeightFieldNumber; +const int Convolution3DLayerParams::kDilationWidthFieldNumber; +const int Convolution3DLayerParams::kHasBiasFieldNumber; +const int Convolution3DLayerParams::kWeightsFieldNumber; +const int Convolution3DLayerParams::kBiasFieldNumber; +const int Convolution3DLayerParams::kPaddingTypeFieldNumber; +const int Convolution3DLayerParams::kCustomPaddingFrontFieldNumber; +const int Convolution3DLayerParams::kCustomPaddingBackFieldNumber; +const int Convolution3DLayerParams::kCustomPaddingTopFieldNumber; +const int Convolution3DLayerParams::kCustomPaddingBottomFieldNumber; +const int 
Convolution3DLayerParams::kCustomPaddingLeftFieldNumber; +const int Convolution3DLayerParams::kCustomPaddingRightFieldNumber; +const int Convolution3DLayerParams::kIsDeconvolutionFieldNumber; +const int Convolution3DLayerParams::kOutputShapeFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -InnerProductLayerParams::InnerProductLayerParams() +Convolution3DLayerParams::Convolution3DLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.InnerProductLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.Convolution3DLayerParams) } -InnerProductLayerParams::InnerProductLayerParams(const InnerProductLayerParams& from) +Convolution3DLayerParams::Convolution3DLayerParams(const Convolution3DLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), + outputshape_(from.outputshape_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); if (from.has_weights()) { @@ -27734,24 +28678,24 @@ InnerProductLayerParams::InnerProductLayerParams(const InnerProductLayerParams& } else { bias_ = NULL; } - ::memcpy(&inputchannels_, &from.inputchannels_, - reinterpret_cast(&hasbias_) - - reinterpret_cast(&inputchannels_) + sizeof(hasbias_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.InnerProductLayerParams) + ::memcpy(&outputchannels_, &from.outputchannels_, + reinterpret_cast(&custompaddingright_) - + reinterpret_cast(&outputchannels_) + sizeof(custompaddingright_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.Convolution3DLayerParams) } -void InnerProductLayerParams::SharedCtor() { - ::memset(&weights_, 0, reinterpret_cast(&hasbias_) - - reinterpret_cast(&weights_) + sizeof(hasbias_)); +void Convolution3DLayerParams::SharedCtor() { + ::memset(&weights_, 0, reinterpret_cast(&custompaddingright_) - + reinterpret_cast(&weights_) + sizeof(custompaddingright_)); _cached_size_ = 0; } -InnerProductLayerParams::~InnerProductLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.InnerProductLayerParams) +Convolution3DLayerParams::~Convolution3DLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.Convolution3DLayerParams) SharedDtor(); } -void InnerProductLayerParams::SharedDtor() { +void Convolution3DLayerParams::SharedDtor() { if (this != internal_default_instance()) { delete weights_; } @@ -27760,26 +28704,27 @@ void InnerProductLayerParams::SharedDtor() { } } -void InnerProductLayerParams::SetCachedSize(int size) const { +void Convolution3DLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const InnerProductLayerParams& InnerProductLayerParams::default_instance() { +const Convolution3DLayerParams& Convolution3DLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -InnerProductLayerParams* InnerProductLayerParams::New(::google::protobuf::Arena* arena) const { - InnerProductLayerParams* n = new InnerProductLayerParams; +Convolution3DLayerParams* Convolution3DLayerParams::New(::google::protobuf::Arena* arena) const { + Convolution3DLayerParams* n = new Convolution3DLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void InnerProductLayerParams::Clear() { -// 
@@protoc_insertion_point(message_clear_start:CoreML.Specification.InnerProductLayerParams) +void Convolution3DLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.Convolution3DLayerParams) + outputshape_.Clear(); if (GetArenaNoVirtual() == NULL && weights_ != NULL) { delete weights_; } @@ -27788,53 +28733,193 @@ delete bias_; } bias_ = NULL; - ::memset(&inputchannels_, 0, reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&inputchannels_) + sizeof(hasbias_)); + ::memset(&outputchannels_, 0, reinterpret_cast<char*>(&custompaddingright_) - + reinterpret_cast<char*>(&outputchannels_) + sizeof(custompaddingright_)); } -bool InnerProductLayerParams::MergePartialFromCodedStream( +bool Convolution3DLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.InnerProductLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.Convolution3DLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // uint64 inputChannels = 1; + // int32 outputChannels = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(8u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &inputchannels_))); + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &outputchannels_))); } else { goto handle_unusual; } break; } - // uint64 outputChannels = 2; + // int32 inputChannels = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(16u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &outputchannels_))); + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &inputchannels_))); } else { goto handle_unusual; } break; } - // bool hasBias = 10; + // int32 nGroups = 10; case 10: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(80u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &ngroups_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 kernelDepth = 20; + case 20: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(160u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &kerneldepth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 kernelHeight = 21; + case 21: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(168u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &kernelheight_))); + } else { + goto handle_unusual; 
+ } + break; + } + + // int32 kernelWidth = 22; + case 22: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(176u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &kernelwidth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 strideDepth = 31; + case 31: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(248u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &stridedepth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 strideHeight = 32; + case 32: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(256u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &strideheight_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 strideWidth = 33; + case 33: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(264u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &stridewidth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 dilationDepth = 40; + case 40: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(320u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &dilationdepth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 dilationHeight = 41; + case 41: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(328u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &dilationheight_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 dilationWidth = 42; + case 42: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(336u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &dilationwidth_))); + } else { + goto handle_unusual; + } + break; + } + + // bool hasBias = 50; + case 50: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(400u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( input, &hasbias_))); @@ -27844,10 +28929,10 @@ bool InnerProductLayerParams::MergePartialFromCodedStream( break; } - // .CoreML.Specification.WeightParams weights = 20; - case 20: { + // .CoreML.Specification.WeightParams weights = 60; + case 60: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(162u)) { + static_cast< ::google::protobuf::uint8>(482u)) { DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( input, mutable_weights())); } else { @@ -27856,10 +28941,10 @@ bool 
InnerProductLayerParams::MergePartialFromCodedStream( break; } - // .CoreML.Specification.WeightParams bias = 21; - case 21: { + // .CoreML.Specification.WeightParams bias = 61; + case 61: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(170u)) { + static_cast< ::google::protobuf::uint8>(490u)) { DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( input, mutable_bias())); } else { @@ -27868,6 +28953,137 @@ bool InnerProductLayerParams::MergePartialFromCodedStream( break; } + // .CoreML.Specification.Convolution3DLayerParams.PaddingType paddingType = 70; + case 70: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(560u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_paddingtype(static_cast< ::CoreML::Specification::Convolution3DLayerParams_PaddingType >(value)); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingFront = 80; + case 80: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(640u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingfront_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingBack = 81; + case 81: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(648u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingback_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingTop = 82; + case 82: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(656u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingtop_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingBottom = 83; + case 83: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(664u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingbottom_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingLeft = 84; + case 84: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(672u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingleft_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingRight = 85; + case 85: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(680u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingright_))); + } else { + goto handle_unusual; + } + break; + } + + // bool isDeconvolution = 86; + case 86: { + if (static_cast< ::google::protobuf::uint8>(tag) == 
+ static_cast< ::google::protobuf::uint8>(688u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &isdeconvolution_))); + } else { + goto handle_unusual; + } + break; + } + + // repeated uint64 outputShape = 87; + case 87: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(698u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + input, this->mutable_outputshape()))); + } else if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(696u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + 2, 698u, input, this->mutable_outputshape()))); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -27881,85 +29097,324 @@ bool InnerProductLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.InnerProductLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.Convolution3DLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.InnerProductLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.Convolution3DLayerParams) return false; #undef DO_ } -void InnerProductLayerParams::SerializeWithCachedSizes( +void Convolution3DLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.InnerProductLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.Convolution3DLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // uint64 inputChannels = 1; + // int32 outputChannels = 1; + if (this->outputchannels() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(1, this->outputchannels(), output); + } + + // int32 inputChannels = 2; if (this->inputchannels() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->inputchannels(), output); + ::google::protobuf::internal::WireFormatLite::WriteInt32(2, this->inputchannels(), output); } - // uint64 outputChannels = 2; - if (this->outputchannels() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(2, this->outputchannels(), output); + // int32 nGroups = 10; + if (this->ngroups() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(10, this->ngroups(), output); } - // bool hasBias = 10; + // int32 kernelDepth = 20; + if (this->kerneldepth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(20, this->kerneldepth(), output); + } + + // int32 kernelHeight = 21; + if (this->kernelheight() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(21, this->kernelheight(), output); + } + + // int32 kernelWidth = 22; + if (this->kernelwidth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(22, this->kernelwidth(), output); + } + + // int32 strideDepth = 31; + if (this->stridedepth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(31, this->stridedepth(), output); + } + + // int32 strideHeight = 32; + if (this->strideheight() != 0) { + 
::google::protobuf::internal::WireFormatLite::WriteInt32(32, this->strideheight(), output); + } + + // int32 strideWidth = 33; + if (this->stridewidth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(33, this->stridewidth(), output); + } + + // int32 dilationDepth = 40; + if (this->dilationdepth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(40, this->dilationdepth(), output); + } + + // int32 dilationHeight = 41; + if (this->dilationheight() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(41, this->dilationheight(), output); + } + + // int32 dilationWidth = 42; + if (this->dilationwidth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(42, this->dilationwidth(), output); + } + + // bool hasBias = 50; if (this->hasbias() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteBool(10, this->hasbias(), output); + ::google::protobuf::internal::WireFormatLite::WriteBool(50, this->hasbias(), output); } - // .CoreML.Specification.WeightParams weights = 20; + // .CoreML.Specification.WeightParams weights = 60; if (this->has_weights()) { ::google::protobuf::internal::WireFormatLite::WriteMessage( - 20, *this->weights_, output); + 60, *this->weights_, output); } - // .CoreML.Specification.WeightParams bias = 21; + // .CoreML.Specification.WeightParams bias = 61; if (this->has_bias()) { ::google::protobuf::internal::WireFormatLite::WriteMessage( - 21, *this->bias_, output); + 61, *this->bias_, output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.InnerProductLayerParams) + // .CoreML.Specification.Convolution3DLayerParams.PaddingType paddingType = 70; + if (this->paddingtype() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 70, this->paddingtype(), output); + } + + // int32 customPaddingFront = 80; + if (this->custompaddingfront() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(80, this->custompaddingfront(), output); + } + + // int32 customPaddingBack = 81; + if (this->custompaddingback() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(81, this->custompaddingback(), output); + } + + // int32 customPaddingTop = 82; + if (this->custompaddingtop() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(82, this->custompaddingtop(), output); + } + + // int32 customPaddingBottom = 83; + if (this->custompaddingbottom() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(83, this->custompaddingbottom(), output); + } + + // int32 customPaddingLeft = 84; + if (this->custompaddingleft() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(84, this->custompaddingleft(), output); + } + + // int32 customPaddingRight = 85; + if (this->custompaddingright() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(85, this->custompaddingright(), output); + } + + // bool isDeconvolution = 86; + if (this->isdeconvolution() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(86, this->isdeconvolution(), output); + } + + // repeated uint64 outputShape = 87; + if (this->outputshape_size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteTag(87, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); + output->WriteVarint32(_outputshape_cached_byte_size_); + } + for (int i = 0, n = this->outputshape_size(); i < n; i++) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64NoTag( + this->outputshape(i), output); + } + + // 
@@protoc_insertion_point(serialize_end:CoreML.Specification.Convolution3DLayerParams) } -size_t InnerProductLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.InnerProductLayerParams) +size_t Convolution3DLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.Convolution3DLayerParams) size_t total_size = 0; - // .CoreML.Specification.WeightParams weights = 20; + // repeated uint64 outputShape = 87; + { + size_t data_size = ::google::protobuf::internal::WireFormatLite:: + UInt64Size(this->outputshape_); + if (data_size > 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size(data_size); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(data_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _outputshape_cached_byte_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + total_size += data_size; + } + + // .CoreML.Specification.WeightParams weights = 60; if (this->has_weights()) { total_size += 2 + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( *this->weights_); } - // .CoreML.Specification.WeightParams bias = 21; + // .CoreML.Specification.WeightParams bias = 61; if (this->has_bias()) { total_size += 2 + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( *this->bias_); } - // uint64 inputChannels = 1; + // int32 outputChannels = 1; + if (this->outputchannels() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->outputchannels()); + } + + // int32 inputChannels = 2; if (this->inputchannels() != 0) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt64Size( + ::google::protobuf::internal::WireFormatLite::Int32Size( this->inputchannels()); } - // uint64 outputChannels = 2; - if (this->outputchannels() != 0) { + // int32 nGroups = 10; + if (this->ngroups() != 0) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->outputchannels()); + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->ngroups()); } - // bool hasBias = 10; + // int32 kernelDepth = 20; + if (this->kerneldepth() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->kerneldepth()); + } + + // int32 kernelHeight = 21; + if (this->kernelheight() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->kernelheight()); + } + + // int32 kernelWidth = 22; + if (this->kernelwidth() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->kernelwidth()); + } + + // int32 strideDepth = 31; + if (this->stridedepth() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->stridedepth()); + } + + // int32 strideHeight = 32; + if (this->strideheight() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->strideheight()); + } + + // int32 strideWidth = 33; + if (this->stridewidth() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->stridewidth()); + } + + // int32 dilationDepth = 40; + if (this->dilationdepth() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->dilationdepth()); + } + + // int32 dilationHeight = 41; + if (this->dilationheight() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->dilationheight()); 
+ } + + // int32 dilationWidth = 42; + if (this->dilationwidth() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->dilationwidth()); + } + + // bool hasBias = 50; if (this->hasbias() != 0) { - total_size += 1 + 1; + total_size += 2 + 1; + } + + // bool isDeconvolution = 86; + if (this->isdeconvolution() != 0) { + total_size += 2 + 1; + } + + // .CoreML.Specification.Convolution3DLayerParams.PaddingType paddingType = 70; + if (this->paddingtype() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::EnumSize(this->paddingtype()); + } + + // int32 customPaddingFront = 80; + if (this->custompaddingfront() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingfront()); + } + + // int32 customPaddingBack = 81; + if (this->custompaddingback() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingback()); + } + + // int32 customPaddingTop = 82; + if (this->custompaddingtop() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingtop()); + } + + // int32 customPaddingBottom = 83; + if (this->custompaddingbottom() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingbottom()); + } + + // int32 customPaddingLeft = 84; + if (this->custompaddingleft() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingleft()); + } + + // int32 customPaddingRight = 85; + if (this->custompaddingright() != 0) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingright()); } int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); @@ -27969,137 +29424,351 @@ size_t InnerProductLayerParams::ByteSizeLong() const { return total_size; } -void InnerProductLayerParams::CheckTypeAndMergeFrom( +void Convolution3DLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const InnerProductLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const Convolution3DLayerParams*>(&from)); } -void InnerProductLayerParams::MergeFrom(const InnerProductLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.InnerProductLayerParams) +void Convolution3DLayerParams::MergeFrom(const Convolution3DLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.Convolution3DLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; + outputshape_.MergeFrom(from.outputshape_); if (from.has_weights()) { mutable_weights()->::CoreML::Specification::WeightParams::MergeFrom(from.weights()); } if (from.has_bias()) { mutable_bias()->::CoreML::Specification::WeightParams::MergeFrom(from.bias()); } + if (from.outputchannels() != 0) { + set_outputchannels(from.outputchannels()); + } if (from.inputchannels() != 0) { set_inputchannels(from.inputchannels()); } - if (from.outputchannels() != 0) { - set_outputchannels(from.outputchannels()); + if (from.ngroups() != 0) { + set_ngroups(from.ngroups()); + } + if (from.kerneldepth() != 0) { + set_kerneldepth(from.kerneldepth()); + } + if (from.kernelheight() != 0) { + set_kernelheight(from.kernelheight()); + } + if (from.kernelwidth() != 0) { + set_kernelwidth(from.kernelwidth()); + } + if (from.stridedepth() 
!= 0) { + set_stridedepth(from.stridedepth()); + } + if (from.strideheight() != 0) { + set_strideheight(from.strideheight()); + } + if (from.stridewidth() != 0) { + set_stridewidth(from.stridewidth()); + } + if (from.dilationdepth() != 0) { + set_dilationdepth(from.dilationdepth()); + } + if (from.dilationheight() != 0) { + set_dilationheight(from.dilationheight()); + } + if (from.dilationwidth() != 0) { + set_dilationwidth(from.dilationwidth()); } if (from.hasbias() != 0) { set_hasbias(from.hasbias()); } + if (from.isdeconvolution() != 0) { + set_isdeconvolution(from.isdeconvolution()); + } + if (from.paddingtype() != 0) { + set_paddingtype(from.paddingtype()); + } + if (from.custompaddingfront() != 0) { + set_custompaddingfront(from.custompaddingfront()); + } + if (from.custompaddingback() != 0) { + set_custompaddingback(from.custompaddingback()); + } + if (from.custompaddingtop() != 0) { + set_custompaddingtop(from.custompaddingtop()); + } + if (from.custompaddingbottom() != 0) { + set_custompaddingbottom(from.custompaddingbottom()); + } + if (from.custompaddingleft() != 0) { + set_custompaddingleft(from.custompaddingleft()); + } + if (from.custompaddingright() != 0) { + set_custompaddingright(from.custompaddingright()); + } } -void InnerProductLayerParams::CopyFrom(const InnerProductLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.InnerProductLayerParams) +void Convolution3DLayerParams::CopyFrom(const Convolution3DLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.Convolution3DLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool InnerProductLayerParams::IsInitialized() const { +bool Convolution3DLayerParams::IsInitialized() const { return true; } -void InnerProductLayerParams::Swap(InnerProductLayerParams* other) { +void Convolution3DLayerParams::Swap(Convolution3DLayerParams* other) { if (other == this) return; InternalSwap(other); } -void InnerProductLayerParams::InternalSwap(InnerProductLayerParams* other) { +void Convolution3DLayerParams::InternalSwap(Convolution3DLayerParams* other) { + outputshape_.InternalSwap(&other->outputshape_); std::swap(weights_, other->weights_); std::swap(bias_, other->bias_); - std::swap(inputchannels_, other->inputchannels_); std::swap(outputchannels_, other->outputchannels_); + std::swap(inputchannels_, other->inputchannels_); + std::swap(ngroups_, other->ngroups_); + std::swap(kerneldepth_, other->kerneldepth_); + std::swap(kernelheight_, other->kernelheight_); + std::swap(kernelwidth_, other->kernelwidth_); + std::swap(stridedepth_, other->stridedepth_); + std::swap(strideheight_, other->strideheight_); + std::swap(stridewidth_, other->stridewidth_); + std::swap(dilationdepth_, other->dilationdepth_); + std::swap(dilationheight_, other->dilationheight_); + std::swap(dilationwidth_, other->dilationwidth_); std::swap(hasbias_, other->hasbias_); + std::swap(isdeconvolution_, other->isdeconvolution_); + std::swap(paddingtype_, other->paddingtype_); + std::swap(custompaddingfront_, other->custompaddingfront_); + std::swap(custompaddingback_, other->custompaddingback_); + std::swap(custompaddingtop_, other->custompaddingtop_); + std::swap(custompaddingbottom_, other->custompaddingbottom_); + std::swap(custompaddingleft_, other->custompaddingleft_); + std::swap(custompaddingright_, other->custompaddingright_); std::swap(_cached_size_, other->_cached_size_); } -::std::string InnerProductLayerParams::GetTypeName() const { - return 
"CoreML.Specification.InnerProductLayerParams"; +::std::string Convolution3DLayerParams::GetTypeName() const { + return "CoreML.Specification.Convolution3DLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// InnerProductLayerParams +// Convolution3DLayerParams -// uint64 inputChannels = 1; -void InnerProductLayerParams::clear_inputchannels() { - inputchannels_ = GOOGLE_ULONGLONG(0); +// int32 outputChannels = 1; +void Convolution3DLayerParams::clear_outputchannels() { + outputchannels_ = 0; } -::google::protobuf::uint64 InnerProductLayerParams::inputchannels() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.inputChannels) +::google::protobuf::int32 Convolution3DLayerParams::outputchannels() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.outputChannels) + return outputchannels_; +} +void Convolution3DLayerParams::set_outputchannels(::google::protobuf::int32 value) { + + outputchannels_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.outputChannels) +} + +// int32 inputChannels = 2; +void Convolution3DLayerParams::clear_inputchannels() { + inputchannels_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::inputchannels() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.inputChannels) return inputchannels_; } -void InnerProductLayerParams::set_inputchannels(::google::protobuf::uint64 value) { +void Convolution3DLayerParams::set_inputchannels(::google::protobuf::int32 value) { inputchannels_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.inputChannels) + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.inputChannels) } -// uint64 outputChannels = 2; -void InnerProductLayerParams::clear_outputchannels() { - outputchannels_ = GOOGLE_ULONGLONG(0); +// int32 nGroups = 10; +void Convolution3DLayerParams::clear_ngroups() { + ngroups_ = 0; } -::google::protobuf::uint64 InnerProductLayerParams::outputchannels() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.outputChannels) - return outputchannels_; +::google::protobuf::int32 Convolution3DLayerParams::ngroups() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.nGroups) + return ngroups_; } -void InnerProductLayerParams::set_outputchannels(::google::protobuf::uint64 value) { +void Convolution3DLayerParams::set_ngroups(::google::protobuf::int32 value) { - outputchannels_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.outputChannels) + ngroups_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.nGroups) } -// bool hasBias = 10; -void InnerProductLayerParams::clear_hasbias() { +// int32 kernelDepth = 20; +void Convolution3DLayerParams::clear_kerneldepth() { + kerneldepth_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::kerneldepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.kernelDepth) + return kerneldepth_; +} +void Convolution3DLayerParams::set_kerneldepth(::google::protobuf::int32 value) { + + kerneldepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.kernelDepth) +} + +// int32 kernelHeight = 21; +void Convolution3DLayerParams::clear_kernelheight() { + kernelheight_ = 0; +} 
+::google::protobuf::int32 Convolution3DLayerParams::kernelheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.kernelHeight) + return kernelheight_; +} +void Convolution3DLayerParams::set_kernelheight(::google::protobuf::int32 value) { + + kernelheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.kernelHeight) +} + +// int32 kernelWidth = 22; +void Convolution3DLayerParams::clear_kernelwidth() { + kernelwidth_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::kernelwidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.kernelWidth) + return kernelwidth_; +} +void Convolution3DLayerParams::set_kernelwidth(::google::protobuf::int32 value) { + + kernelwidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.kernelWidth) +} + +// int32 strideDepth = 31; +void Convolution3DLayerParams::clear_stridedepth() { + stridedepth_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::stridedepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.strideDepth) + return stridedepth_; +} +void Convolution3DLayerParams::set_stridedepth(::google::protobuf::int32 value) { + + stridedepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.strideDepth) +} + +// int32 strideHeight = 32; +void Convolution3DLayerParams::clear_strideheight() { + strideheight_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::strideheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.strideHeight) + return strideheight_; +} +void Convolution3DLayerParams::set_strideheight(::google::protobuf::int32 value) { + + strideheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.strideHeight) +} + +// int32 strideWidth = 33; +void Convolution3DLayerParams::clear_stridewidth() { + stridewidth_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::stridewidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.strideWidth) + return stridewidth_; +} +void Convolution3DLayerParams::set_stridewidth(::google::protobuf::int32 value) { + + stridewidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.strideWidth) +} + +// int32 dilationDepth = 40; +void Convolution3DLayerParams::clear_dilationdepth() { + dilationdepth_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::dilationdepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.dilationDepth) + return dilationdepth_; +} +void Convolution3DLayerParams::set_dilationdepth(::google::protobuf::int32 value) { + + dilationdepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.dilationDepth) +} + +// int32 dilationHeight = 41; +void Convolution3DLayerParams::clear_dilationheight() { + dilationheight_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::dilationheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.dilationHeight) + return dilationheight_; +} +void Convolution3DLayerParams::set_dilationheight(::google::protobuf::int32 value) { + + dilationheight_ = value; + // 
@@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.dilationHeight) +} + +// int32 dilationWidth = 42; +void Convolution3DLayerParams::clear_dilationwidth() { + dilationwidth_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::dilationwidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.dilationWidth) + return dilationwidth_; +} +void Convolution3DLayerParams::set_dilationwidth(::google::protobuf::int32 value) { + + dilationwidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.dilationWidth) +} + +// bool hasBias = 50; +void Convolution3DLayerParams::clear_hasbias() { hasbias_ = false; } -bool InnerProductLayerParams::hasbias() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.hasBias) +bool Convolution3DLayerParams::hasbias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.hasBias) return hasbias_; } -void InnerProductLayerParams::set_hasbias(bool value) { +void Convolution3DLayerParams::set_hasbias(bool value) { hasbias_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.hasBias) + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.hasBias) } -// .CoreML.Specification.WeightParams weights = 20; -bool InnerProductLayerParams::has_weights() const { +// .CoreML.Specification.WeightParams weights = 60; +bool Convolution3DLayerParams::has_weights() const { return this != internal_default_instance() && weights_ != NULL; } -void InnerProductLayerParams::clear_weights() { +void Convolution3DLayerParams::clear_weights() { if (GetArenaNoVirtual() == NULL && weights_ != NULL) delete weights_; weights_ = NULL; } -const ::CoreML::Specification::WeightParams& InnerProductLayerParams::weights() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.weights) +const ::CoreML::Specification::WeightParams& Convolution3DLayerParams::weights() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.weights) return weights_ != NULL ? 
*weights_ : *::CoreML::Specification::WeightParams::internal_default_instance(); } -::CoreML::Specification::WeightParams* InnerProductLayerParams::mutable_weights() { +::CoreML::Specification::WeightParams* Convolution3DLayerParams::mutable_weights() { if (weights_ == NULL) { weights_ = new ::CoreML::Specification::WeightParams; } - // @@protoc_insertion_point(field_mutable:CoreML.Specification.InnerProductLayerParams.weights) + // @@protoc_insertion_point(field_mutable:CoreML.Specification.Convolution3DLayerParams.weights) return weights_; } -::CoreML::Specification::WeightParams* InnerProductLayerParams::release_weights() { - // @@protoc_insertion_point(field_release:CoreML.Specification.InnerProductLayerParams.weights) +::CoreML::Specification::WeightParams* Convolution3DLayerParams::release_weights() { + // @@protoc_insertion_point(field_release:CoreML.Specification.Convolution3DLayerParams.weights) ::CoreML::Specification::WeightParams* temp = weights_; weights_ = NULL; return temp; } -void InnerProductLayerParams::set_allocated_weights(::CoreML::Specification::WeightParams* weights) { +void Convolution3DLayerParams::set_allocated_weights(::CoreML::Specification::WeightParams* weights) { delete weights_; weights_ = weights; if (weights) { @@ -28107,38 +29776,38 @@ void InnerProductLayerParams::set_allocated_weights(::CoreML::Specification::Wei } else { } - // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.InnerProductLayerParams.weights) + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Convolution3DLayerParams.weights) } -// .CoreML.Specification.WeightParams bias = 21; -bool InnerProductLayerParams::has_bias() const { +// .CoreML.Specification.WeightParams bias = 61; +bool Convolution3DLayerParams::has_bias() const { return this != internal_default_instance() && bias_ != NULL; } -void InnerProductLayerParams::clear_bias() { +void Convolution3DLayerParams::clear_bias() { if (GetArenaNoVirtual() == NULL && bias_ != NULL) delete bias_; bias_ = NULL; } -const ::CoreML::Specification::WeightParams& InnerProductLayerParams::bias() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.bias) +const ::CoreML::Specification::WeightParams& Convolution3DLayerParams::bias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.bias) return bias_ != NULL ? 
*bias_ : *::CoreML::Specification::WeightParams::internal_default_instance(); } -::CoreML::Specification::WeightParams* InnerProductLayerParams::mutable_bias() { +::CoreML::Specification::WeightParams* Convolution3DLayerParams::mutable_bias() { if (bias_ == NULL) { bias_ = new ::CoreML::Specification::WeightParams; } - // @@protoc_insertion_point(field_mutable:CoreML.Specification.InnerProductLayerParams.bias) + // @@protoc_insertion_point(field_mutable:CoreML.Specification.Convolution3DLayerParams.bias) return bias_; } -::CoreML::Specification::WeightParams* InnerProductLayerParams::release_bias() { - // @@protoc_insertion_point(field_release:CoreML.Specification.InnerProductLayerParams.bias) +::CoreML::Specification::WeightParams* Convolution3DLayerParams::release_bias() { + // @@protoc_insertion_point(field_release:CoreML.Specification.Convolution3DLayerParams.bias) ::CoreML::Specification::WeightParams* temp = bias_; bias_ = NULL; return temp; } -void InnerProductLayerParams::set_allocated_bias(::CoreML::Specification::WeightParams* bias) { +void Convolution3DLayerParams::set_allocated_bias(::CoreML::Specification::WeightParams* bias) { delete bias_; bias_ = bias; if (bias) { @@ -28146,7 +29815,149 @@ void InnerProductLayerParams::set_allocated_bias(::CoreML::Specification::Weight } else { } - // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.InnerProductLayerParams.bias) + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Convolution3DLayerParams.bias) +} + +// .CoreML.Specification.Convolution3DLayerParams.PaddingType paddingType = 70; +void Convolution3DLayerParams::clear_paddingtype() { + paddingtype_ = 0; +} +::CoreML::Specification::Convolution3DLayerParams_PaddingType Convolution3DLayerParams::paddingtype() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.paddingType) + return static_cast< ::CoreML::Specification::Convolution3DLayerParams_PaddingType >(paddingtype_); +} +void Convolution3DLayerParams::set_paddingtype(::CoreML::Specification::Convolution3DLayerParams_PaddingType value) { + + paddingtype_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.paddingType) +} + +// int32 customPaddingFront = 80; +void Convolution3DLayerParams::clear_custompaddingfront() { + custompaddingfront_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::custompaddingfront() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingFront) + return custompaddingfront_; +} +void Convolution3DLayerParams::set_custompaddingfront(::google::protobuf::int32 value) { + + custompaddingfront_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingFront) +} + +// int32 customPaddingBack = 81; +void Convolution3DLayerParams::clear_custompaddingback() { + custompaddingback_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::custompaddingback() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingBack) + return custompaddingback_; +} +void Convolution3DLayerParams::set_custompaddingback(::google::protobuf::int32 value) { + + custompaddingback_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingBack) +} + +// int32 customPaddingTop = 82; +void Convolution3DLayerParams::clear_custompaddingtop() { + custompaddingtop_ = 0; +} +::google::protobuf::int32 
Convolution3DLayerParams::custompaddingtop() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingTop) + return custompaddingtop_; +} +void Convolution3DLayerParams::set_custompaddingtop(::google::protobuf::int32 value) { + + custompaddingtop_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingTop) +} + +// int32 customPaddingBottom = 83; +void Convolution3DLayerParams::clear_custompaddingbottom() { + custompaddingbottom_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::custompaddingbottom() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingBottom) + return custompaddingbottom_; +} +void Convolution3DLayerParams::set_custompaddingbottom(::google::protobuf::int32 value) { + + custompaddingbottom_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingBottom) +} + +// int32 customPaddingLeft = 84; +void Convolution3DLayerParams::clear_custompaddingleft() { + custompaddingleft_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::custompaddingleft() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingLeft) + return custompaddingleft_; +} +void Convolution3DLayerParams::set_custompaddingleft(::google::protobuf::int32 value) { + + custompaddingleft_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingLeft) +} + +// int32 customPaddingRight = 85; +void Convolution3DLayerParams::clear_custompaddingright() { + custompaddingright_ = 0; +} +::google::protobuf::int32 Convolution3DLayerParams::custompaddingright() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingRight) + return custompaddingright_; +} +void Convolution3DLayerParams::set_custompaddingright(::google::protobuf::int32 value) { + + custompaddingright_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingRight) +} + +// bool isDeconvolution = 86; +void Convolution3DLayerParams::clear_isdeconvolution() { + isdeconvolution_ = false; +} +bool Convolution3DLayerParams::isdeconvolution() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.isDeconvolution) + return isdeconvolution_; +} +void Convolution3DLayerParams::set_isdeconvolution(bool value) { + + isdeconvolution_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.isDeconvolution) +} + +// repeated uint64 outputShape = 87; +int Convolution3DLayerParams::outputshape_size() const { + return outputshape_.size(); +} +void Convolution3DLayerParams::clear_outputshape() { + outputshape_.Clear(); +} +::google::protobuf::uint64 Convolution3DLayerParams::outputshape(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.outputShape) + return outputshape_.Get(index); +} +void Convolution3DLayerParams::set_outputshape(int index, ::google::protobuf::uint64 value) { + outputshape_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.outputShape) +} +void Convolution3DLayerParams::add_outputshape(::google::protobuf::uint64 value) { + outputshape_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.Convolution3DLayerParams.outputShape) +} 
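Every magic number in the parse and serialize paths above is a precomputed wire tag, `(field_number << 3) | wire_type`, and the `case` arms compare only its low byte, which is why large tags still appear truncated through `static_cast<uint8>`. A small self-contained check of the literals used in this hunk (the `MakeTag` helper is illustrative, not part of the generated code):

```cpp
// Illustrative check of the wire-tag literals in the generated parser above:
// tag = (field_number << 3) | wire_type.
#include <cassert>
#include <cstdint>

constexpr std::uint32_t MakeTag(std::uint32_t field, std::uint32_t wire_type) {
  return (field << 3) | wire_type;
}

int main() {
  // Varint-encoded scalars use wire type 0.
  assert(MakeTag(20, 0) == 160u);  // kernelDepth = 20
  assert(MakeTag(50, 0) == 400u);  // hasBias = 50
  // Length-delimited fields (messages, packed repeateds) use wire type 2.
  assert(MakeTag(60, 2) == 482u);  // weights = 60
  assert(MakeTag(87, 2) == 698u);  // packed outputShape = 87
  assert(MakeTag(87, 0) == 696u);  // unpacked outputShape fallback
  return 0;
}
```

The two parser arms for `outputShape` fall out of this arithmetic: tag 698 (wire type 2) accepts the packed encoding, while tag 696 (wire type 0) accepts the unpacked form that proto3 parsers must still tolerate.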
+const ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >& +Convolution3DLayerParams::outputshape() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.Convolution3DLayerParams.outputShape) + return outputshape_; +} +::google::protobuf::RepeatedField< ::google::protobuf::uint64 >* +Convolution3DLayerParams::mutable_outputshape() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.Convolution3DLayerParams.outputShape) + return &outputshape_; } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -28154,22 +29965,23 @@ void InnerProductLayerParams::set_allocated_bias(::CoreML::Specification::Weight // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int EmbeddingLayerParams::kInputDimFieldNumber; -const int EmbeddingLayerParams::kOutputChannelsFieldNumber; -const int EmbeddingLayerParams::kHasBiasFieldNumber; -const int EmbeddingLayerParams::kWeightsFieldNumber; -const int EmbeddingLayerParams::kBiasFieldNumber; +const int InnerProductLayerParams::kInputChannelsFieldNumber; +const int InnerProductLayerParams::kOutputChannelsFieldNumber; +const int InnerProductLayerParams::kHasBiasFieldNumber; +const int InnerProductLayerParams::kWeightsFieldNumber; +const int InnerProductLayerParams::kBiasFieldNumber; +const int InnerProductLayerParams::kInt8DynamicQuantizeFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -EmbeddingLayerParams::EmbeddingLayerParams() +InnerProductLayerParams::InnerProductLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.EmbeddingLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.InnerProductLayerParams) } -EmbeddingLayerParams::EmbeddingLayerParams(const EmbeddingLayerParams& from) +InnerProductLayerParams::InnerProductLayerParams(const InnerProductLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { @@ -28184,24 +29996,24 @@ EmbeddingLayerParams::EmbeddingLayerParams(const EmbeddingLayerParams& from) } else { bias_ = NULL; } - ::memcpy(&inputdim_, &from.inputdim_, - reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&inputdim_) + sizeof(hasbias_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.EmbeddingLayerParams) + ::memcpy(&inputchannels_, &from.inputchannels_, + reinterpret_cast<char*>(&int8dynamicquantize_) - + reinterpret_cast<char*>(&inputchannels_) + sizeof(int8dynamicquantize_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.InnerProductLayerParams) } -void EmbeddingLayerParams::SharedCtor() { - ::memset(&weights_, 0, reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&weights_) + sizeof(hasbias_)); +void InnerProductLayerParams::SharedCtor() { + ::memset(&weights_, 0, reinterpret_cast<char*>(&int8dynamicquantize_) - + reinterpret_cast<char*>(&weights_) + sizeof(int8dynamicquantize_)); _cached_size_ = 0; } -EmbeddingLayerParams::~EmbeddingLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.EmbeddingLayerParams) +InnerProductLayerParams::~InnerProductLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.InnerProductLayerParams) SharedDtor(); } -void EmbeddingLayerParams::SharedDtor() { +void InnerProductLayerParams::SharedDtor() { if (this != internal_default_instance()) { delete weights_; } @@ 
-28210,26 +30022,26 @@ void EmbeddingLayerParams::SharedDtor() { } } -void EmbeddingLayerParams::SetCachedSize(int size) const { +void InnerProductLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const EmbeddingLayerParams& EmbeddingLayerParams::default_instance() { +const InnerProductLayerParams& InnerProductLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -EmbeddingLayerParams* EmbeddingLayerParams::New(::google::protobuf::Arena* arena) const { - EmbeddingLayerParams* n = new EmbeddingLayerParams; +InnerProductLayerParams* InnerProductLayerParams::New(::google::protobuf::Arena* arena) const { + InnerProductLayerParams* n = new InnerProductLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void EmbeddingLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.EmbeddingLayerParams) +void InnerProductLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.InnerProductLayerParams) if (GetArenaNoVirtual() == NULL && weights_ != NULL) { delete weights_; } @@ -28238,28 +30050,28 @@ void EmbeddingLayerParams::Clear() { delete bias_; } bias_ = NULL; - ::memset(&inputdim_, 0, reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&inputdim_) + sizeof(hasbias_)); + ::memset(&inputchannels_, 0, reinterpret_cast<char*>(&int8dynamicquantize_) - + reinterpret_cast<char*>(&inputchannels_) + sizeof(int8dynamicquantize_)); } -bool EmbeddingLayerParams::MergePartialFromCodedStream( +bool InnerProductLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.EmbeddingLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.InnerProductLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // uint64 inputDim = 1; + // uint64 inputChannels = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(8u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &inputdim_))); + input, &inputchannels_))); } else { goto handle_unusual; } @@ -28318,6 +30130,20 @@ bool EmbeddingLayerParams::MergePartialFromCodedStream( break; } + // bool int8DynamicQuantize = 22; + case 22: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(176u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &int8dynamicquantize_))); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -28331,23 +30157,23 @@ bool EmbeddingLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.EmbeddingLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.InnerProductLayerParams) return true; failure: - // 
@@protoc_insertion_point(parse_failure:CoreML.Specification.InnerProductLayerParams) return false; #undef DO_ } -void EmbeddingLayerParams::SerializeWithCachedSizes( +void InnerProductLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.EmbeddingLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.InnerProductLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // uint64 inputDim = 1; - if (this->inputdim() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->inputdim(), output); + // uint64 inputChannels = 1; + if (this->inputchannels() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->inputchannels(), output); } // uint64 outputChannels = 2; @@ -28372,11 +30198,16 @@ void EmbeddingLayerParams::SerializeWithCachedSizes( 21, *this->bias_, output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.EmbeddingLayerParams) + // bool int8DynamicQuantize = 22; + if (this->int8dynamicquantize() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(22, this->int8dynamicquantize(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.InnerProductLayerParams) } -size_t EmbeddingLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.EmbeddingLayerParams) +size_t InnerProductLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.InnerProductLayerParams) size_t total_size = 0; // .CoreML.Specification.WeightParams weights = 20; @@ -28393,11 +30224,11 @@ size_t EmbeddingLayerParams::ByteSizeLong() const { *this->bias_); } - // uint64 inputDim = 1; - if (this->inputdim() != 0) { + // uint64 inputChannels = 1; + if (this->inputchannels() != 0) { total_size += 1 + ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->inputdim()); + this->inputchannels()); } // uint64 outputChannels = 2; @@ -28412,6 +30243,11 @@ size_t EmbeddingLayerParams::ByteSizeLong() const { total_size += 1 + 1; } + // bool int8DynamicQuantize = 22; + if (this->int8dynamicquantize() != 0) { + total_size += 2 + 1; + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -28419,13 +30255,13 @@ size_t EmbeddingLayerParams::ByteSizeLong() const { return total_size; } -void EmbeddingLayerParams::CheckTypeAndMergeFrom( +void InnerProductLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const EmbeddingLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const InnerProductLayerParams*>(&from)); } -void EmbeddingLayerParams::MergeFrom(const EmbeddingLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.EmbeddingLayerParams) +void InnerProductLayerParams::MergeFrom(const InnerProductLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.InnerProductLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; @@ -28437,8 +30273,8 @@ void EmbeddingLayerParams::MergeFrom(const EmbeddingLayerParams& from) { if (from.has_bias()) { mutable_bias()->::CoreML::Specification::WeightParams::MergeFrom(from.bias()); } - if (from.inputdim() != 0) { - 
set_inputdim(from.inputdim()); + if (from.inputchannels() != 0) { + set_inputchannels(from.inputchannels()); } if (from.outputchannels() != 0) { set_outputchannels(from.outputchannels()); @@ -28446,110 +30282,114 @@ void EmbeddingLayerParams::MergeFrom(const EmbeddingLayerParams& from) { if (from.hasbias() != 0) { set_hasbias(from.hasbias()); } + if (from.int8dynamicquantize() != 0) { + set_int8dynamicquantize(from.int8dynamicquantize()); + } } -void EmbeddingLayerParams::CopyFrom(const EmbeddingLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.EmbeddingLayerParams) +void InnerProductLayerParams::CopyFrom(const InnerProductLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.InnerProductLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool EmbeddingLayerParams::IsInitialized() const { +bool InnerProductLayerParams::IsInitialized() const { return true; } -void EmbeddingLayerParams::Swap(EmbeddingLayerParams* other) { +void InnerProductLayerParams::Swap(InnerProductLayerParams* other) { if (other == this) return; InternalSwap(other); } -void EmbeddingLayerParams::InternalSwap(EmbeddingLayerParams* other) { +void InnerProductLayerParams::InternalSwap(InnerProductLayerParams* other) { std::swap(weights_, other->weights_); std::swap(bias_, other->bias_); - std::swap(inputdim_, other->inputdim_); + std::swap(inputchannels_, other->inputchannels_); std::swap(outputchannels_, other->outputchannels_); std::swap(hasbias_, other->hasbias_); + std::swap(int8dynamicquantize_, other->int8dynamicquantize_); std::swap(_cached_size_, other->_cached_size_); } -::std::string EmbeddingLayerParams::GetTypeName() const { - return "CoreML.Specification.EmbeddingLayerParams"; +::std::string InnerProductLayerParams::GetTypeName() const { + return "CoreML.Specification.InnerProductLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// EmbeddingLayerParams +// InnerProductLayerParams -// uint64 inputDim = 1; -void EmbeddingLayerParams::clear_inputdim() { - inputdim_ = GOOGLE_ULONGLONG(0); +// uint64 inputChannels = 1; +void InnerProductLayerParams::clear_inputchannels() { + inputchannels_ = GOOGLE_ULONGLONG(0); } -::google::protobuf::uint64 EmbeddingLayerParams::inputdim() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.inputDim) - return inputdim_; +::google::protobuf::uint64 InnerProductLayerParams::inputchannels() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.inputChannels) + return inputchannels_; } -void EmbeddingLayerParams::set_inputdim(::google::protobuf::uint64 value) { +void InnerProductLayerParams::set_inputchannels(::google::protobuf::uint64 value) { - inputdim_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingLayerParams.inputDim) + inputchannels_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.inputChannels) } // uint64 outputChannels = 2; -void EmbeddingLayerParams::clear_outputchannels() { +void InnerProductLayerParams::clear_outputchannels() { outputchannels_ = GOOGLE_ULONGLONG(0); } -::google::protobuf::uint64 EmbeddingLayerParams::outputchannels() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.outputChannels) +::google::protobuf::uint64 InnerProductLayerParams::outputchannels() const { + // 
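The `MergeFrom` bodies above encode the proto3 merge rule: scalar fields are copied only when the source holds a nonzero (non-default) value, while submessage fields (`weights`, `bias`) are merged recursively. A minimal usage sketch; the header name `NeuralNetwork.pb.h` is an assumption inferred from `protobuf_NeuralNetwork_2eproto`, so adjust it to your build:

```cpp
#include "NeuralNetwork.pb.h"  // assumed header name; adjust to your build

int main() {
  CoreML::Specification::InnerProductLayerParams dst, src;
  dst.set_inputchannels(128);
  src.set_outputchannels(64);       // nonzero, so MergeFrom copies it
  src.set_int8dynamicquantize(true);
  // src.inputchannels() is 0 (the proto3 default), so the merge leaves
  // dst's 128 untouched -- exactly the `if (from.x() != 0)` guards above.
  dst.MergeFrom(src);
  // dst now holds inputChannels == 128, outputChannels == 64,
  // int8DynamicQuantize == true.
  return 0;
}
```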
@@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.outputChannels) return outputchannels_; } -void EmbeddingLayerParams::set_outputchannels(::google::protobuf::uint64 value) { +void InnerProductLayerParams::set_outputchannels(::google::protobuf::uint64 value) { outputchannels_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingLayerParams.outputChannels) + // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.outputChannels) } // bool hasBias = 10; -void EmbeddingLayerParams::clear_hasbias() { +void InnerProductLayerParams::clear_hasbias() { hasbias_ = false; } -bool EmbeddingLayerParams::hasbias() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.hasBias) +bool InnerProductLayerParams::hasbias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.hasBias) return hasbias_; } -void EmbeddingLayerParams::set_hasbias(bool value) { +void InnerProductLayerParams::set_hasbias(bool value) { hasbias_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingLayerParams.hasBias) + // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.hasBias) } // .CoreML.Specification.WeightParams weights = 20; -bool EmbeddingLayerParams::has_weights() const { +bool InnerProductLayerParams::has_weights() const { return this != internal_default_instance() && weights_ != NULL; } -void EmbeddingLayerParams::clear_weights() { +void InnerProductLayerParams::clear_weights() { if (GetArenaNoVirtual() == NULL && weights_ != NULL) delete weights_; weights_ = NULL; } -const ::CoreML::Specification::WeightParams& EmbeddingLayerParams::weights() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.weights) +const ::CoreML::Specification::WeightParams& InnerProductLayerParams::weights() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.weights) return weights_ != NULL ? 
*weights_ : *::CoreML::Specification::WeightParams::internal_default_instance(); } -::CoreML::Specification::WeightParams* EmbeddingLayerParams::mutable_weights() { +::CoreML::Specification::WeightParams* InnerProductLayerParams::mutable_weights() { if (weights_ == NULL) { weights_ = new ::CoreML::Specification::WeightParams; } - // @@protoc_insertion_point(field_mutable:CoreML.Specification.EmbeddingLayerParams.weights) + // @@protoc_insertion_point(field_mutable:CoreML.Specification.InnerProductLayerParams.weights) return weights_; } -::CoreML::Specification::WeightParams* EmbeddingLayerParams::release_weights() { - // @@protoc_insertion_point(field_release:CoreML.Specification.EmbeddingLayerParams.weights) +::CoreML::Specification::WeightParams* InnerProductLayerParams::release_weights() { + // @@protoc_insertion_point(field_release:CoreML.Specification.InnerProductLayerParams.weights) ::CoreML::Specification::WeightParams* temp = weights_; weights_ = NULL; return temp; } -void EmbeddingLayerParams::set_allocated_weights(::CoreML::Specification::WeightParams* weights) { +void InnerProductLayerParams::set_allocated_weights(::CoreML::Specification::WeightParams* weights) { delete weights_; weights_ = weights; if (weights) { @@ -28557,38 +30397,38 @@ void EmbeddingLayerParams::set_allocated_weights(::CoreML::Specification::Weight } else { } - // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.EmbeddingLayerParams.weights) + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.InnerProductLayerParams.weights) } // .CoreML.Specification.WeightParams bias = 21; -bool EmbeddingLayerParams::has_bias() const { +bool InnerProductLayerParams::has_bias() const { return this != internal_default_instance() && bias_ != NULL; } -void EmbeddingLayerParams::clear_bias() { +void InnerProductLayerParams::clear_bias() { if (GetArenaNoVirtual() == NULL && bias_ != NULL) delete bias_; bias_ = NULL; } -const ::CoreML::Specification::WeightParams& EmbeddingLayerParams::bias() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.bias) +const ::CoreML::Specification::WeightParams& InnerProductLayerParams::bias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.bias) return bias_ != NULL ? 
*bias_ : *::CoreML::Specification::WeightParams::internal_default_instance(); } -::CoreML::Specification::WeightParams* EmbeddingLayerParams::mutable_bias() { +::CoreML::Specification::WeightParams* InnerProductLayerParams::mutable_bias() { if (bias_ == NULL) { bias_ = new ::CoreML::Specification::WeightParams; } - // @@protoc_insertion_point(field_mutable:CoreML.Specification.EmbeddingLayerParams.bias) + // @@protoc_insertion_point(field_mutable:CoreML.Specification.InnerProductLayerParams.bias) return bias_; } -::CoreML::Specification::WeightParams* EmbeddingLayerParams::release_bias() { - // @@protoc_insertion_point(field_release:CoreML.Specification.EmbeddingLayerParams.bias) +::CoreML::Specification::WeightParams* InnerProductLayerParams::release_bias() { + // @@protoc_insertion_point(field_release:CoreML.Specification.InnerProductLayerParams.bias) ::CoreML::Specification::WeightParams* temp = bias_; bias_ = NULL; return temp; } -void EmbeddingLayerParams::set_allocated_bias(::CoreML::Specification::WeightParams* bias) { +void InnerProductLayerParams::set_allocated_bias(::CoreML::Specification::WeightParams* bias) { delete bias_; bias_ = bias; if (bias) { @@ -28596,7 +30436,21 @@ void EmbeddingLayerParams::set_allocated_bias(::CoreML::Specification::WeightPar } else { } - // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.EmbeddingLayerParams.bias) + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.InnerProductLayerParams.bias) +} + +// bool int8DynamicQuantize = 22; +void InnerProductLayerParams::clear_int8dynamicquantize() { + int8dynamicquantize_ = false; +} +bool InnerProductLayerParams::int8dynamicquantize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.int8DynamicQuantize) + return int8dynamicquantize_; +} +void InnerProductLayerParams::set_int8dynamicquantize(bool value) { + + int8dynamicquantize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.int8DynamicQuantize) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -28604,22 +30458,22 @@ void EmbeddingLayerParams::set_allocated_bias(::CoreML::Specification::WeightPar // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int EmbeddingNDLayerParams::kVocabSizeFieldNumber; -const int EmbeddingNDLayerParams::kEmbeddingSizeFieldNumber; -const int EmbeddingNDLayerParams::kHasBiasFieldNumber; -const int EmbeddingNDLayerParams::kWeightsFieldNumber; -const int EmbeddingNDLayerParams::kBiasFieldNumber; +const int EmbeddingLayerParams::kInputDimFieldNumber; +const int EmbeddingLayerParams::kOutputChannelsFieldNumber; +const int EmbeddingLayerParams::kHasBiasFieldNumber; +const int EmbeddingLayerParams::kWeightsFieldNumber; +const int EmbeddingLayerParams::kBiasFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -EmbeddingNDLayerParams::EmbeddingNDLayerParams() +EmbeddingLayerParams::EmbeddingLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.EmbeddingNDLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.EmbeddingLayerParams) } -EmbeddingNDLayerParams::EmbeddingNDLayerParams(const EmbeddingNDLayerParams& from) +EmbeddingLayerParams::EmbeddingLayerParams(const EmbeddingLayerParams& from) : 
::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { @@ -28634,24 +30488,24 @@ EmbeddingNDLayerParams::EmbeddingNDLayerParams(const EmbeddingNDLayerParams& fro } else { bias_ = NULL; } - ::memcpy(&vocabsize_, &from.vocabsize_, + ::memcpy(&inputdim_, &from.inputdim_, reinterpret_cast(&hasbias_) - - reinterpret_cast(&vocabsize_) + sizeof(hasbias_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.EmbeddingNDLayerParams) + reinterpret_cast(&inputdim_) + sizeof(hasbias_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.EmbeddingLayerParams) } -void EmbeddingNDLayerParams::SharedCtor() { +void EmbeddingLayerParams::SharedCtor() { ::memset(&weights_, 0, reinterpret_cast(&hasbias_) - reinterpret_cast(&weights_) + sizeof(hasbias_)); _cached_size_ = 0; } -EmbeddingNDLayerParams::~EmbeddingNDLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.EmbeddingNDLayerParams) +EmbeddingLayerParams::~EmbeddingLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.EmbeddingLayerParams) SharedDtor(); } -void EmbeddingNDLayerParams::SharedDtor() { +void EmbeddingLayerParams::SharedDtor() { if (this != internal_default_instance()) { delete weights_; } @@ -28660,26 +30514,26 @@ void EmbeddingNDLayerParams::SharedDtor() { } } -void EmbeddingNDLayerParams::SetCachedSize(int size) const { +void EmbeddingLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const EmbeddingNDLayerParams& EmbeddingNDLayerParams::default_instance() { +const EmbeddingLayerParams& EmbeddingLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -EmbeddingNDLayerParams* EmbeddingNDLayerParams::New(::google::protobuf::Arena* arena) const { - EmbeddingNDLayerParams* n = new EmbeddingNDLayerParams; +EmbeddingLayerParams* EmbeddingLayerParams::New(::google::protobuf::Arena* arena) const { + EmbeddingLayerParams* n = new EmbeddingLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void EmbeddingNDLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.EmbeddingNDLayerParams) +void EmbeddingLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.EmbeddingLayerParams) if (GetArenaNoVirtual() == NULL && weights_ != NULL) { delete weights_; } @@ -28688,52 +30542,52 @@ void EmbeddingNDLayerParams::Clear() { delete bias_; } bias_ = NULL; - ::memset(&vocabsize_, 0, reinterpret_cast(&hasbias_) - - reinterpret_cast(&vocabsize_) + sizeof(hasbias_)); + ::memset(&inputdim_, 0, reinterpret_cast(&hasbias_) - + reinterpret_cast(&inputdim_) + sizeof(hasbias_)); } -bool EmbeddingNDLayerParams::MergePartialFromCodedStream( +bool EmbeddingLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.EmbeddingNDLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.EmbeddingLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // uint64 vocabSize = 1; + // uint64 inputDim = 1; case 1: { if 
(static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(8u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &vocabsize_))); + input, &inputdim_))); } else { goto handle_unusual; } break; } - // uint64 embeddingSize = 2; + // uint64 outputChannels = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(16u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &embeddingsize_))); + input, &outputchannels_))); } else { goto handle_unusual; } break; } - // bool hasBias = 3; - case 3: { + // bool hasBias = 10; + case 10: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(24u)) { + static_cast< ::google::protobuf::uint8>(80u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( @@ -28781,33 +30635,33 @@ bool EmbeddingNDLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.EmbeddingNDLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.EmbeddingLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.EmbeddingNDLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.EmbeddingLayerParams) return false; #undef DO_ } -void EmbeddingNDLayerParams::SerializeWithCachedSizes( +void EmbeddingLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.EmbeddingNDLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.EmbeddingLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // uint64 vocabSize = 1; - if (this->vocabsize() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->vocabsize(), output); + // uint64 inputDim = 1; + if (this->inputdim() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->inputdim(), output); } - // uint64 embeddingSize = 2; - if (this->embeddingsize() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(2, this->embeddingsize(), output); + // uint64 outputChannels = 2; + if (this->outputchannels() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(2, this->outputchannels(), output); } - // bool hasBias = 3; + // bool hasBias = 10; if (this->hasbias() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteBool(3, this->hasbias(), output); + ::google::protobuf::internal::WireFormatLite::WriteBool(10, this->hasbias(), output); } // .CoreML.Specification.WeightParams weights = 20; @@ -28822,11 +30676,11 @@ void EmbeddingNDLayerParams::SerializeWithCachedSizes( 21, *this->bias_, output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.EmbeddingNDLayerParams) + // @@protoc_insertion_point(serialize_end:CoreML.Specification.EmbeddingLayerParams) } -size_t EmbeddingNDLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.EmbeddingNDLayerParams) +size_t EmbeddingLayerParams::ByteSizeLong() const { +// 
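Note in the hunk above that `EmbeddingLayerParams` carries `hasBias` at field 10 (tag byte 80u), while the N-D variant further below keeps it at field 3 (24u): field numbers, not names, are what identify fields on the wire. A hedged sketch of populating the message through the accessors generated here (header name assumed, as above):

```cpp
#include "NeuralNetwork.pb.h"  // assumed header name, as above

void fill_embedding(CoreML::Specification::EmbeddingLayerParams* e) {
  e->set_inputdim(10000);      // field 1
  e->set_outputchannels(300);  // field 2
  e->set_hasbias(true);        // field 10
  e->mutable_weights();        // field 20: lazily allocates WeightParams
  e->mutable_bias();           // field 21: likewise
}
```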
@@protoc_insertion_point(message_byte_size_start:CoreML.Specification.EmbeddingLayerParams) size_t total_size = 0; // .CoreML.Specification.WeightParams weights = 20; @@ -28843,21 +30697,21 @@ size_t EmbeddingNDLayerParams::ByteSizeLong() const { *this->bias_); } - // uint64 vocabSize = 1; - if (this->vocabsize() != 0) { + // uint64 inputDim = 1; + if (this->inputdim() != 0) { total_size += 1 + ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->vocabsize()); + this->inputdim()); } - // uint64 embeddingSize = 2; - if (this->embeddingsize() != 0) { + // uint64 outputChannels = 2; + if (this->outputchannels() != 0) { total_size += 1 + ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->embeddingsize()); + this->outputchannels()); } - // bool hasBias = 3; + // bool hasBias = 10; if (this->hasbias() != 0) { total_size += 1 + 1; } @@ -28869,13 +30723,13 @@ size_t EmbeddingNDLayerParams::ByteSizeLong() const { return total_size; } -void EmbeddingNDLayerParams::CheckTypeAndMergeFrom( +void EmbeddingLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + MergeFrom(*::google::protobuf::down_cast(&from)); } -void EmbeddingNDLayerParams::MergeFrom(const EmbeddingNDLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.EmbeddingNDLayerParams) +void EmbeddingLayerParams::MergeFrom(const EmbeddingLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.EmbeddingLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; @@ -28887,57 +30741,507 @@ void EmbeddingNDLayerParams::MergeFrom(const EmbeddingNDLayerParams& from) { if (from.has_bias()) { mutable_bias()->::CoreML::Specification::WeightParams::MergeFrom(from.bias()); } - if (from.vocabsize() != 0) { - set_vocabsize(from.vocabsize()); + if (from.inputdim() != 0) { + set_inputdim(from.inputdim()); } - if (from.embeddingsize() != 0) { - set_embeddingsize(from.embeddingsize()); + if (from.outputchannels() != 0) { + set_outputchannels(from.outputchannels()); } if (from.hasbias() != 0) { set_hasbias(from.hasbias()); } } -void EmbeddingNDLayerParams::CopyFrom(const EmbeddingNDLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.EmbeddingNDLayerParams) +void EmbeddingLayerParams::CopyFrom(const EmbeddingLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.EmbeddingLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool EmbeddingNDLayerParams::IsInitialized() const { +bool EmbeddingLayerParams::IsInitialized() const { return true; } -void EmbeddingNDLayerParams::Swap(EmbeddingNDLayerParams* other) { +void EmbeddingLayerParams::Swap(EmbeddingLayerParams* other) { if (other == this) return; InternalSwap(other); } -void EmbeddingNDLayerParams::InternalSwap(EmbeddingNDLayerParams* other) { +void EmbeddingLayerParams::InternalSwap(EmbeddingLayerParams* other) { std::swap(weights_, other->weights_); std::swap(bias_, other->bias_); - std::swap(vocabsize_, other->vocabsize_); - std::swap(embeddingsize_, other->embeddingsize_); + std::swap(inputdim_, other->inputdim_); + std::swap(outputchannels_, other->outputchannels_); std::swap(hasbias_, other->hasbias_); std::swap(_cached_size_, other->_cached_size_); } -::std::string EmbeddingNDLayerParams::GetTypeName() const 
{ - return "CoreML.Specification.EmbeddingNDLayerParams"; +::std::string EmbeddingLayerParams::GetTypeName() const { + return "CoreML.Specification.EmbeddingLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// EmbeddingNDLayerParams +// EmbeddingLayerParams -// uint64 vocabSize = 1; -void EmbeddingNDLayerParams::clear_vocabsize() { - vocabsize_ = GOOGLE_ULONGLONG(0); +// uint64 inputDim = 1; +void EmbeddingLayerParams::clear_inputdim() { + inputdim_ = GOOGLE_ULONGLONG(0); } -::google::protobuf::uint64 EmbeddingNDLayerParams::vocabsize() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingNDLayerParams.vocabSize) - return vocabsize_; +::google::protobuf::uint64 EmbeddingLayerParams::inputdim() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.inputDim) + return inputdim_; } -void EmbeddingNDLayerParams::set_vocabsize(::google::protobuf::uint64 value) { +void EmbeddingLayerParams::set_inputdim(::google::protobuf::uint64 value) { + + inputdim_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingLayerParams.inputDim) +} + +// uint64 outputChannels = 2; +void EmbeddingLayerParams::clear_outputchannels() { + outputchannels_ = GOOGLE_ULONGLONG(0); +} +::google::protobuf::uint64 EmbeddingLayerParams::outputchannels() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.outputChannels) + return outputchannels_; +} +void EmbeddingLayerParams::set_outputchannels(::google::protobuf::uint64 value) { + + outputchannels_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingLayerParams.outputChannels) +} + +// bool hasBias = 10; +void EmbeddingLayerParams::clear_hasbias() { + hasbias_ = false; +} +bool EmbeddingLayerParams::hasbias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.hasBias) + return hasbias_; +} +void EmbeddingLayerParams::set_hasbias(bool value) { + + hasbias_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingLayerParams.hasBias) +} + +// .CoreML.Specification.WeightParams weights = 20; +bool EmbeddingLayerParams::has_weights() const { + return this != internal_default_instance() && weights_ != NULL; +} +void EmbeddingLayerParams::clear_weights() { + if (GetArenaNoVirtual() == NULL && weights_ != NULL) delete weights_; + weights_ = NULL; +} +const ::CoreML::Specification::WeightParams& EmbeddingLayerParams::weights() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.weights) + return weights_ != NULL ? 
*weights_ + : *::CoreML::Specification::WeightParams::internal_default_instance(); +} +::CoreML::Specification::WeightParams* EmbeddingLayerParams::mutable_weights() { + + if (weights_ == NULL) { + weights_ = new ::CoreML::Specification::WeightParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.EmbeddingLayerParams.weights) + return weights_; +} +::CoreML::Specification::WeightParams* EmbeddingLayerParams::release_weights() { + // @@protoc_insertion_point(field_release:CoreML.Specification.EmbeddingLayerParams.weights) + + ::CoreML::Specification::WeightParams* temp = weights_; + weights_ = NULL; + return temp; +} +void EmbeddingLayerParams::set_allocated_weights(::CoreML::Specification::WeightParams* weights) { + delete weights_; + weights_ = weights; + if (weights) { + + } else { + + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.EmbeddingLayerParams.weights) +} + +// .CoreML.Specification.WeightParams bias = 21; +bool EmbeddingLayerParams::has_bias() const { + return this != internal_default_instance() && bias_ != NULL; +} +void EmbeddingLayerParams::clear_bias() { + if (GetArenaNoVirtual() == NULL && bias_ != NULL) delete bias_; + bias_ = NULL; +} +const ::CoreML::Specification::WeightParams& EmbeddingLayerParams::bias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingLayerParams.bias) + return bias_ != NULL ? *bias_ + : *::CoreML::Specification::WeightParams::internal_default_instance(); +} +::CoreML::Specification::WeightParams* EmbeddingLayerParams::mutable_bias() { + + if (bias_ == NULL) { + bias_ = new ::CoreML::Specification::WeightParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.EmbeddingLayerParams.bias) + return bias_; +} +::CoreML::Specification::WeightParams* EmbeddingLayerParams::release_bias() { + // @@protoc_insertion_point(field_release:CoreML.Specification.EmbeddingLayerParams.bias) + + ::CoreML::Specification::WeightParams* temp = bias_; + bias_ = NULL; + return temp; +} +void EmbeddingLayerParams::set_allocated_bias(::CoreML::Specification::WeightParams* bias) { + delete bias_; + bias_ = bias; + if (bias) { + + } else { + + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.EmbeddingLayerParams.bias) +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int EmbeddingNDLayerParams::kVocabSizeFieldNumber; +const int EmbeddingNDLayerParams::kEmbeddingSizeFieldNumber; +const int EmbeddingNDLayerParams::kHasBiasFieldNumber; +const int EmbeddingNDLayerParams::kWeightsFieldNumber; +const int EmbeddingNDLayerParams::kBiasFieldNumber; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +EmbeddingNDLayerParams::EmbeddingNDLayerParams() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.EmbeddingNDLayerParams) +} +EmbeddingNDLayerParams::EmbeddingNDLayerParams(const EmbeddingNDLayerParams& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + if (from.has_weights()) { + weights_ = new ::CoreML::Specification::WeightParams(*from.weights_); + } else { + weights_ = NULL; + } + if (from.has_bias()) { + bias_ = new 
::CoreML::Specification::WeightParams(*from.bias_); + } else { + bias_ = NULL; + } + ::memcpy(&vocabsize_, &from.vocabsize_, + reinterpret_cast(&hasbias_) - + reinterpret_cast(&vocabsize_) + sizeof(hasbias_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.EmbeddingNDLayerParams) +} + +void EmbeddingNDLayerParams::SharedCtor() { + ::memset(&weights_, 0, reinterpret_cast(&hasbias_) - + reinterpret_cast(&weights_) + sizeof(hasbias_)); + _cached_size_ = 0; +} + +EmbeddingNDLayerParams::~EmbeddingNDLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.EmbeddingNDLayerParams) + SharedDtor(); +} + +void EmbeddingNDLayerParams::SharedDtor() { + if (this != internal_default_instance()) { + delete weights_; + } + if (this != internal_default_instance()) { + delete bias_; + } +} + +void EmbeddingNDLayerParams::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const EmbeddingNDLayerParams& EmbeddingNDLayerParams::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +EmbeddingNDLayerParams* EmbeddingNDLayerParams::New(::google::protobuf::Arena* arena) const { + EmbeddingNDLayerParams* n = new EmbeddingNDLayerParams; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void EmbeddingNDLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.EmbeddingNDLayerParams) + if (GetArenaNoVirtual() == NULL && weights_ != NULL) { + delete weights_; + } + weights_ = NULL; + if (GetArenaNoVirtual() == NULL && bias_ != NULL) { + delete bias_; + } + bias_ = NULL; + ::memset(&vocabsize_, 0, reinterpret_cast(&hasbias_) - + reinterpret_cast(&vocabsize_) + sizeof(hasbias_)); +} + +bool EmbeddingNDLayerParams::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.EmbeddingNDLayerParams) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // uint64 vocabSize = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(8u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + input, &vocabsize_))); + } else { + goto handle_unusual; + } + break; + } + + // uint64 embeddingSize = 2; + case 2: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(16u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + input, &embeddingsize_))); + } else { + goto handle_unusual; + } + break; + } + + // bool hasBias = 3; + case 3: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(24u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &hasbias_))); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.WeightParams weights = 20; + case 
20: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(162u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_weights())); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.WeightParams bias = 21; + case 21: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(170u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_bias())); + } else { + goto handle_unusual; + } + break; + } + + default: { + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } + } + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.EmbeddingNDLayerParams) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.EmbeddingNDLayerParams) + return false; +#undef DO_ +} + +void EmbeddingNDLayerParams::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.EmbeddingNDLayerParams) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // uint64 vocabSize = 1; + if (this->vocabsize() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->vocabsize(), output); + } + + // uint64 embeddingSize = 2; + if (this->embeddingsize() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(2, this->embeddingsize(), output); + } + + // bool hasBias = 3; + if (this->hasbias() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(3, this->hasbias(), output); + } + + // .CoreML.Specification.WeightParams weights = 20; + if (this->has_weights()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 20, *this->weights_, output); + } + + // .CoreML.Specification.WeightParams bias = 21; + if (this->has_bias()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 21, *this->bias_, output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.EmbeddingNDLayerParams) +} + +size_t EmbeddingNDLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.EmbeddingNDLayerParams) + size_t total_size = 0; + + // .CoreML.Specification.WeightParams weights = 20; + if (this->has_weights()) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *this->weights_); + } + + // .CoreML.Specification.WeightParams bias = 21; + if (this->has_bias()) { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *this->bias_); + } + + // uint64 vocabSize = 1; + if (this->vocabsize() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::UInt64Size( + this->vocabsize()); + } + + // uint64 embeddingSize = 2; + if (this->embeddingsize() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::UInt64Size( + this->embeddingsize()); + } + + // bool hasBias = 3; + if (this->hasbias() != 0) { + total_size += 1 + 1; + } + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return 
total_size; +} + +void EmbeddingNDLayerParams::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast(&from)); +} + +void EmbeddingNDLayerParams::MergeFrom(const EmbeddingNDLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.EmbeddingNDLayerParams) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + if (from.has_weights()) { + mutable_weights()->::CoreML::Specification::WeightParams::MergeFrom(from.weights()); + } + if (from.has_bias()) { + mutable_bias()->::CoreML::Specification::WeightParams::MergeFrom(from.bias()); + } + if (from.vocabsize() != 0) { + set_vocabsize(from.vocabsize()); + } + if (from.embeddingsize() != 0) { + set_embeddingsize(from.embeddingsize()); + } + if (from.hasbias() != 0) { + set_hasbias(from.hasbias()); + } +} + +void EmbeddingNDLayerParams::CopyFrom(const EmbeddingNDLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.EmbeddingNDLayerParams) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool EmbeddingNDLayerParams::IsInitialized() const { + return true; +} + +void EmbeddingNDLayerParams::Swap(EmbeddingNDLayerParams* other) { + if (other == this) return; + InternalSwap(other); +} +void EmbeddingNDLayerParams::InternalSwap(EmbeddingNDLayerParams* other) { + std::swap(weights_, other->weights_); + std::swap(bias_, other->bias_); + std::swap(vocabsize_, other->vocabsize_); + std::swap(embeddingsize_, other->embeddingsize_); + std::swap(hasbias_, other->hasbias_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string EmbeddingNDLayerParams::GetTypeName() const { + return "CoreML.Specification.EmbeddingNDLayerParams"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// EmbeddingNDLayerParams + +// uint64 vocabSize = 1; +void EmbeddingNDLayerParams::clear_vocabsize() { + vocabsize_ = GOOGLE_ULONGLONG(0); +} +::google::protobuf::uint64 EmbeddingNDLayerParams::vocabsize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.EmbeddingNDLayerParams.vocabSize) + return vocabsize_; +} +void EmbeddingNDLayerParams::set_vocabsize(::google::protobuf::uint64 value) { vocabsize_ = value; // @@protoc_insertion_point(field_set:CoreML.Specification.EmbeddingNDLayerParams.vocabSize) @@ -30677,80 +32981,296 @@ PoolingLayerParams::PoolingPaddingTypeCase PoolingLayerParams::PoolingPaddingTyp // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int PaddingLayerParams_PaddingConstant::kValueFieldNumber; +const int Pooling3DLayerParams::kTypeFieldNumber; +const int Pooling3DLayerParams::kKernelDepthFieldNumber; +const int Pooling3DLayerParams::kKernelHeightFieldNumber; +const int Pooling3DLayerParams::kKernelWidthFieldNumber; +const int Pooling3DLayerParams::kStrideDepthFieldNumber; +const int Pooling3DLayerParams::kStrideHeightFieldNumber; +const int Pooling3DLayerParams::kStrideWidthFieldNumber; +const int Pooling3DLayerParams::kPaddingTypeFieldNumber; +const int Pooling3DLayerParams::kCustomPaddingFrontFieldNumber; +const int Pooling3DLayerParams::kCustomPaddingBackFieldNumber; +const int Pooling3DLayerParams::kCustomPaddingTopFieldNumber; +const int Pooling3DLayerParams::kCustomPaddingBottomFieldNumber; +const int Pooling3DLayerParams::kCustomPaddingLeftFieldNumber; +const int 
Pooling3DLayerParams::kCustomPaddingRightFieldNumber; +const int Pooling3DLayerParams::kCountExcludePaddingFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -PaddingLayerParams_PaddingConstant::PaddingLayerParams_PaddingConstant() +Pooling3DLayerParams::Pooling3DLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.PaddingLayerParams.PaddingConstant) + // @@protoc_insertion_point(constructor:CoreML.Specification.Pooling3DLayerParams) } -PaddingLayerParams_PaddingConstant::PaddingLayerParams_PaddingConstant(const PaddingLayerParams_PaddingConstant& from) +Pooling3DLayerParams::Pooling3DLayerParams(const Pooling3DLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - value_ = from.value_; - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.PaddingLayerParams.PaddingConstant) + ::memcpy(&type_, &from.type_, + reinterpret_cast(&paddingtype_) - + reinterpret_cast(&type_) + sizeof(paddingtype_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.Pooling3DLayerParams) } -void PaddingLayerParams_PaddingConstant::SharedCtor() { - value_ = 0; +void Pooling3DLayerParams::SharedCtor() { + ::memset(&type_, 0, reinterpret_cast(&paddingtype_) - + reinterpret_cast(&type_) + sizeof(paddingtype_)); _cached_size_ = 0; } -PaddingLayerParams_PaddingConstant::~PaddingLayerParams_PaddingConstant() { - // @@protoc_insertion_point(destructor:CoreML.Specification.PaddingLayerParams.PaddingConstant) +Pooling3DLayerParams::~Pooling3DLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.Pooling3DLayerParams) SharedDtor(); } -void PaddingLayerParams_PaddingConstant::SharedDtor() { +void Pooling3DLayerParams::SharedDtor() { } -void PaddingLayerParams_PaddingConstant::SetCachedSize(int size) const { +void Pooling3DLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const PaddingLayerParams_PaddingConstant& PaddingLayerParams_PaddingConstant::default_instance() { +const Pooling3DLayerParams& Pooling3DLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -PaddingLayerParams_PaddingConstant* PaddingLayerParams_PaddingConstant::New(::google::protobuf::Arena* arena) const { - PaddingLayerParams_PaddingConstant* n = new PaddingLayerParams_PaddingConstant; +Pooling3DLayerParams* Pooling3DLayerParams::New(::google::protobuf::Arena* arena) const { + Pooling3DLayerParams* n = new Pooling3DLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void PaddingLayerParams_PaddingConstant::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) - value_ = 0; +void Pooling3DLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.Pooling3DLayerParams) + ::memset(&type_, 0, reinterpret_cast(&paddingtype_) - + reinterpret_cast(&type_) + sizeof(paddingtype_)); } -bool PaddingLayerParams_PaddingConstant::MergePartialFromCodedStream( +bool Pooling3DLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if 
(!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + // @@protoc_insertion_point(parse_start:CoreML.Specification.Pooling3DLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // float value = 1; + // .CoreML.Specification.Pooling3DLayerParams.PoolingType3D type = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(13u)) { + static_cast< ::google::protobuf::uint8>(8u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_type(static_cast< ::CoreML::Specification::Pooling3DLayerParams_PoolingType3D >(value)); + } else { + goto handle_unusual; + } + break; + } + + // int32 kernelDepth = 2; + case 2: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(16u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &value_))); + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &kerneldepth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 kernelHeight = 3; + case 3: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(24u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &kernelheight_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 kernelWidth = 4; + case 4: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(32u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &kernelwidth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 strideDepth = 5; + case 5: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(40u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &stridedepth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 strideHeight = 6; + case 6: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(48u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &strideheight_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 strideWidth = 7; + case 7: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(56u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &stridewidth_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingFront = 8; + case 8: { + if (static_cast< ::google::protobuf::uint8>(tag) == + 
static_cast< ::google::protobuf::uint8>(64u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingfront_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingBack = 9; + case 9: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(72u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingback_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingTop = 10; + case 10: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(80u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingtop_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingBottom = 11; + case 11: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(88u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingbottom_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingLeft = 12; + case 12: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(96u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingleft_))); + } else { + goto handle_unusual; + } + break; + } + + // int32 customPaddingRight = 13; + case 13: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(104u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &custompaddingright_))); + } else { + goto handle_unusual; + } + break; + } + + // bool countExcludePadding = 14; + case 14: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(112u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &countexcludepadding_))); + } else { + goto handle_unusual; + } + break; + } + + // .CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType paddingType = 15; + case 15: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(120u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_paddingtype(static_cast< ::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType >(value)); } else { goto handle_unusual; } @@ -30770,35 +33290,203 @@ bool PaddingLayerParams_PaddingConstant::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.PaddingLayerParams.PaddingConstant) + // @@protoc_insertion_point(parse_success:CoreML.Specification.Pooling3DLayerParams) return true; failure: - // 
@@protoc_insertion_point(parse_failure:CoreML.Specification.PaddingLayerParams.PaddingConstant) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.Pooling3DLayerParams) return false; #undef DO_ } -void PaddingLayerParams_PaddingConstant::SerializeWithCachedSizes( +void Pooling3DLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.Pooling3DLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // float value = 1; - if (this->value() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->value(), output); + // .CoreML.Specification.Pooling3DLayerParams.PoolingType3D type = 1; + if (this->type() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 1, this->type(), output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.PaddingLayerParams.PaddingConstant) + // int32 kernelDepth = 2; + if (this->kerneldepth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(2, this->kerneldepth(), output); + } + + // int32 kernelHeight = 3; + if (this->kernelheight() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(3, this->kernelheight(), output); + } + + // int32 kernelWidth = 4; + if (this->kernelwidth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(4, this->kernelwidth(), output); + } + + // int32 strideDepth = 5; + if (this->stridedepth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(5, this->stridedepth(), output); + } + + // int32 strideHeight = 6; + if (this->strideheight() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(6, this->strideheight(), output); + } + + // int32 strideWidth = 7; + if (this->stridewidth() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(7, this->stridewidth(), output); + } + + // int32 customPaddingFront = 8; + if (this->custompaddingfront() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(8, this->custompaddingfront(), output); + } + + // int32 customPaddingBack = 9; + if (this->custompaddingback() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(9, this->custompaddingback(), output); + } + + // int32 customPaddingTop = 10; + if (this->custompaddingtop() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(10, this->custompaddingtop(), output); + } + + // int32 customPaddingBottom = 11; + if (this->custompaddingbottom() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(11, this->custompaddingbottom(), output); + } + + // int32 customPaddingLeft = 12; + if (this->custompaddingleft() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(12, this->custompaddingleft(), output); + } + + // int32 customPaddingRight = 13; + if (this->custompaddingright() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(13, this->custompaddingright(), output); + } + + // bool countExcludePadding = 14; + if (this->countexcludepadding() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(14, this->countexcludepadding(), output); + } + + // .CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType paddingType = 15; + if (this->paddingtype() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 15, this->paddingtype(), output); + } + + // 
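Every write in the `Pooling3DLayerParams` serializer above is guarded by `!= 0`, the usual proto3 rule that default-valued fields occupy no bytes on the wire, so a freshly constructed message serializes to an empty string. To orient the kernel/stride/customPadding fields, here is the textbook pooled-extent arithmetic for one axis; this is standard pooling math under the assumption that the custom paddings are applied verbatim, not a semantics guarantee extracted from this diff:

```cpp
#include <cstdio>

// Pooled output extent along one axis (depth, height, or width), assuming
// the custom padding values are honored as-is. Floor arithmetic for
// positive extents; a sketch, not a guarantee from this generated code.
int pooled_extent(int in, int kernel, int stride, int pad_lo, int pad_hi) {
  return (in + pad_lo + pad_hi - kernel) / stride + 1;
}

int main() {
  // e.g. depth 16, kernelDepth 2, strideDepth 2, customPaddingFront/Back 0 -> 8
  std::printf("%d\n", pooled_extent(16, 2, 2, 0, 0));
}
```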
@@protoc_insertion_point(serialize_end:CoreML.Specification.Pooling3DLayerParams) } -size_t PaddingLayerParams_PaddingConstant::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) +size_t Pooling3DLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.Pooling3DLayerParams) size_t total_size = 0; - // float value = 1; - if (this->value() != 0) { - total_size += 1 + 4; + // .CoreML.Specification.Pooling3DLayerParams.PoolingType3D type = 1; + if (this->type() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::EnumSize(this->type()); + } + + // int32 kernelDepth = 2; + if (this->kerneldepth() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->kerneldepth()); + } + + // int32 kernelHeight = 3; + if (this->kernelheight() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->kernelheight()); + } + + // int32 kernelWidth = 4; + if (this->kernelwidth() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->kernelwidth()); + } + + // int32 strideDepth = 5; + if (this->stridedepth() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->stridedepth()); + } + + // int32 strideHeight = 6; + if (this->strideheight() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->strideheight()); + } + + // int32 strideWidth = 7; + if (this->stridewidth() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->stridewidth()); + } + + // int32 customPaddingFront = 8; + if (this->custompaddingfront() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingfront()); + } + + // int32 customPaddingBack = 9; + if (this->custompaddingback() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingback()); + } + + // int32 customPaddingTop = 10; + if (this->custompaddingtop() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingtop()); + } + + // int32 customPaddingBottom = 11; + if (this->custompaddingbottom() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingbottom()); + } + + // int32 customPaddingLeft = 12; + if (this->custompaddingleft() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingleft()); + } + + // int32 customPaddingRight = 13; + if (this->custompaddingright() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->custompaddingright()); + } + + // bool countExcludePadding = 14; + if (this->countexcludepadding() != 0) { + total_size += 1 + 1; + } + + // .CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType paddingType = 15; + if (this->paddingtype() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::EnumSize(this->paddingtype()); } int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); @@ -30808,62 +33496,314 @@ size_t PaddingLayerParams_PaddingConstant::ByteSizeLong() const { return total_size; } -void PaddingLayerParams_PaddingConstant::CheckTypeAndMergeFrom( +void Pooling3DLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& 
from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + MergeFrom(*::google::protobuf::down_cast(&from)); } -void PaddingLayerParams_PaddingConstant::MergeFrom(const PaddingLayerParams_PaddingConstant& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) +void Pooling3DLayerParams::MergeFrom(const Pooling3DLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.Pooling3DLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.value() != 0) { - set_value(from.value()); + if (from.type() != 0) { + set_type(from.type()); + } + if (from.kerneldepth() != 0) { + set_kerneldepth(from.kerneldepth()); + } + if (from.kernelheight() != 0) { + set_kernelheight(from.kernelheight()); + } + if (from.kernelwidth() != 0) { + set_kernelwidth(from.kernelwidth()); + } + if (from.stridedepth() != 0) { + set_stridedepth(from.stridedepth()); + } + if (from.strideheight() != 0) { + set_strideheight(from.strideheight()); + } + if (from.stridewidth() != 0) { + set_stridewidth(from.stridewidth()); + } + if (from.custompaddingfront() != 0) { + set_custompaddingfront(from.custompaddingfront()); + } + if (from.custompaddingback() != 0) { + set_custompaddingback(from.custompaddingback()); + } + if (from.custompaddingtop() != 0) { + set_custompaddingtop(from.custompaddingtop()); + } + if (from.custompaddingbottom() != 0) { + set_custompaddingbottom(from.custompaddingbottom()); + } + if (from.custompaddingleft() != 0) { + set_custompaddingleft(from.custompaddingleft()); + } + if (from.custompaddingright() != 0) { + set_custompaddingright(from.custompaddingright()); + } + if (from.countexcludepadding() != 0) { + set_countexcludepadding(from.countexcludepadding()); + } + if (from.paddingtype() != 0) { + set_paddingtype(from.paddingtype()); } } -void PaddingLayerParams_PaddingConstant::CopyFrom(const PaddingLayerParams_PaddingConstant& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) +void Pooling3DLayerParams::CopyFrom(const Pooling3DLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.Pooling3DLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool PaddingLayerParams_PaddingConstant::IsInitialized() const { +bool Pooling3DLayerParams::IsInitialized() const { return true; } -void PaddingLayerParams_PaddingConstant::Swap(PaddingLayerParams_PaddingConstant* other) { +void Pooling3DLayerParams::Swap(Pooling3DLayerParams* other) { if (other == this) return; InternalSwap(other); } -void PaddingLayerParams_PaddingConstant::InternalSwap(PaddingLayerParams_PaddingConstant* other) { - std::swap(value_, other->value_); +void Pooling3DLayerParams::InternalSwap(Pooling3DLayerParams* other) { + std::swap(type_, other->type_); + std::swap(kerneldepth_, other->kerneldepth_); + std::swap(kernelheight_, other->kernelheight_); + std::swap(kernelwidth_, other->kernelwidth_); + std::swap(stridedepth_, other->stridedepth_); + std::swap(strideheight_, other->strideheight_); + std::swap(stridewidth_, other->stridewidth_); + std::swap(custompaddingfront_, other->custompaddingfront_); + std::swap(custompaddingback_, other->custompaddingback_); + std::swap(custompaddingtop_, other->custompaddingtop_); + std::swap(custompaddingbottom_, 
other->custompaddingbottom_); + std::swap(custompaddingleft_, other->custompaddingleft_); + std::swap(custompaddingright_, other->custompaddingright_); + std::swap(countexcludepadding_, other->countexcludepadding_); + std::swap(paddingtype_, other->paddingtype_); std::swap(_cached_size_, other->_cached_size_); } -::std::string PaddingLayerParams_PaddingConstant::GetTypeName() const { - return "CoreML.Specification.PaddingLayerParams.PaddingConstant"; +::std::string Pooling3DLayerParams::GetTypeName() const { + return "CoreML.Specification.Pooling3DLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// PaddingLayerParams_PaddingConstant +// Pooling3DLayerParams -// float value = 1; -void PaddingLayerParams_PaddingConstant::clear_value() { - value_ = 0; +// .CoreML.Specification.Pooling3DLayerParams.PoolingType3D type = 1; +void Pooling3DLayerParams::clear_type() { + type_ = 0; } -float PaddingLayerParams_PaddingConstant::value() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.PaddingLayerParams.PaddingConstant.value) - return value_; +::CoreML::Specification::Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams::type() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.type) + return static_cast< ::CoreML::Specification::Pooling3DLayerParams_PoolingType3D >(type_); } -void PaddingLayerParams_PaddingConstant::set_value(float value) { +void Pooling3DLayerParams::set_type(::CoreML::Specification::Pooling3DLayerParams_PoolingType3D value) { - value_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.PaddingLayerParams.PaddingConstant.value) + type_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.type) +} + +// int32 kernelDepth = 2; +void Pooling3DLayerParams::clear_kerneldepth() { + kerneldepth_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::kerneldepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.kernelDepth) + return kerneldepth_; +} +void Pooling3DLayerParams::set_kerneldepth(::google::protobuf::int32 value) { + + kerneldepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.kernelDepth) +} + +// int32 kernelHeight = 3; +void Pooling3DLayerParams::clear_kernelheight() { + kernelheight_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::kernelheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.kernelHeight) + return kernelheight_; +} +void Pooling3DLayerParams::set_kernelheight(::google::protobuf::int32 value) { + + kernelheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.kernelHeight) +} + +// int32 kernelWidth = 4; +void Pooling3DLayerParams::clear_kernelwidth() { + kernelwidth_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::kernelwidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.kernelWidth) + return kernelwidth_; +} +void Pooling3DLayerParams::set_kernelwidth(::google::protobuf::int32 value) { + + kernelwidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.kernelWidth) +} + +// int32 strideDepth = 5; +void Pooling3DLayerParams::clear_stridedepth() { + stridedepth_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::stridedepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.strideDepth) + return stridedepth_; +} +void 
Pooling3DLayerParams::set_stridedepth(::google::protobuf::int32 value) { + + stridedepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.strideDepth) +} + +// int32 strideHeight = 6; +void Pooling3DLayerParams::clear_strideheight() { + strideheight_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::strideheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.strideHeight) + return strideheight_; +} +void Pooling3DLayerParams::set_strideheight(::google::protobuf::int32 value) { + + strideheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.strideHeight) +} + +// int32 strideWidth = 7; +void Pooling3DLayerParams::clear_stridewidth() { + stridewidth_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::stridewidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.strideWidth) + return stridewidth_; +} +void Pooling3DLayerParams::set_stridewidth(::google::protobuf::int32 value) { + + stridewidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.strideWidth) +} + +// .CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType paddingType = 15; +void Pooling3DLayerParams::clear_paddingtype() { + paddingtype_ = 0; +} +::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::paddingtype() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.paddingType) + return static_cast< ::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType >(paddingtype_); +} +void Pooling3DLayerParams::set_paddingtype(::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType value) { + + paddingtype_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.paddingType) +} + +// int32 customPaddingFront = 8; +void Pooling3DLayerParams::clear_custompaddingfront() { + custompaddingfront_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::custompaddingfront() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingFront) + return custompaddingfront_; +} +void Pooling3DLayerParams::set_custompaddingfront(::google::protobuf::int32 value) { + + custompaddingfront_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingFront) +} + +// int32 customPaddingBack = 9; +void Pooling3DLayerParams::clear_custompaddingback() { + custompaddingback_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::custompaddingback() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingBack) + return custompaddingback_; +} +void Pooling3DLayerParams::set_custompaddingback(::google::protobuf::int32 value) { + + custompaddingback_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingBack) +} + +// int32 customPaddingTop = 10; +void Pooling3DLayerParams::clear_custompaddingtop() { + custompaddingtop_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::custompaddingtop() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingTop) + return custompaddingtop_; +} +void Pooling3DLayerParams::set_custompaddingtop(::google::protobuf::int32 value) { + + custompaddingtop_ = value; + // 
@@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingTop) +} + +// int32 customPaddingBottom = 11; +void Pooling3DLayerParams::clear_custompaddingbottom() { + custompaddingbottom_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::custompaddingbottom() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingBottom) + return custompaddingbottom_; +} +void Pooling3DLayerParams::set_custompaddingbottom(::google::protobuf::int32 value) { + + custompaddingbottom_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingBottom) +} + +// int32 customPaddingLeft = 12; +void Pooling3DLayerParams::clear_custompaddingleft() { + custompaddingleft_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::custompaddingleft() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingLeft) + return custompaddingleft_; +} +void Pooling3DLayerParams::set_custompaddingleft(::google::protobuf::int32 value) { + + custompaddingleft_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingLeft) +} + +// int32 customPaddingRight = 13; +void Pooling3DLayerParams::clear_custompaddingright() { + custompaddingright_ = 0; +} +::google::protobuf::int32 Pooling3DLayerParams::custompaddingright() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingRight) + return custompaddingright_; +} +void Pooling3DLayerParams::set_custompaddingright(::google::protobuf::int32 value) { + + custompaddingright_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingRight) +} + +// bool countExcludePadding = 14; +void Pooling3DLayerParams::clear_countexcludepadding() { + countexcludepadding_ = false; +} +bool Pooling3DLayerParams::countexcludepadding() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.countExcludePadding) + return countexcludepadding_; +} +void Pooling3DLayerParams::set_countexcludepadding(bool value) { + + countexcludepadding_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.countExcludePadding) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -30871,97 +33811,133 @@ void PaddingLayerParams_PaddingConstant::set_value(float value) { // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int GlobalPooling3DLayerParams::kTypeFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -PaddingLayerParams_PaddingReflection::PaddingLayerParams_PaddingReflection() +GlobalPooling3DLayerParams::GlobalPooling3DLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.PaddingLayerParams.PaddingReflection) + // @@protoc_insertion_point(constructor:CoreML.Specification.GlobalPooling3DLayerParams) } -PaddingLayerParams_PaddingReflection::PaddingLayerParams_PaddingReflection(const PaddingLayerParams_PaddingReflection& from) +GlobalPooling3DLayerParams::GlobalPooling3DLayerParams(const GlobalPooling3DLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - // 
@@protoc_insertion_point(copy_constructor:CoreML.Specification.PaddingLayerParams.PaddingReflection) + type_ = from.type_; + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.GlobalPooling3DLayerParams) } -void PaddingLayerParams_PaddingReflection::SharedCtor() { +void GlobalPooling3DLayerParams::SharedCtor() { + type_ = 0; _cached_size_ = 0; } -PaddingLayerParams_PaddingReflection::~PaddingLayerParams_PaddingReflection() { - // @@protoc_insertion_point(destructor:CoreML.Specification.PaddingLayerParams.PaddingReflection) +GlobalPooling3DLayerParams::~GlobalPooling3DLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.GlobalPooling3DLayerParams) SharedDtor(); } -void PaddingLayerParams_PaddingReflection::SharedDtor() { +void GlobalPooling3DLayerParams::SharedDtor() { } -void PaddingLayerParams_PaddingReflection::SetCachedSize(int size) const { +void GlobalPooling3DLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const PaddingLayerParams_PaddingReflection& PaddingLayerParams_PaddingReflection::default_instance() { +const GlobalPooling3DLayerParams& GlobalPooling3DLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -PaddingLayerParams_PaddingReflection* PaddingLayerParams_PaddingReflection::New(::google::protobuf::Arena* arena) const { - PaddingLayerParams_PaddingReflection* n = new PaddingLayerParams_PaddingReflection; +GlobalPooling3DLayerParams* GlobalPooling3DLayerParams::New(::google::protobuf::Arena* arena) const { + GlobalPooling3DLayerParams* n = new GlobalPooling3DLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void PaddingLayerParams_PaddingReflection::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) +void GlobalPooling3DLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.GlobalPooling3DLayerParams) + type_ = 0; } -bool PaddingLayerParams_PaddingReflection::MergePartialFromCodedStream( +bool GlobalPooling3DLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + // @@protoc_insertion_point(parse_start:CoreML.Specification.GlobalPooling3DLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; - handle_unusual: - if (tag == 0 || - ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == - ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { - goto success; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // .CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D type = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(8u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_type(static_cast< ::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D >(value)); + } else { + goto handle_unusual; + } + break; + } + + default: { + 
handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } } - DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.PaddingLayerParams.PaddingReflection) + // @@protoc_insertion_point(parse_success:CoreML.Specification.GlobalPooling3DLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.PaddingLayerParams.PaddingReflection) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.GlobalPooling3DLayerParams) return false; #undef DO_ } -void PaddingLayerParams_PaddingReflection::SerializeWithCachedSizes( +void GlobalPooling3DLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.GlobalPooling3DLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // @@protoc_insertion_point(serialize_end:CoreML.Specification.PaddingLayerParams.PaddingReflection) + // .CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D type = 1; + if (this->type() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 1, this->type(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.GlobalPooling3DLayerParams) } -size_t PaddingLayerParams_PaddingReflection::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) +size_t GlobalPooling3DLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.GlobalPooling3DLayerParams) size_t total_size = 0; + // .CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D type = 1; + if (this->type() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::EnumSize(this->type()); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -30969,104 +33945,459 @@ size_t PaddingLayerParams_PaddingReflection::ByteSizeLong() const { return total_size; } -void PaddingLayerParams_PaddingReflection::CheckTypeAndMergeFrom( +void GlobalPooling3DLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const PaddingLayerParams_PaddingReflection*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const GlobalPooling3DLayerParams*>(&from)); } -void PaddingLayerParams_PaddingReflection::MergeFrom(const PaddingLayerParams_PaddingReflection& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) +void GlobalPooling3DLayerParams::MergeFrom(const GlobalPooling3DLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.GlobalPooling3DLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; + if (from.type() != 0) { + set_type(from.type()); + } } -void PaddingLayerParams_PaddingReflection::CopyFrom(const PaddingLayerParams_PaddingReflection& from) { -//
@@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) +void GlobalPooling3DLayerParams::CopyFrom(const GlobalPooling3DLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.GlobalPooling3DLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool PaddingLayerParams_PaddingReflection::IsInitialized() const { +bool GlobalPooling3DLayerParams::IsInitialized() const { return true; } -void PaddingLayerParams_PaddingReflection::Swap(PaddingLayerParams_PaddingReflection* other) { +void GlobalPooling3DLayerParams::Swap(GlobalPooling3DLayerParams* other) { if (other == this) return; InternalSwap(other); } -void PaddingLayerParams_PaddingReflection::InternalSwap(PaddingLayerParams_PaddingReflection* other) { +void GlobalPooling3DLayerParams::InternalSwap(GlobalPooling3DLayerParams* other) { + std::swap(type_, other->type_); std::swap(_cached_size_, other->_cached_size_); } -::std::string PaddingLayerParams_PaddingReflection::GetTypeName() const { - return "CoreML.Specification.PaddingLayerParams.PaddingReflection"; +::std::string GlobalPooling3DLayerParams::GetTypeName() const { + return "CoreML.Specification.GlobalPooling3DLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// PaddingLayerParams_PaddingReflection +// GlobalPooling3DLayerParams + +// .CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D type = 1; +void GlobalPooling3DLayerParams::clear_type() { + type_ = 0; +} +::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams::type() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.GlobalPooling3DLayerParams.type) + return static_cast< ::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D >(type_); +} +void GlobalPooling3DLayerParams::set_type(::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D value) { + + type_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.GlobalPooling3DLayerParams.type) +} #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int PaddingLayerParams_PaddingConstant::kValueFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -PaddingLayerParams_PaddingReplication::PaddingLayerParams_PaddingReplication() +PaddingLayerParams_PaddingConstant::PaddingLayerParams_PaddingConstant() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.PaddingLayerParams.PaddingReplication) + // @@protoc_insertion_point(constructor:CoreML.Specification.PaddingLayerParams.PaddingConstant) } -PaddingLayerParams_PaddingReplication::PaddingLayerParams_PaddingReplication(const PaddingLayerParams_PaddingReplication& from) +PaddingLayerParams_PaddingConstant::PaddingLayerParams_PaddingConstant(const PaddingLayerParams_PaddingConstant& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.PaddingLayerParams.PaddingReplication) + value_ = from.value_; + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.PaddingLayerParams.PaddingConstant) } -void 
PaddingLayerParams_PaddingReplication::SharedCtor() { +void PaddingLayerParams_PaddingConstant::SharedCtor() { + value_ = 0; _cached_size_ = 0; } -PaddingLayerParams_PaddingReplication::~PaddingLayerParams_PaddingReplication() { - // @@protoc_insertion_point(destructor:CoreML.Specification.PaddingLayerParams.PaddingReplication) +PaddingLayerParams_PaddingConstant::~PaddingLayerParams_PaddingConstant() { + // @@protoc_insertion_point(destructor:CoreML.Specification.PaddingLayerParams.PaddingConstant) SharedDtor(); } -void PaddingLayerParams_PaddingReplication::SharedDtor() { +void PaddingLayerParams_PaddingConstant::SharedDtor() { } -void PaddingLayerParams_PaddingReplication::SetCachedSize(int size) const { +void PaddingLayerParams_PaddingConstant::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const PaddingLayerParams_PaddingReplication& PaddingLayerParams_PaddingReplication::default_instance() { +const PaddingLayerParams_PaddingConstant& PaddingLayerParams_PaddingConstant::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -PaddingLayerParams_PaddingReplication* PaddingLayerParams_PaddingReplication::New(::google::protobuf::Arena* arena) const { - PaddingLayerParams_PaddingReplication* n = new PaddingLayerParams_PaddingReplication; +PaddingLayerParams_PaddingConstant* PaddingLayerParams_PaddingConstant::New(::google::protobuf::Arena* arena) const { + PaddingLayerParams_PaddingConstant* n = new PaddingLayerParams_PaddingConstant; if (arena != NULL) { arena->Own(n); } return n; } -void PaddingLayerParams_PaddingReplication::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.PaddingLayerParams.PaddingReplication) +void PaddingLayerParams_PaddingConstant::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + value_ = 0; } -bool PaddingLayerParams_PaddingReplication::MergePartialFromCodedStream( +bool PaddingLayerParams_PaddingConstant::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // float value = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(13u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &value_))); + } else { + goto handle_unusual; + } + break; + } + + default: { + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } + } + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.PaddingLayerParams.PaddingConstant) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.PaddingLayerParams.PaddingConstant) + return false; +#undef 
DO_ +} + +void PaddingLayerParams_PaddingConstant::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // float value = 1; + if (this->value() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->value(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.PaddingLayerParams.PaddingConstant) +} + +size_t PaddingLayerParams_PaddingConstant::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + size_t total_size = 0; + + // float value = 1; + if (this->value() != 0) { + total_size += 1 + 4; + } + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void PaddingLayerParams_PaddingConstant::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const PaddingLayerParams_PaddingConstant*>(&from)); +} + +void PaddingLayerParams_PaddingConstant::MergeFrom(const PaddingLayerParams_PaddingConstant& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + if (from.value() != 0) { + set_value(from.value()); + } +} + +void PaddingLayerParams_PaddingConstant::CopyFrom(const PaddingLayerParams_PaddingConstant& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.PaddingLayerParams.PaddingConstant) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool PaddingLayerParams_PaddingConstant::IsInitialized() const { + return true; +} + +void PaddingLayerParams_PaddingConstant::Swap(PaddingLayerParams_PaddingConstant* other) { + if (other == this) return; + InternalSwap(other); +} +void PaddingLayerParams_PaddingConstant::InternalSwap(PaddingLayerParams_PaddingConstant* other) { + std::swap(value_, other->value_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string PaddingLayerParams_PaddingConstant::GetTypeName() const { + return "CoreML.Specification.PaddingLayerParams.PaddingConstant"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// PaddingLayerParams_PaddingConstant + +// float value = 1; +void PaddingLayerParams_PaddingConstant::clear_value() { + value_ = 0; +} +float PaddingLayerParams_PaddingConstant::value() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.PaddingLayerParams.PaddingConstant.value) + return value_; +} +void PaddingLayerParams_PaddingConstant::set_value(float value) { + + value_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.PaddingLayerParams.PaddingConstant.value) +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +PaddingLayerParams_PaddingReflection::PaddingLayerParams_PaddingReflection() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + 
protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.PaddingLayerParams.PaddingReflection) +} +PaddingLayerParams_PaddingReflection::PaddingLayerParams_PaddingReflection(const PaddingLayerParams_PaddingReflection& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.PaddingLayerParams.PaddingReflection) +} + +void PaddingLayerParams_PaddingReflection::SharedCtor() { + _cached_size_ = 0; +} + +PaddingLayerParams_PaddingReflection::~PaddingLayerParams_PaddingReflection() { + // @@protoc_insertion_point(destructor:CoreML.Specification.PaddingLayerParams.PaddingReflection) + SharedDtor(); +} + +void PaddingLayerParams_PaddingReflection::SharedDtor() { +} + +void PaddingLayerParams_PaddingReflection::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const PaddingLayerParams_PaddingReflection& PaddingLayerParams_PaddingReflection::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +PaddingLayerParams_PaddingReflection* PaddingLayerParams_PaddingReflection::New(::google::protobuf::Arena* arena) const { + PaddingLayerParams_PaddingReflection* n = new PaddingLayerParams_PaddingReflection; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void PaddingLayerParams_PaddingReflection::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) +} + +bool PaddingLayerParams_PaddingReflection::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); + tag = p.first; + if (!p.second) goto handle_unusual; + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.PaddingLayerParams.PaddingReflection) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.PaddingLayerParams.PaddingReflection) + return false; +#undef DO_ +} + +void PaddingLayerParams_PaddingReflection::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.PaddingLayerParams.PaddingReflection) +} + +size_t PaddingLayerParams_PaddingReflection::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + size_t total_size = 0; + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; 
+ GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void PaddingLayerParams_PaddingReflection::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const PaddingLayerParams_PaddingReflection*>(&from)); +} + +void PaddingLayerParams_PaddingReflection::MergeFrom(const PaddingLayerParams_PaddingReflection& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + +} + +void PaddingLayerParams_PaddingReflection::CopyFrom(const PaddingLayerParams_PaddingReflection& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.PaddingLayerParams.PaddingReflection) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool PaddingLayerParams_PaddingReflection::IsInitialized() const { + return true; +} + +void PaddingLayerParams_PaddingReflection::Swap(PaddingLayerParams_PaddingReflection* other) { + if (other == this) return; + InternalSwap(other); +} +void PaddingLayerParams_PaddingReflection::InternalSwap(PaddingLayerParams_PaddingReflection* other) { + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string PaddingLayerParams_PaddingReflection::GetTypeName() const { + return "CoreML.Specification.PaddingLayerParams.PaddingReflection"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// PaddingLayerParams_PaddingReflection + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +PaddingLayerParams_PaddingReplication::PaddingLayerParams_PaddingReplication() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.PaddingLayerParams.PaddingReplication) +} +PaddingLayerParams_PaddingReplication::PaddingLayerParams_PaddingReplication(const PaddingLayerParams_PaddingReplication& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.PaddingLayerParams.PaddingReplication) +} + +void PaddingLayerParams_PaddingReplication::SharedCtor() { + _cached_size_ = 0; +} + +PaddingLayerParams_PaddingReplication::~PaddingLayerParams_PaddingReplication() { + // @@protoc_insertion_point(destructor:CoreML.Specification.PaddingLayerParams.PaddingReplication) + SharedDtor(); +} + +void PaddingLayerParams_PaddingReplication::SharedDtor() { +} + +void PaddingLayerParams_PaddingReplication::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const PaddingLayerParams_PaddingReplication& PaddingLayerParams_PaddingReplication::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +PaddingLayerParams_PaddingReplication* PaddingLayerParams_PaddingReplication::New(::google::protobuf::Arena* arena) const { + PaddingLayerParams_PaddingReplication* n = new PaddingLayerParams_PaddingReplication; + if (arena != NULL) { + arena->Own(n); + } + return n; 
+} + +void PaddingLayerParams_PaddingReplication::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.PaddingLayerParams.PaddingReplication) +} + +bool PaddingLayerParams_PaddingReplication::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; @@ -33312,7 +36643,9 @@ void UnaryFunctionLayerParams::set_scale(float value) { #if !defined(_MSC_VER) || _MSC_VER >= 1900 const int UpsampleLayerParams::kScalingFactorFieldNumber; +const int UpsampleLayerParams::kFractionalScalingFactorFieldNumber; const int UpsampleLayerParams::kModeFieldNumber; +const int UpsampleLayerParams::kLinearUpsampleModeFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 UpsampleLayerParams::UpsampleLayerParams() @@ -33327,14 +36660,18 @@ UpsampleLayerParams::UpsampleLayerParams(const UpsampleLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), scalingfactor_(from.scalingfactor_), + fractionalscalingfactor_(from.fractionalscalingfactor_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - mode_ = from.mode_; + ::memcpy(&mode_, &from.mode_, + reinterpret_cast<char*>(&linearupsamplemode_) - + reinterpret_cast<char*>(&mode_) + sizeof(linearupsamplemode_)); // @@protoc_insertion_point(copy_constructor:CoreML.Specification.UpsampleLayerParams) } void UpsampleLayerParams::SharedCtor() { - mode_ = 0; + ::memset(&mode_, 0, reinterpret_cast<char*>(&linearupsamplemode_) - + reinterpret_cast<char*>(&mode_) + sizeof(linearupsamplemode_)); _cached_size_ = 0; } @@ -33367,7 +36704,9 @@ UpsampleLayerParams* UpsampleLayerParams::New(::google::protobuf::Arena* arena) void UpsampleLayerParams::Clear() { // @@protoc_insertion_point(message_clear_start:CoreML.Specification.UpsampleLayerParams) scalingfactor_.Clear(); - mode_ = 0; + fractionalscalingfactor_.Clear(); + ::memset(&mode_, 0, reinterpret_cast<char*>(&linearupsamplemode_) - + reinterpret_cast<char*>(&mode_) + sizeof(linearupsamplemode_)); } bool UpsampleLayerParams::MergePartialFromCodedStream( @@ -33413,6 +36752,39 @@ bool UpsampleLayerParams::MergePartialFromCodedStream( break; } + // .CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode linearUpsampleMode = 6; + case 6: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(48u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_linearupsamplemode(static_cast< ::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode >(value)); + } else { + goto handle_unusual; + } + break; + } + + // repeated float fractionalScalingFactor = 7; + case 7: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(58u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, this->mutable_fractionalscalingfactor()))); + } else if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(61u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + 1, 58u, input, this->mutable_fractionalscalingfactor()))); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -33456,6 +36828,20 @@ 
void UpsampleLayerParams::SerializeWithCachedSizes( 5, this->mode(), output); } + // .CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode linearUpsampleMode = 6; + if (this->linearupsamplemode() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 6, this->linearupsamplemode(), output); + } + + // repeated float fractionalScalingFactor = 7; + if (this->fractionalscalingfactor_size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteTag(7, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); + output->WriteVarint32(_fractionalscalingfactor_cached_byte_size_); + ::google::protobuf::internal::WireFormatLite::WriteFloatArray( + this->fractionalscalingfactor().data(), this->fractionalscalingfactor_size(), output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.UpsampleLayerParams) } @@ -33478,12 +36864,33 @@ size_t UpsampleLayerParams::ByteSizeLong() const { total_size += data_size; } + // repeated float fractionalScalingFactor = 7; + { + unsigned int count = this->fractionalscalingfactor_size(); + size_t data_size = 4UL * count; + if (data_size > 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size(data_size); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(data_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _fractionalscalingfactor_cached_byte_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + total_size += data_size; + } + // .CoreML.Specification.UpsampleLayerParams.InterpolationMode mode = 5; if (this->mode() != 0) { total_size += 1 + ::google::protobuf::internal::WireFormatLite::EnumSize(this->mode()); } + // .CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode linearUpsampleMode = 6; + if (this->linearupsamplemode() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::EnumSize(this->linearupsamplemode()); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -33504,9 +36911,13 @@ void UpsampleLayerParams::MergeFrom(const UpsampleLayerParams& from) { (void) cached_has_bits; scalingfactor_.MergeFrom(from.scalingfactor_); + fractionalscalingfactor_.MergeFrom(from.fractionalscalingfactor_); if (from.mode() != 0) { set_mode(from.mode()); } + if (from.linearupsamplemode() != 0) { + set_linearupsamplemode(from.linearupsamplemode()); + } } void UpsampleLayerParams::CopyFrom(const UpsampleLayerParams& from) { @@ -33526,7 +36937,9 @@ void UpsampleLayerParams::Swap(UpsampleLayerParams* other) { } void UpsampleLayerParams::InternalSwap(UpsampleLayerParams* other) { scalingfactor_.InternalSwap(&other->scalingfactor_); + fractionalscalingfactor_.InternalSwap(&other->fractionalscalingfactor_); std::swap(mode_, other->mode_); + std::swap(linearupsamplemode_, other->linearupsamplemode_); std::swap(_cached_size_, other->_cached_size_); } @@ -33567,6 +36980,36 @@ UpsampleLayerParams::mutable_scalingfactor() { return &scalingfactor_; } +// repeated float fractionalScalingFactor = 7; +int UpsampleLayerParams::fractionalscalingfactor_size() const { + return fractionalscalingfactor_.size(); +} +void UpsampleLayerParams::clear_fractionalscalingfactor() { + fractionalscalingfactor_.Clear(); +} +float UpsampleLayerParams::fractionalscalingfactor(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) + return fractionalscalingfactor_.Get(index); +} +void 
UpsampleLayerParams::set_fractionalscalingfactor(int index, float value) { + fractionalscalingfactor_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) +} +void UpsampleLayerParams::add_fractionalscalingfactor(float value) { + fractionalscalingfactor_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) +} +const ::google::protobuf::RepeatedField< float >& +UpsampleLayerParams::fractionalscalingfactor() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) + return fractionalscalingfactor_; +} +::google::protobuf::RepeatedField< float >* +UpsampleLayerParams::mutable_fractionalscalingfactor() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) + return &fractionalscalingfactor_; +} + // .CoreML.Specification.UpsampleLayerParams.InterpolationMode mode = 5; void UpsampleLayerParams::clear_mode() { mode_ = 0; @@ -33581,6 +37024,20 @@ void UpsampleLayerParams::set_mode(::CoreML::Specification::UpsampleLayerParams_ // @@protoc_insertion_point(field_set:CoreML.Specification.UpsampleLayerParams.mode) } +// .CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode linearUpsampleMode = 6; +void UpsampleLayerParams::clear_linearupsamplemode() { + linearupsamplemode_ = 0; +} +::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::linearupsamplemode() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.UpsampleLayerParams.linearUpsampleMode) + return static_cast< ::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode >(linearupsamplemode_); +} +void UpsampleLayerParams::set_linearupsamplemode(::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode value) { + + linearupsamplemode_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.UpsampleLayerParams.linearUpsampleMode) +} + #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== @@ -44715,6 +48172,7 @@ const int BatchedMatMulLayerParams::kWeightMatrixSecondDimensionFieldNumber; const int BatchedMatMulLayerParams::kHasBiasFieldNumber; const int BatchedMatMulLayerParams::kWeightsFieldNumber; const int BatchedMatMulLayerParams::kBiasFieldNumber; +const int BatchedMatMulLayerParams::kInt8DynamicQuantizeFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 BatchedMatMulLayerParams::BatchedMatMulLayerParams() @@ -44741,14 +48199,14 @@ BatchedMatMulLayerParams::BatchedMatMulLayerParams(const BatchedMatMulLayerParam bias_ = NULL; } ::memcpy(&weightmatrixfirstdimension_, &from.weightmatrixfirstdimension_, - reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&weightmatrixfirstdimension_) + sizeof(hasbias_)); + reinterpret_cast<char*>(&int8dynamicquantize_) - + reinterpret_cast<char*>(&weightmatrixfirstdimension_) + sizeof(int8dynamicquantize_)); // @@protoc_insertion_point(copy_constructor:CoreML.Specification.BatchedMatMulLayerParams) } void BatchedMatMulLayerParams::SharedCtor() { - ::memset(&weights_, 0, reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&weights_) + sizeof(hasbias_)); + ::memset(&weights_, 0, reinterpret_cast<char*>(&int8dynamicquantize_) - + reinterpret_cast<char*>(&weights_) + sizeof(int8dynamicquantize_)); _cached_size_ = 0; } @@ -44794,8 +48252,8 @@ void BatchedMatMulLayerParams::Clear() { delete bias_; } bias_ = NULL; - ::memset(&weightmatrixfirstdimension_, 0, reinterpret_cast<char*>(&hasbias_) - - reinterpret_cast<char*>(&weightmatrixfirstdimension_) + sizeof(hasbias_)); + ::memset(&weightmatrixfirstdimension_, 0, reinterpret_cast<char*>(&int8dynamicquantize_) - + reinterpret_cast<char*>(&weightmatrixfirstdimension_) + sizeof(int8dynamicquantize_)); } bool BatchedMatMulLayerParams::MergePartialFromCodedStream( @@ -44902,6 +48360,20 @@ bool BatchedMatMulLayerParams::MergePartialFromCodedStream( break; } + // bool int8DynamicQuantize = 10; + case 10: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(80u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &int8dynamicquantize_))); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -44966,6 +48438,11 @@ void BatchedMatMulLayerParams::SerializeWithCachedSizes( 9, *this->bias_, output); } + // bool int8DynamicQuantize = 10; + if (this->int8dynamicquantize() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(10, this->int8dynamicquantize(), output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.BatchedMatMulLayerParams) } @@ -45016,6 +48493,11 @@ size_t BatchedMatMulLayerParams::ByteSizeLong() const { total_size += 1 + 1; } + // bool int8DynamicQuantize = 10; + if (this->int8dynamicquantize() != 0) { + total_size += 1 + 1; + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -45056,6 +48538,9 @@ void BatchedMatMulLayerParams::MergeFrom(const BatchedMatMulLayerParams& from) { if (from.hasbias() != 0) { set_hasbias(from.hasbias()); } + if (from.int8dynamicquantize() != 0) { + set_int8dynamicquantize(from.int8dynamicquantize()); + } } void BatchedMatMulLayerParams::CopyFrom(const BatchedMatMulLayerParams& from) { @@ -45081,6 +48566,7 @@ void BatchedMatMulLayerParams::InternalSwap(BatchedMatMulLayerParams* other) { std::swap(transposea_, other->transposea_); std::swap(transposeb_, other->transposeb_); std::swap(hasbias_, other->hasbias_); + std::swap(int8dynamicquantize_, other->int8dynamicquantize_); std::swap(_cached_size_, other->_cached_size_); } @@ -45239,6 +48725,20 @@ void BatchedMatMulLayerParams::set_allocated_bias(::CoreML::Specification::Weigh // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.BatchedMatMulLayerParams.bias) } +// bool int8DynamicQuantize = 10; +void BatchedMatMulLayerParams::clear_int8dynamicquantize() { + int8dynamicquantize_ = false; +} +bool BatchedMatMulLayerParams::int8dynamicquantize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.BatchedMatMulLayerParams.int8DynamicQuantize) + return int8dynamicquantize_; +} +void BatchedMatMulLayerParams::set_int8dynamicquantize(bool value) { + + int8dynamicquantize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.BatchedMatMulLayerParams.int8DynamicQuantize) +} + #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== @@ -62983,6 +66483,7 @@ const int SliceStaticLayerParams::kBeginMasksFieldNumber; const int SliceStaticLayerParams::kEndIdsFieldNumber; const int SliceStaticLayerParams::kEndMasksFieldNumber; const int SliceStaticLayerParams::kStridesFieldNumber; +const int SliceStaticLayerParams::kSqueezeMasksFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 SliceStaticLayerParams::SliceStaticLayerParams() @@ 
-63001,6 +66502,7 @@ SliceStaticLayerParams::SliceStaticLayerParams(const SliceStaticLayerParams& fro endids_(from.endids_), endmasks_(from.endmasks_), strides_(from.strides_), + squeezemasks_(from.squeezemasks_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); // @@protoc_insertion_point(copy_constructor:CoreML.Specification.SliceStaticLayerParams) @@ -63043,6 +66545,7 @@ void SliceStaticLayerParams::Clear() { endids_.Clear(); endmasks_.Clear(); strides_.Clear(); + squeezemasks_.Clear(); } bool SliceStaticLayerParams::MergePartialFromCodedStream( @@ -63145,6 +66648,24 @@ bool SliceStaticLayerParams::MergePartialFromCodedStream( break; } + // repeated bool squeezeMasks = 6; + case 6: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(50u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, this->mutable_squeezemasks()))); + } else if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(48u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + 1, 50u, input, this->mutable_squeezemasks()))); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -63218,6 +66739,14 @@ void SliceStaticLayerParams::SerializeWithCachedSizes( this->strides(i), output); } + // repeated bool squeezeMasks = 6; + if (this->squeezemasks_size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteTag(6, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); + output->WriteVarint32(_squeezemasks_cached_byte_size_); + ::google::protobuf::internal::WireFormatLite::WriteBoolArray( + this->squeezemasks().data(), this->squeezemasks_size(), output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.SliceStaticLayerParams) } @@ -63300,6 +66829,21 @@ size_t SliceStaticLayerParams::ByteSizeLong() const { total_size += data_size; } + // repeated bool squeezeMasks = 6; + { + unsigned int count = this->squeezemasks_size(); + size_t data_size = 1UL * count; + if (data_size > 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size(data_size); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(data_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _squeezemasks_cached_byte_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + total_size += data_size; + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -63324,6 +66868,7 @@ void SliceStaticLayerParams::MergeFrom(const SliceStaticLayerParams& from) { endids_.MergeFrom(from.endids_); endmasks_.MergeFrom(from.endmasks_); strides_.MergeFrom(from.strides_); + squeezemasks_.MergeFrom(from.squeezemasks_); } void SliceStaticLayerParams::CopyFrom(const SliceStaticLayerParams& from) { @@ -63347,6 +66892,7 @@ void SliceStaticLayerParams::InternalSwap(SliceStaticLayerParams* other) { endids_.InternalSwap(&other->endids_); endmasks_.InternalSwap(&other->endmasks_); strides_.InternalSwap(&other->strides_); + squeezemasks_.InternalSwap(&other->squeezemasks_); std::swap(_cached_size_, other->_cached_size_); } @@ -63507,6 +67053,36 @@ SliceStaticLayerParams::mutable_strides() { return &strides_; } +// repeated bool squeezeMasks = 6; +int 
SliceStaticLayerParams::squeezemasks_size() const { + return squeezemasks_.size(); +} +void SliceStaticLayerParams::clear_squeezemasks() { + squeezemasks_.Clear(); +} +bool SliceStaticLayerParams::squeezemasks(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) + return squeezemasks_.Get(index); +} +void SliceStaticLayerParams::set_squeezemasks(int index, bool value) { + squeezemasks_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) +} +void SliceStaticLayerParams::add_squeezemasks(bool value) { + squeezemasks_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) +} +const ::google::protobuf::RepeatedField< bool >& +SliceStaticLayerParams::squeezemasks() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) + return squeezemasks_; +} +::google::protobuf::RepeatedField< bool >* +SliceStaticLayerParams::mutable_squeezemasks() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) + return &squeezemasks_; +} + #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== @@ -63516,6 +67092,7 @@ const int SliceDynamicLayerParams::kBeginMasksFieldNumber; const int SliceDynamicLayerParams::kEndIdsFieldNumber; const int SliceDynamicLayerParams::kEndMasksFieldNumber; const int SliceDynamicLayerParams::kStridesFieldNumber; +const int SliceDynamicLayerParams::kSqueezeMasksFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 SliceDynamicLayerParams::SliceDynamicLayerParams() @@ -63533,6 +67110,7 @@ SliceDynamicLayerParams::SliceDynamicLayerParams(const SliceDynamicLayerParams& endids_(from.endids_), endmasks_(from.endmasks_), strides_(from.strides_), + squeezemasks_(from.squeezemasks_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); // @@protoc_insertion_point(copy_constructor:CoreML.Specification.SliceDynamicLayerParams) @@ -63574,6 +67152,7 @@ void SliceDynamicLayerParams::Clear() { endids_.Clear(); endmasks_.Clear(); strides_.Clear(); + squeezemasks_.Clear(); } bool SliceDynamicLayerParams::MergePartialFromCodedStream( @@ -63658,6 +67237,24 @@ bool SliceDynamicLayerParams::MergePartialFromCodedStream( break; } + // repeated bool squeezeMasks = 6; + case 6: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(50u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, this->mutable_squeezemasks()))); + } else if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(48u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + 1, 50u, input, this->mutable_squeezemasks()))); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -63721,6 +67318,14 @@ void SliceDynamicLayerParams::SerializeWithCachedSizes( this->strides(i), output); } + // repeated bool squeezeMasks = 6; + if (this->squeezemasks_size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteTag(6, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); + output->WriteVarint32(_squeezemasks_cached_byte_size_); + 
::google::protobuf::internal::WireFormatLite::WriteBoolArray( + this->squeezemasks().data(), this->squeezemasks_size(), output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.SliceDynamicLayerParams) } @@ -63788,6 +67393,21 @@ size_t SliceDynamicLayerParams::ByteSizeLong() const { total_size += data_size; } + // repeated bool squeezeMasks = 6; + { + unsigned int count = this->squeezemasks_size(); + size_t data_size = 1UL * count; + if (data_size > 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size(data_size); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(data_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _squeezemasks_cached_byte_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + total_size += data_size; + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -63811,6 +67431,7 @@ void SliceDynamicLayerParams::MergeFrom(const SliceDynamicLayerParams& from) { endids_.MergeFrom(from.endids_); endmasks_.MergeFrom(from.endmasks_); strides_.MergeFrom(from.strides_); + squeezemasks_.MergeFrom(from.squeezemasks_); } void SliceDynamicLayerParams::CopyFrom(const SliceDynamicLayerParams& from) { @@ -63833,6 +67454,7 @@ void SliceDynamicLayerParams::InternalSwap(SliceDynamicLayerParams* other) { endids_.InternalSwap(&other->endids_); endmasks_.InternalSwap(&other->endmasks_); strides_.InternalSwap(&other->strides_); + squeezemasks_.InternalSwap(&other->squeezemasks_); std::swap(_cached_size_, other->_cached_size_); } @@ -63933,118 +67555,658 @@ SliceDynamicLayerParams::mutable_endmasks() { return &endmasks_; } -// repeated int64 strides = 5; -int SliceDynamicLayerParams::strides_size() const { - return strides_.size(); -} -void SliceDynamicLayerParams::clear_strides() { - strides_.Clear(); +// repeated int64 strides = 5; +int SliceDynamicLayerParams::strides_size() const { + return strides_.size(); +} +void SliceDynamicLayerParams::clear_strides() { + strides_.Clear(); +} +::google::protobuf::int64 SliceDynamicLayerParams::strides(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceDynamicLayerParams.strides) + return strides_.Get(index); +} +void SliceDynamicLayerParams::set_strides(int index, ::google::protobuf::int64 value) { + strides_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceDynamicLayerParams.strides) +} +void SliceDynamicLayerParams::add_strides(::google::protobuf::int64 value) { + strides_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.SliceDynamicLayerParams.strides) +} +const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >& +SliceDynamicLayerParams::strides() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.SliceDynamicLayerParams.strides) + return strides_; +} +::google::protobuf::RepeatedField< ::google::protobuf::int64 >* +SliceDynamicLayerParams::mutable_strides() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.SliceDynamicLayerParams.strides) + return &strides_; +} + +// repeated bool squeezeMasks = 6; +int SliceDynamicLayerParams::squeezemasks_size() const { + return squeezemasks_.size(); +} +void SliceDynamicLayerParams::clear_squeezemasks() { + squeezemasks_.Clear(); +} +bool SliceDynamicLayerParams::squeezemasks(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) + return 
squeezemasks_.Get(index); +} +void SliceDynamicLayerParams::set_squeezemasks(int index, bool value) { + squeezemasks_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) +} +void SliceDynamicLayerParams::add_squeezemasks(bool value) { + squeezemasks_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) +} +const ::google::protobuf::RepeatedField< bool >& +SliceDynamicLayerParams::squeezemasks() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) + return squeezemasks_; +} +::google::protobuf::RepeatedField< bool >* +SliceDynamicLayerParams::mutable_squeezemasks() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) + return &squeezemasks_; +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int TileLayerParams::kRepsFieldNumber; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +TileLayerParams::TileLayerParams() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.TileLayerParams) +} +TileLayerParams::TileLayerParams(const TileLayerParams& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + reps_(from.reps_), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.TileLayerParams) +} + +void TileLayerParams::SharedCtor() { + _cached_size_ = 0; +} + +TileLayerParams::~TileLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.TileLayerParams) + SharedDtor(); +} + +void TileLayerParams::SharedDtor() { +} + +void TileLayerParams::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const TileLayerParams& TileLayerParams::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +TileLayerParams* TileLayerParams::New(::google::protobuf::Arena* arena) const { + TileLayerParams* n = new TileLayerParams; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void TileLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.TileLayerParams) + reps_.Clear(); +} + +bool TileLayerParams::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.TileLayerParams) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // repeated uint64 reps = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(10u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + input, 
this->mutable_reps())));
+        } else if (static_cast< ::google::protobuf::uint8>(tag) ==
+                   static_cast< ::google::protobuf::uint8>(8u)) {
+          DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline<
+                   ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>(
+                 1, 10u, input, this->mutable_reps())));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      default: {
+      handle_unusual:
+        if (tag == 0 ||
+            ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
+            ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
+          goto success;
+        }
+        DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
+        break;
+      }
+    }
+  }
+success:
+  // @@protoc_insertion_point(parse_success:CoreML.Specification.TileLayerParams)
+  return true;
+failure:
+  // @@protoc_insertion_point(parse_failure:CoreML.Specification.TileLayerParams)
+  return false;
+#undef DO_
+}
+
+void TileLayerParams::SerializeWithCachedSizes(
+    ::google::protobuf::io::CodedOutputStream* output) const {
+  // @@protoc_insertion_point(serialize_start:CoreML.Specification.TileLayerParams)
+  ::google::protobuf::uint32 cached_has_bits = 0;
+  (void) cached_has_bits;
+
+  // repeated uint64 reps = 1;
+  if (this->reps_size() > 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteTag(1, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
+    output->WriteVarint32(_reps_cached_byte_size_);
+  }
+  for (int i = 0, n = this->reps_size(); i < n; i++) {
+    ::google::protobuf::internal::WireFormatLite::WriteUInt64NoTag(
+      this->reps(i), output);
+  }
+
+  // @@protoc_insertion_point(serialize_end:CoreML.Specification.TileLayerParams)
+}
+
+size_t TileLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.TileLayerParams)
+  size_t total_size = 0;
+
+  // repeated uint64 reps = 1;
+  {
+    size_t data_size = ::google::protobuf::internal::WireFormatLite::
+      UInt64Size(this->reps_);
+    if (data_size > 0) {
+      total_size += 1 +
+        ::google::protobuf::internal::WireFormatLite::Int32Size(data_size);
+    }
+    int cached_size = ::google::protobuf::internal::ToCachedSize(data_size);
+    GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
+    _reps_cached_byte_size_ = cached_size;
+    GOOGLE_SAFE_CONCURRENT_WRITES_END();
+    total_size += data_size;
+  }
+
+  int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
+  GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
+  _cached_size_ = cached_size;
+  GOOGLE_SAFE_CONCURRENT_WRITES_END();
+  return total_size;
+}
+
+void TileLayerParams::CheckTypeAndMergeFrom(
+    const ::google::protobuf::MessageLite& from) {
+  MergeFrom(*::google::protobuf::down_cast<const TileLayerParams*>(&from));
+}
+
+void TileLayerParams::MergeFrom(const TileLayerParams& from) {
+// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.TileLayerParams)
+  GOOGLE_DCHECK_NE(&from, this);
+  _internal_metadata_.MergeFrom(from._internal_metadata_);
+  ::google::protobuf::uint32 cached_has_bits = 0;
+  (void) cached_has_bits;
+
+  reps_.MergeFrom(from.reps_);
+}
+
+void TileLayerParams::CopyFrom(const TileLayerParams& from) {
+// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.TileLayerParams)
+  if (&from == this) return;
+  Clear();
+  MergeFrom(from);
+}
+
+bool TileLayerParams::IsInitialized() const {
+  return true;
+}
+
+void TileLayerParams::Swap(TileLayerParams* other) {
+  if (other == this) return;
+  InternalSwap(other);
+}
+void TileLayerParams::InternalSwap(TileLayerParams* other) {
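+  // Swapping member-by-member keeps this O(1): RepeatedField::InternalSwap
+  // exchanges the fields' underlying buffers rather than copying elements.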
reps_.InternalSwap(&other->reps_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string TileLayerParams::GetTypeName() const { + return "CoreML.Specification.TileLayerParams"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// TileLayerParams + +// repeated uint64 reps = 1; +int TileLayerParams::reps_size() const { + return reps_.size(); +} +void TileLayerParams::clear_reps() { + reps_.Clear(); +} +::google::protobuf::uint64 TileLayerParams::reps(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.TileLayerParams.reps) + return reps_.Get(index); +} +void TileLayerParams::set_reps(int index, ::google::protobuf::uint64 value) { + reps_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.TileLayerParams.reps) +} +void TileLayerParams::add_reps(::google::protobuf::uint64 value) { + reps_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.TileLayerParams.reps) +} +const ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >& +TileLayerParams::reps() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.TileLayerParams.reps) + return reps_; +} +::google::protobuf::RepeatedField< ::google::protobuf::uint64 >* +TileLayerParams::mutable_reps() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.TileLayerParams.reps) + return &reps_; +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +GetShapeLayerParams::GetShapeLayerParams() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.GetShapeLayerParams) +} +GetShapeLayerParams::GetShapeLayerParams(const GetShapeLayerParams& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.GetShapeLayerParams) +} + +void GetShapeLayerParams::SharedCtor() { + _cached_size_ = 0; +} + +GetShapeLayerParams::~GetShapeLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.GetShapeLayerParams) + SharedDtor(); +} + +void GetShapeLayerParams::SharedDtor() { +} + +void GetShapeLayerParams::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const GetShapeLayerParams& GetShapeLayerParams::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +GetShapeLayerParams* GetShapeLayerParams::New(::google::protobuf::Arena* arena) const { + GetShapeLayerParams* n = new GetShapeLayerParams; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void GetShapeLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.GetShapeLayerParams) +} + +bool GetShapeLayerParams::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.GetShapeLayerParams) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = 
input->ReadTagWithCutoffNoLastTag(127u);
+    tag = p.first;
+    if (!p.second) goto handle_unusual;
+  handle_unusual:
+    if (tag == 0 ||
+        ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
+        ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
+      goto success;
+    }
+    DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
+  }
+success:
+  // @@protoc_insertion_point(parse_success:CoreML.Specification.GetShapeLayerParams)
+  return true;
+failure:
+  // @@protoc_insertion_point(parse_failure:CoreML.Specification.GetShapeLayerParams)
+  return false;
+#undef DO_
+}
+
+void GetShapeLayerParams::SerializeWithCachedSizes(
+    ::google::protobuf::io::CodedOutputStream* output) const {
+  // @@protoc_insertion_point(serialize_start:CoreML.Specification.GetShapeLayerParams)
+  ::google::protobuf::uint32 cached_has_bits = 0;
+  (void) cached_has_bits;
+
+  // @@protoc_insertion_point(serialize_end:CoreML.Specification.GetShapeLayerParams)
+}
+
+size_t GetShapeLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.GetShapeLayerParams)
+  size_t total_size = 0;
+
+  int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
+  GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
+  _cached_size_ = cached_size;
+  GOOGLE_SAFE_CONCURRENT_WRITES_END();
+  return total_size;
+}
+
+void GetShapeLayerParams::CheckTypeAndMergeFrom(
+    const ::google::protobuf::MessageLite& from) {
+  MergeFrom(*::google::protobuf::down_cast<const GetShapeLayerParams*>(&from));
+}
+
+void GetShapeLayerParams::MergeFrom(const GetShapeLayerParams& from) {
+// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.GetShapeLayerParams)
+  GOOGLE_DCHECK_NE(&from, this);
+  _internal_metadata_.MergeFrom(from._internal_metadata_);
+  ::google::protobuf::uint32 cached_has_bits = 0;
+  (void) cached_has_bits;
+
+}
+
+void GetShapeLayerParams::CopyFrom(const GetShapeLayerParams& from) {
+// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.GetShapeLayerParams)
+  if (&from == this) return;
+  Clear();
+  MergeFrom(from);
+}
+
+bool GetShapeLayerParams::IsInitialized() const {
+  return true;
+}
+
+void GetShapeLayerParams::Swap(GetShapeLayerParams* other) {
+  if (other == this) return;
+  InternalSwap(other);
+}
+void GetShapeLayerParams::InternalSwap(GetShapeLayerParams* other) {
+  std::swap(_cached_size_, other->_cached_size_);
+}
+
+::std::string GetShapeLayerParams::GetTypeName() const {
+  return "CoreML.Specification.GetShapeLayerParams";
+}
+
+#if PROTOBUF_INLINE_NOT_IN_HEADERS
+// GetShapeLayerParams
+
+#endif  // PROTOBUF_INLINE_NOT_IN_HEADERS
+
+// ===================================================================
+
+#if !defined(_MSC_VER) || _MSC_VER >= 1900
+#endif  // !defined(_MSC_VER) || _MSC_VER >= 1900
+
+ErfLayerParams::ErfLayerParams()
+  : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) {
+  if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) {
+    protobuf_NeuralNetwork_2eproto::InitDefaults();
+  }
+  SharedCtor();
+  // @@protoc_insertion_point(constructor:CoreML.Specification.ErfLayerParams)
+}
+ErfLayerParams::ErfLayerParams(const ErfLayerParams& from)
+  : ::google::protobuf::MessageLite(),
+    _internal_metadata_(NULL),
+    _cached_size_(0) {
+  _internal_metadata_.MergeFrom(from._internal_metadata_);
+  // @@protoc_insertion_point(copy_constructor:CoreML.Specification.ErfLayerParams)
+}
+
+void ErfLayerParams::SharedCtor() {
+  _cached_size_ = 0;
+}
+
+ErfLayerParams::~ErfLayerParams() {
+  // @@protoc_insertion_point(destructor:CoreML.Specification.ErfLayerParams)
+  SharedDtor();
+}
+
+void ErfLayerParams::SharedDtor() {
+}
+
+void ErfLayerParams::SetCachedSize(int size) const {
+  GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
+  _cached_size_ = size;
+  GOOGLE_SAFE_CONCURRENT_WRITES_END();
+}
+const ErfLayerParams& ErfLayerParams::default_instance() {
+  protobuf_NeuralNetwork_2eproto::InitDefaults();
+  return *internal_default_instance();
+}
+
+ErfLayerParams* ErfLayerParams::New(::google::protobuf::Arena* arena) const {
+  ErfLayerParams* n = new ErfLayerParams;
+  if (arena != NULL) {
+    arena->Own(n);
+  }
+  return n;
+}
+
+void ErfLayerParams::Clear() {
+// @@protoc_insertion_point(message_clear_start:CoreML.Specification.ErfLayerParams)
+}
+
+bool ErfLayerParams::MergePartialFromCodedStream(
+    ::google::protobuf::io::CodedInputStream* input) {
+#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
+  ::google::protobuf::uint32 tag;
+  // @@protoc_insertion_point(parse_start:CoreML.Specification.ErfLayerParams)
+  for (;;) {
+    ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
+    tag = p.first;
+    if (!p.second) goto handle_unusual;
+  handle_unusual:
+    if (tag == 0 ||
+        ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
+        ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
+      goto success;
+    }
+    DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
+  }
+success:
+  // @@protoc_insertion_point(parse_success:CoreML.Specification.ErfLayerParams)
+  return true;
+failure:
+  // @@protoc_insertion_point(parse_failure:CoreML.Specification.ErfLayerParams)
+  return false;
+#undef DO_
+}
+
+void ErfLayerParams::SerializeWithCachedSizes(
+    ::google::protobuf::io::CodedOutputStream* output) const {
+  // @@protoc_insertion_point(serialize_start:CoreML.Specification.ErfLayerParams)
+  ::google::protobuf::uint32 cached_has_bits = 0;
+  (void) cached_has_bits;
+
+  // @@protoc_insertion_point(serialize_end:CoreML.Specification.ErfLayerParams)
+}
+
+size_t ErfLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.ErfLayerParams)
+  size_t total_size = 0;
+
+  int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
+  GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
+  _cached_size_ = cached_size;
+  GOOGLE_SAFE_CONCURRENT_WRITES_END();
+  return total_size;
+}
+
+void ErfLayerParams::CheckTypeAndMergeFrom(
+    const ::google::protobuf::MessageLite& from) {
+  MergeFrom(*::google::protobuf::down_cast<const ErfLayerParams*>(&from));
+}
+
+void ErfLayerParams::MergeFrom(const ErfLayerParams& from) {
+// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.ErfLayerParams)
+  GOOGLE_DCHECK_NE(&from, this);
+  _internal_metadata_.MergeFrom(from._internal_metadata_);
+  ::google::protobuf::uint32 cached_has_bits = 0;
+  (void) cached_has_bits;
+
 }
-::google::protobuf::int64 SliceDynamicLayerParams::strides(int index) const {
-  // @@protoc_insertion_point(field_get:CoreML.Specification.SliceDynamicLayerParams.strides)
-  return strides_.Get(index);
+
+void ErfLayerParams::CopyFrom(const ErfLayerParams& from) {
+// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.ErfLayerParams)
+  if (&from == this) return;
+  Clear();
+  MergeFrom(from);
 }
-void SliceDynamicLayerParams::set_strides(int index, ::google::protobuf::int64 value) {
-  strides_.Set(index, value);
-  //
@@protoc_insertion_point(field_set:CoreML.Specification.SliceDynamicLayerParams.strides) + +bool ErfLayerParams::IsInitialized() const { + return true; } -void SliceDynamicLayerParams::add_strides(::google::protobuf::int64 value) { - strides_.Add(value); - // @@protoc_insertion_point(field_add:CoreML.Specification.SliceDynamicLayerParams.strides) + +void ErfLayerParams::Swap(ErfLayerParams* other) { + if (other == this) return; + InternalSwap(other); } -const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >& -SliceDynamicLayerParams::strides() const { - // @@protoc_insertion_point(field_list:CoreML.Specification.SliceDynamicLayerParams.strides) - return strides_; +void ErfLayerParams::InternalSwap(ErfLayerParams* other) { + std::swap(_cached_size_, other->_cached_size_); } -::google::protobuf::RepeatedField< ::google::protobuf::int64 >* -SliceDynamicLayerParams::mutable_strides() { - // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.SliceDynamicLayerParams.strides) - return &strides_; + +::std::string ErfLayerParams::GetTypeName() const { + return "CoreML.Specification.ErfLayerParams"; } +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// ErfLayerParams + #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int TileLayerParams::kRepsFieldNumber; +const int GeluLayerParams::kModeFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -TileLayerParams::TileLayerParams() +GeluLayerParams::GeluLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.TileLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.GeluLayerParams) } -TileLayerParams::TileLayerParams(const TileLayerParams& from) +GeluLayerParams::GeluLayerParams(const GeluLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), - reps_(from.reps_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.TileLayerParams) + mode_ = from.mode_; + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.GeluLayerParams) } -void TileLayerParams::SharedCtor() { +void GeluLayerParams::SharedCtor() { + mode_ = 0; _cached_size_ = 0; } -TileLayerParams::~TileLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.TileLayerParams) +GeluLayerParams::~GeluLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.GeluLayerParams) SharedDtor(); } -void TileLayerParams::SharedDtor() { +void GeluLayerParams::SharedDtor() { } -void TileLayerParams::SetCachedSize(int size) const { +void GeluLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const TileLayerParams& TileLayerParams::default_instance() { +const GeluLayerParams& GeluLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -TileLayerParams* TileLayerParams::New(::google::protobuf::Arena* arena) const { - TileLayerParams* n = new TileLayerParams; +GeluLayerParams* GeluLayerParams::New(::google::protobuf::Arena* arena) const { + GeluLayerParams* n = new GeluLayerParams; if (arena != NULL) { arena->Own(n); } return 
n; } -void TileLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.TileLayerParams) - reps_.Clear(); +void GeluLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.GeluLayerParams) + mode_ = 0; } -bool TileLayerParams::MergePartialFromCodedStream( +bool GeluLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.TileLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.GeluLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // repeated uint64 reps = 1; + // .CoreML.Specification.GeluLayerParams.GeluMode mode = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, this->mutable_reps()))); - } else if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(8u)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - 1, 10u, input, this->mutable_reps()))); + static_cast< ::google::protobuf::uint8>(8u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_mode(static_cast< ::CoreML::Specification::GeluLayerParams_GeluMode >(value)); } else { goto handle_unusual; } @@ -64064,50 +68226,37 @@ bool TileLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.TileLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.GeluLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.TileLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.GeluLayerParams) return false; #undef DO_ } -void TileLayerParams::SerializeWithCachedSizes( +void GeluLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.TileLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.GeluLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // repeated uint64 reps = 1; - if (this->reps_size() > 0) { - ::google::protobuf::internal::WireFormatLite::WriteTag(1, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); - output->WriteVarint32(_reps_cached_byte_size_); - } - for (int i = 0, n = this->reps_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64NoTag( - this->reps(i), output); + // .CoreML.Specification.GeluLayerParams.GeluMode mode = 1; + if (this->mode() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 1, this->mode(), output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.TileLayerParams) + // 
@@protoc_insertion_point(serialize_end:CoreML.Specification.GeluLayerParams)
 }
 
-size_t TileLayerParams::ByteSizeLong() const {
-// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.TileLayerParams)
+size_t GeluLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.GeluLayerParams)
   size_t total_size = 0;
 
-  // repeated uint64 reps = 1;
-  {
-    size_t data_size = ::google::protobuf::internal::WireFormatLite::
-      UInt64Size(this->reps_);
-    if (data_size > 0) {
-      total_size += 1 +
-        ::google::protobuf::internal::WireFormatLite::Int32Size(data_size);
-    }
-    int cached_size = ::google::protobuf::internal::ToCachedSize(data_size);
-    GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
-    _reps_cached_byte_size_ = cached_size;
-    GOOGLE_SAFE_CONCURRENT_WRITES_END();
-    total_size += data_size;
+  // .CoreML.Specification.GeluLayerParams.GeluMode mode = 1;
+  if (this->mode() != 0) {
+    total_size += 1 +
+      ::google::protobuf::internal::WireFormatLite::EnumSize(this->mode());
   }
 
   int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
@@ -64117,76 +68266,62 @@ size_t TileLayerParams::ByteSizeLong() const {
   return total_size;
 }
 
-void TileLayerParams::CheckTypeAndMergeFrom(
+void GeluLayerParams::CheckTypeAndMergeFrom(
     const ::google::protobuf::MessageLite& from) {
-  MergeFrom(*::google::protobuf::down_cast<const TileLayerParams*>(&from));
+  MergeFrom(*::google::protobuf::down_cast<const GeluLayerParams*>(&from));
 }
 
-void TileLayerParams::MergeFrom(const TileLayerParams& from) {
-// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.TileLayerParams)
+void GeluLayerParams::MergeFrom(const GeluLayerParams& from) {
+// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.GeluLayerParams)
   GOOGLE_DCHECK_NE(&from, this);
   _internal_metadata_.MergeFrom(from._internal_metadata_);
   ::google::protobuf::uint32 cached_has_bits = 0;
   (void) cached_has_bits;
 
-  reps_.MergeFrom(from.reps_);
+  if (from.mode() != 0) {
+    set_mode(from.mode());
+  }
 }
 
-void TileLayerParams::CopyFrom(const TileLayerParams& from) {
-// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.TileLayerParams)
+void GeluLayerParams::CopyFrom(const GeluLayerParams& from) {
+// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.GeluLayerParams)
   if (&from == this) return;
   Clear();
   MergeFrom(from);
 }
 
-bool TileLayerParams::IsInitialized() const {
+bool GeluLayerParams::IsInitialized() const {
   return true;
 }
 
-void TileLayerParams::Swap(TileLayerParams* other) {
+void GeluLayerParams::Swap(GeluLayerParams* other) {
   if (other == this) return;
   InternalSwap(other);
 }
-void TileLayerParams::InternalSwap(TileLayerParams* other) {
-  reps_.InternalSwap(&other->reps_);
+void GeluLayerParams::InternalSwap(GeluLayerParams* other) {
+  std::swap(mode_, other->mode_);
   std::swap(_cached_size_, other->_cached_size_);
 }
 
-::std::string TileLayerParams::GetTypeName() const {
-  return "CoreML.Specification.TileLayerParams";
+::std::string GeluLayerParams::GetTypeName() const {
+  return "CoreML.Specification.GeluLayerParams";
 }
 
 #if PROTOBUF_INLINE_NOT_IN_HEADERS
-// TileLayerParams
+// GeluLayerParams
 
-// repeated uint64 reps = 1;
-int TileLayerParams::reps_size() const {
-  return reps_.size();
-}
-void TileLayerParams::clear_reps() {
-  reps_.Clear();
-}
-::google::protobuf::uint64 TileLayerParams::reps(int index) const {
-  // @@protoc_insertion_point(field_get:CoreML.Specification.TileLayerParams.reps)
-  return reps_.Get(index);
-}
-void TileLayerParams::set_reps(int index, ::google::protobuf::uint64 value) {
-  reps_.Set(index, value);
-  // @@protoc_insertion_point(field_set:CoreML.Specification.TileLayerParams.reps)
-}
-void TileLayerParams::add_reps(::google::protobuf::uint64 value) {
-  reps_.Add(value);
-  // @@protoc_insertion_point(field_add:CoreML.Specification.TileLayerParams.reps)
+// .CoreML.Specification.GeluLayerParams.GeluMode mode = 1;
+void GeluLayerParams::clear_mode() {
+  mode_ = 0;
 }
-const ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >&
-TileLayerParams::reps() const {
-  // @@protoc_insertion_point(field_list:CoreML.Specification.TileLayerParams.reps)
-  return reps_;
+::CoreML::Specification::GeluLayerParams_GeluMode GeluLayerParams::mode() const {
+  // @@protoc_insertion_point(field_get:CoreML.Specification.GeluLayerParams.mode)
+  return static_cast< ::CoreML::Specification::GeluLayerParams_GeluMode >(mode_);
 }
-::google::protobuf::RepeatedField< ::google::protobuf::uint64 >*
-TileLayerParams::mutable_reps() {
-  // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.TileLayerParams.reps)
-  return &reps_;
+void GeluLayerParams::set_mode(::CoreML::Specification::GeluLayerParams_GeluMode value) {
+
+  mode_ = value;
+  // @@protoc_insertion_point(field_set:CoreML.Specification.GeluLayerParams.mode)
 }
 
 #endif  // PROTOBUF_INLINE_NOT_IN_HEADERS
@@ -64194,97 +68329,184 @@ TileLayerParams::mutable_reps() {
 // ===================================================================
 
 #if !defined(_MSC_VER) || _MSC_VER >= 1900
+const int RangeStaticLayerParams::kEndValueFieldNumber;
+const int RangeStaticLayerParams::kStartValueFieldNumber;
+const int RangeStaticLayerParams::kStepSizeValueFieldNumber;
 #endif  // !defined(_MSC_VER) || _MSC_VER >= 1900
 
-GetShapeLayerParams::GetShapeLayerParams()
+RangeStaticLayerParams::RangeStaticLayerParams()
   : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) {
   if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) {
     protobuf_NeuralNetwork_2eproto::InitDefaults();
   }
   SharedCtor();
-  // @@protoc_insertion_point(constructor:CoreML.Specification.GetShapeLayerParams)
+  // @@protoc_insertion_point(constructor:CoreML.Specification.RangeStaticLayerParams)
 }
-GetShapeLayerParams::GetShapeLayerParams(const GetShapeLayerParams& from)
+RangeStaticLayerParams::RangeStaticLayerParams(const RangeStaticLayerParams& from)
   : ::google::protobuf::MessageLite(),
     _internal_metadata_(NULL),
     _cached_size_(0) {
   _internal_metadata_.MergeFrom(from._internal_metadata_);
-  // @@protoc_insertion_point(copy_constructor:CoreML.Specification.GetShapeLayerParams)
+  ::memcpy(&endvalue_, &from.endvalue_,
+    reinterpret_cast<char*>(&stepsizevalue_) -
+    reinterpret_cast<char*>(&endvalue_) + sizeof(stepsizevalue_));
+  // @@protoc_insertion_point(copy_constructor:CoreML.Specification.RangeStaticLayerParams)
 }
 
-void GetShapeLayerParams::SharedCtor() {
+void RangeStaticLayerParams::SharedCtor() {
+  ::memset(&endvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) -
+    reinterpret_cast<char*>(&endvalue_) + sizeof(stepsizevalue_));
   _cached_size_ = 0;
 }
 
-GetShapeLayerParams::~GetShapeLayerParams() {
-  // @@protoc_insertion_point(destructor:CoreML.Specification.GetShapeLayerParams)
+RangeStaticLayerParams::~RangeStaticLayerParams() {
+  // @@protoc_insertion_point(destructor:CoreML.Specification.RangeStaticLayerParams)
   SharedDtor();
 }
 
-void GetShapeLayerParams::SharedDtor() {
+void RangeStaticLayerParams::SharedDtor() {
 }
 
-void GetShapeLayerParams::SetCachedSize(int size) const {
+void RangeStaticLayerParams::SetCachedSize(int size) const {
   GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
   _cached_size_ = size;
   GOOGLE_SAFE_CONCURRENT_WRITES_END();
 }
-const GetShapeLayerParams& GetShapeLayerParams::default_instance() {
+const RangeStaticLayerParams& RangeStaticLayerParams::default_instance() {
   protobuf_NeuralNetwork_2eproto::InitDefaults();
   return *internal_default_instance();
 }
 
-GetShapeLayerParams* GetShapeLayerParams::New(::google::protobuf::Arena* arena) const {
-  GetShapeLayerParams* n = new GetShapeLayerParams;
+RangeStaticLayerParams* RangeStaticLayerParams::New(::google::protobuf::Arena* arena) const {
+  RangeStaticLayerParams* n = new RangeStaticLayerParams;
   if (arena != NULL) {
     arena->Own(n);
   }
   return n;
 }
 
-void GetShapeLayerParams::Clear() {
-// @@protoc_insertion_point(message_clear_start:CoreML.Specification.GetShapeLayerParams)
+void RangeStaticLayerParams::Clear() {
+// @@protoc_insertion_point(message_clear_start:CoreML.Specification.RangeStaticLayerParams)
+  ::memset(&endvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) -
+    reinterpret_cast<char*>(&endvalue_) + sizeof(stepsizevalue_));
 }
 
-bool GetShapeLayerParams::MergePartialFromCodedStream(
+bool RangeStaticLayerParams::MergePartialFromCodedStream(
     ::google::protobuf::io::CodedInputStream* input) {
 #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
   ::google::protobuf::uint32 tag;
-  // @@protoc_insertion_point(parse_start:CoreML.Specification.GetShapeLayerParams)
+  // @@protoc_insertion_point(parse_start:CoreML.Specification.RangeStaticLayerParams)
   for (;;) {
     ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
     tag = p.first;
    if (!p.second) goto handle_unusual;
-  handle_unusual:
-    if (tag == 0 ||
-        ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
-        ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
-      goto success;
+    switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
+      // float endValue = 1;
+      case 1: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(13u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
+               input, &endvalue_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      // float startValue = 2;
+      case 2: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(21u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
+               input, &startvalue_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      // float stepSizeValue = 3;
+      case 3: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(29u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
+               input, &stepsizevalue_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      default: {
+      handle_unusual:
+        if (tag == 0 ||
+            ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
+            ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
+          goto success;
+        }
+        DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
+        break;
+      }
     }
-    DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
   }
 success:
-  // @@protoc_insertion_point(parse_success:CoreML.Specification.GetShapeLayerParams)
+  // @@protoc_insertion_point(parse_success:CoreML.Specification.RangeStaticLayerParams)
   return true;
 failure:
-  // @@protoc_insertion_point(parse_failure:CoreML.Specification.GetShapeLayerParams)
+  // @@protoc_insertion_point(parse_failure:CoreML.Specification.RangeStaticLayerParams)
   return false;
 #undef DO_
 }
 
-void GetShapeLayerParams::SerializeWithCachedSizes(
+void RangeStaticLayerParams::SerializeWithCachedSizes(
     ::google::protobuf::io::CodedOutputStream* output) const {
-  // @@protoc_insertion_point(serialize_start:CoreML.Specification.GetShapeLayerParams)
+  // @@protoc_insertion_point(serialize_start:CoreML.Specification.RangeStaticLayerParams)
   ::google::protobuf::uint32 cached_has_bits = 0;
   (void) cached_has_bits;
 
-  // @@protoc_insertion_point(serialize_end:CoreML.Specification.GetShapeLayerParams)
+  // float endValue = 1;
+  if (this->endvalue() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->endvalue(), output);
+  }
+
+  // float startValue = 2;
+  if (this->startvalue() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->startvalue(), output);
+  }
+
+  // float stepSizeValue = 3;
+  if (this->stepsizevalue() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->stepsizevalue(), output);
+  }
+
+  // @@protoc_insertion_point(serialize_end:CoreML.Specification.RangeStaticLayerParams)
 }
 
-size_t GetShapeLayerParams::ByteSizeLong() const {
-// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.GetShapeLayerParams)
+size_t RangeStaticLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.RangeStaticLayerParams)
   size_t total_size = 0;
 
+  // float endValue = 1;
+  if (this->endvalue() != 0) {
+    total_size += 1 + 4;
+  }
+
+  // float startValue = 2;
+  if (this->startvalue() != 0) {
+    total_size += 1 + 4;
+  }
+
+  // float stepSizeValue = 3;
+  if (this->stepsizevalue() != 0) {
+    total_size += 1 + 4;
+  }
+
   int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
   GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
   _cached_size_ = cached_size;
@@ -64292,142 +68514,258 @@ size_t GetShapeLayerParams::ByteSizeLong() const {
   return total_size;
 }
 
-void GetShapeLayerParams::CheckTypeAndMergeFrom(
+void RangeStaticLayerParams::CheckTypeAndMergeFrom(
    const ::google::protobuf::MessageLite& from) {
-  MergeFrom(*::google::protobuf::down_cast<const GetShapeLayerParams*>(&from));
+  MergeFrom(*::google::protobuf::down_cast<const RangeStaticLayerParams*>(&from));
 }
 
-void GetShapeLayerParams::MergeFrom(const GetShapeLayerParams& from) {
-// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.GetShapeLayerParams)
+void RangeStaticLayerParams::MergeFrom(const RangeStaticLayerParams& from) {
+// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.RangeStaticLayerParams)
   GOOGLE_DCHECK_NE(&from, this);
   _internal_metadata_.MergeFrom(from._internal_metadata_);
   ::google::protobuf::uint32 cached_has_bits = 0;
   (void) cached_has_bits;
 
+  if (from.endvalue() != 0) {
+    set_endvalue(from.endvalue());
+  }
+  if (from.startvalue() != 0) {
+    set_startvalue(from.startvalue());
+  }
+  if (from.stepsizevalue() != 0) {
+    set_stepsizevalue(from.stepsizevalue());
+  }
 }
 
-void GetShapeLayerParams::CopyFrom(const GetShapeLayerParams& from) {
-// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.GetShapeLayerParams)
+void RangeStaticLayerParams::CopyFrom(const RangeStaticLayerParams& from) {
+//
@@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.RangeStaticLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool GetShapeLayerParams::IsInitialized() const { +bool RangeStaticLayerParams::IsInitialized() const { return true; } -void GetShapeLayerParams::Swap(GetShapeLayerParams* other) { +void RangeStaticLayerParams::Swap(RangeStaticLayerParams* other) { if (other == this) return; InternalSwap(other); } -void GetShapeLayerParams::InternalSwap(GetShapeLayerParams* other) { +void RangeStaticLayerParams::InternalSwap(RangeStaticLayerParams* other) { + std::swap(endvalue_, other->endvalue_); + std::swap(startvalue_, other->startvalue_); + std::swap(stepsizevalue_, other->stepsizevalue_); std::swap(_cached_size_, other->_cached_size_); } -::std::string GetShapeLayerParams::GetTypeName() const { - return "CoreML.Specification.GetShapeLayerParams"; +::std::string RangeStaticLayerParams::GetTypeName() const { + return "CoreML.Specification.RangeStaticLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// GetShapeLayerParams +// RangeStaticLayerParams + +// float endValue = 1; +void RangeStaticLayerParams::clear_endvalue() { + endvalue_ = 0; +} +float RangeStaticLayerParams::endvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.RangeStaticLayerParams.endValue) + return endvalue_; +} +void RangeStaticLayerParams::set_endvalue(float value) { + + endvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.RangeStaticLayerParams.endValue) +} + +// float startValue = 2; +void RangeStaticLayerParams::clear_startvalue() { + startvalue_ = 0; +} +float RangeStaticLayerParams::startvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.RangeStaticLayerParams.startValue) + return startvalue_; +} +void RangeStaticLayerParams::set_startvalue(float value) { + + startvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.RangeStaticLayerParams.startValue) +} + +// float stepSizeValue = 3; +void RangeStaticLayerParams::clear_stepsizevalue() { + stepsizevalue_ = 0; +} +float RangeStaticLayerParams::stepsizevalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.RangeStaticLayerParams.stepSizeValue) + return stepsizevalue_; +} +void RangeStaticLayerParams::set_stepsizevalue(float value) { + + stepsizevalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.RangeStaticLayerParams.stepSizeValue) +} #endif // PROTOBUF_INLINE_NOT_IN_HEADERS // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int RangeDynamicLayerParams::kStartValueFieldNumber; +const int RangeDynamicLayerParams::kStepSizeValueFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -ErfLayerParams::ErfLayerParams() +RangeDynamicLayerParams::RangeDynamicLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.ErfLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.RangeDynamicLayerParams) } -ErfLayerParams::ErfLayerParams(const ErfLayerParams& from) +RangeDynamicLayerParams::RangeDynamicLayerParams(const RangeDynamicLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - 
// @@protoc_insertion_point(copy_constructor:CoreML.Specification.ErfLayerParams)
+  ::memcpy(&startvalue_, &from.startvalue_,
+    reinterpret_cast<char*>(&stepsizevalue_) -
+    reinterpret_cast<char*>(&startvalue_) + sizeof(stepsizevalue_));
+  // @@protoc_insertion_point(copy_constructor:CoreML.Specification.RangeDynamicLayerParams)
 }
 
-void ErfLayerParams::SharedCtor() {
+void RangeDynamicLayerParams::SharedCtor() {
+  ::memset(&startvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) -
+    reinterpret_cast<char*>(&startvalue_) + sizeof(stepsizevalue_));
   _cached_size_ = 0;
 }
 
-ErfLayerParams::~ErfLayerParams() {
-  // @@protoc_insertion_point(destructor:CoreML.Specification.ErfLayerParams)
+RangeDynamicLayerParams::~RangeDynamicLayerParams() {
+  // @@protoc_insertion_point(destructor:CoreML.Specification.RangeDynamicLayerParams)
   SharedDtor();
 }
 
-void ErfLayerParams::SharedDtor() {
+void RangeDynamicLayerParams::SharedDtor() {
 }
 
-void ErfLayerParams::SetCachedSize(int size) const {
+void RangeDynamicLayerParams::SetCachedSize(int size) const {
   GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
   _cached_size_ = size;
   GOOGLE_SAFE_CONCURRENT_WRITES_END();
 }
-const ErfLayerParams& ErfLayerParams::default_instance() {
+const RangeDynamicLayerParams& RangeDynamicLayerParams::default_instance() {
   protobuf_NeuralNetwork_2eproto::InitDefaults();
   return *internal_default_instance();
 }
 
-ErfLayerParams* ErfLayerParams::New(::google::protobuf::Arena* arena) const {
-  ErfLayerParams* n = new ErfLayerParams;
+RangeDynamicLayerParams* RangeDynamicLayerParams::New(::google::protobuf::Arena* arena) const {
+  RangeDynamicLayerParams* n = new RangeDynamicLayerParams;
   if (arena != NULL) {
     arena->Own(n);
   }
   return n;
 }
 
-void ErfLayerParams::Clear() {
-// @@protoc_insertion_point(message_clear_start:CoreML.Specification.ErfLayerParams)
+void RangeDynamicLayerParams::Clear() {
+// @@protoc_insertion_point(message_clear_start:CoreML.Specification.RangeDynamicLayerParams)
+  ::memset(&startvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) -
+    reinterpret_cast<char*>(&startvalue_) + sizeof(stepsizevalue_));
 }
 
-bool ErfLayerParams::MergePartialFromCodedStream(
+bool RangeDynamicLayerParams::MergePartialFromCodedStream(
     ::google::protobuf::io::CodedInputStream* input) {
 #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
   ::google::protobuf::uint32 tag;
-  // @@protoc_insertion_point(parse_start:CoreML.Specification.ErfLayerParams)
+  // @@protoc_insertion_point(parse_start:CoreML.Specification.RangeDynamicLayerParams)
   for (;;) {
     ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
     tag = p.first;
     if (!p.second) goto handle_unusual;
-  handle_unusual:
-    if (tag == 0 ||
-        ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
-        ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
-      goto success;
+    switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
+      // float startValue = 2;
+      case 2: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(21u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
+               input, &startvalue_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      // float stepSizeValue = 3;
+      case 3: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(29u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
+               input, &stepsizevalue_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      default: {
+      handle_unusual:
+        if (tag == 0 ||
+            ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) ==
+            ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) {
+          goto success;
+        }
+        DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
+        break;
+      }
     }
-    DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag));
   }
 success:
-  // @@protoc_insertion_point(parse_success:CoreML.Specification.ErfLayerParams)
+  // @@protoc_insertion_point(parse_success:CoreML.Specification.RangeDynamicLayerParams)
   return true;
 failure:
-  // @@protoc_insertion_point(parse_failure:CoreML.Specification.ErfLayerParams)
+  // @@protoc_insertion_point(parse_failure:CoreML.Specification.RangeDynamicLayerParams)
   return false;
 #undef DO_
 }
 
-void ErfLayerParams::SerializeWithCachedSizes(
+void RangeDynamicLayerParams::SerializeWithCachedSizes(
     ::google::protobuf::io::CodedOutputStream* output) const {
-  // @@protoc_insertion_point(serialize_start:CoreML.Specification.ErfLayerParams)
+  // @@protoc_insertion_point(serialize_start:CoreML.Specification.RangeDynamicLayerParams)
   ::google::protobuf::uint32 cached_has_bits = 0;
   (void) cached_has_bits;
 
-  // @@protoc_insertion_point(serialize_end:CoreML.Specification.ErfLayerParams)
+  // float startValue = 2;
+  if (this->startvalue() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->startvalue(), output);
+  }
+
+  // float stepSizeValue = 3;
+  if (this->stepsizevalue() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->stepsizevalue(), output);
+  }
+
+  // @@protoc_insertion_point(serialize_end:CoreML.Specification.RangeDynamicLayerParams)
 }
 
-size_t ErfLayerParams::ByteSizeLong() const {
-// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.ErfLayerParams)
+size_t RangeDynamicLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.RangeDynamicLayerParams)
   size_t total_size = 0;
 
+  // float startValue = 2;
+  if (this->startvalue() != 0) {
+    total_size += 1 + 4;
+  }
+
+  // float stepSizeValue = 3;
+  if (this->stepsizevalue() != 0) {
+    total_size += 1 + 4;
+  }
+
   int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
   GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
   _cached_size_ = cached_size;
@@ -64435,126 +68773,195 @@ size_t ErfLayerParams::ByteSizeLong() const {
   return total_size;
 }
 
-void ErfLayerParams::CheckTypeAndMergeFrom(
+void RangeDynamicLayerParams::CheckTypeAndMergeFrom(
     const ::google::protobuf::MessageLite& from) {
-  MergeFrom(*::google::protobuf::down_cast<const ErfLayerParams*>(&from));
+  MergeFrom(*::google::protobuf::down_cast<const RangeDynamicLayerParams*>(&from));
 }
 
-void ErfLayerParams::MergeFrom(const ErfLayerParams& from) {
-// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.ErfLayerParams)
+void RangeDynamicLayerParams::MergeFrom(const RangeDynamicLayerParams& from) {
+// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.RangeDynamicLayerParams)
   GOOGLE_DCHECK_NE(&from, this);
   _internal_metadata_.MergeFrom(from._internal_metadata_);
   ::google::protobuf::uint32 cached_has_bits = 0;
   (void) cached_has_bits;
 
+  if (from.startvalue() != 0) {
+    set_startvalue(from.startvalue());
+  }
+  if (from.stepsizevalue() != 0) {
+    set_stepsizevalue(from.stepsizevalue());
+  }
 }
 
-void ErfLayerParams::CopyFrom(const ErfLayerParams& from) {
-// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.ErfLayerParams)
+void RangeDynamicLayerParams::CopyFrom(const RangeDynamicLayerParams& from) {
+// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.RangeDynamicLayerParams)
   if (&from == this) return;
   Clear();
   MergeFrom(from);
 }
 
-bool ErfLayerParams::IsInitialized() const {
+bool RangeDynamicLayerParams::IsInitialized() const {
   return true;
 }
 
-void ErfLayerParams::Swap(ErfLayerParams* other) {
+void RangeDynamicLayerParams::Swap(RangeDynamicLayerParams* other) {
   if (other == this) return;
   InternalSwap(other);
 }
-void ErfLayerParams::InternalSwap(ErfLayerParams* other) {
+void RangeDynamicLayerParams::InternalSwap(RangeDynamicLayerParams* other) {
+  std::swap(startvalue_, other->startvalue_);
+  std::swap(stepsizevalue_, other->stepsizevalue_);
   std::swap(_cached_size_, other->_cached_size_);
 }
 
-::std::string ErfLayerParams::GetTypeName() const {
-  return "CoreML.Specification.ErfLayerParams";
+::std::string RangeDynamicLayerParams::GetTypeName() const {
+  return "CoreML.Specification.RangeDynamicLayerParams";
 }
 
 #if PROTOBUF_INLINE_NOT_IN_HEADERS
-// ErfLayerParams
+// RangeDynamicLayerParams
+
+// float startValue = 2;
+void RangeDynamicLayerParams::clear_startvalue() {
+  startvalue_ = 0;
+}
+float RangeDynamicLayerParams::startvalue() const {
+  // @@protoc_insertion_point(field_get:CoreML.Specification.RangeDynamicLayerParams.startValue)
+  return startvalue_;
+}
+void RangeDynamicLayerParams::set_startvalue(float value) {
+
+  startvalue_ = value;
+  // @@protoc_insertion_point(field_set:CoreML.Specification.RangeDynamicLayerParams.startValue)
+}
+
+// float stepSizeValue = 3;
+void RangeDynamicLayerParams::clear_stepsizevalue() {
+  stepsizevalue_ = 0;
+}
+float RangeDynamicLayerParams::stepsizevalue() const {
+  // @@protoc_insertion_point(field_get:CoreML.Specification.RangeDynamicLayerParams.stepSizeValue)
+  return stepsizevalue_;
+}
+void RangeDynamicLayerParams::set_stepsizevalue(float value) {
+
+  stepsizevalue_ = value;
+  // @@protoc_insertion_point(field_set:CoreML.Specification.RangeDynamicLayerParams.stepSizeValue)
+}
 
 #endif  // PROTOBUF_INLINE_NOT_IN_HEADERS
 
 // ===================================================================
 
 #if !defined(_MSC_VER) || _MSC_VER >= 1900
-const int GeluLayerParams::kModeFieldNumber;
+const int SlidingWindowsLayerParams::kAxisFieldNumber;
+const int SlidingWindowsLayerParams::kWindowSizeFieldNumber;
+const int SlidingWindowsLayerParams::kStepFieldNumber;
 #endif  // !defined(_MSC_VER) || _MSC_VER >= 1900
 
-GeluLayerParams::GeluLayerParams()
+SlidingWindowsLayerParams::SlidingWindowsLayerParams()
   : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) {
   if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) {
     protobuf_NeuralNetwork_2eproto::InitDefaults();
   }
   SharedCtor();
-  // @@protoc_insertion_point(constructor:CoreML.Specification.GeluLayerParams)
+  // @@protoc_insertion_point(constructor:CoreML.Specification.SlidingWindowsLayerParams)
 }
-GeluLayerParams::GeluLayerParams(const GeluLayerParams& from)
+SlidingWindowsLayerParams::SlidingWindowsLayerParams(const SlidingWindowsLayerParams& from)
   : ::google::protobuf::MessageLite(),
     _internal_metadata_(NULL),
     _cached_size_(0) {
   _internal_metadata_.MergeFrom(from._internal_metadata_);
-  mode_ = from.mode_;
-  // @@protoc_insertion_point(copy_constructor:CoreML.Specification.GeluLayerParams)
+  ::memcpy(&axis_, &from.axis_,
+    reinterpret_cast<char*>(&step_) -
+    reinterpret_cast<char*>(&axis_) + sizeof(step_));
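+  // axis_, windowsize_ and step_ are declared contiguously, so a single
+  // memcpy/memset over the [&axis_, &step_ + sizeof(step_)) byte range
+  // covers all three scalar fields here and in SharedCtor()/Clear() below.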
+  // @@protoc_insertion_point(copy_constructor:CoreML.Specification.SlidingWindowsLayerParams)
 }
 
-void GeluLayerParams::SharedCtor() {
-  mode_ = 0;
+void SlidingWindowsLayerParams::SharedCtor() {
+  ::memset(&axis_, 0, reinterpret_cast<char*>(&step_) -
+    reinterpret_cast<char*>(&axis_) + sizeof(step_));
   _cached_size_ = 0;
 }
 
-GeluLayerParams::~GeluLayerParams() {
-  // @@protoc_insertion_point(destructor:CoreML.Specification.GeluLayerParams)
+SlidingWindowsLayerParams::~SlidingWindowsLayerParams() {
  // @@protoc_insertion_point(destructor:CoreML.Specification.SlidingWindowsLayerParams)
   SharedDtor();
 }
 
-void GeluLayerParams::SharedDtor() {
+void SlidingWindowsLayerParams::SharedDtor() {
 }
 
-void GeluLayerParams::SetCachedSize(int size) const {
+void SlidingWindowsLayerParams::SetCachedSize(int size) const {
   GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();
   _cached_size_ = size;
   GOOGLE_SAFE_CONCURRENT_WRITES_END();
 }
-const GeluLayerParams& GeluLayerParams::default_instance() {
+const SlidingWindowsLayerParams& SlidingWindowsLayerParams::default_instance() {
   protobuf_NeuralNetwork_2eproto::InitDefaults();
   return *internal_default_instance();
 }
 
-GeluLayerParams* GeluLayerParams::New(::google::protobuf::Arena* arena) const {
-  GeluLayerParams* n = new GeluLayerParams;
+SlidingWindowsLayerParams* SlidingWindowsLayerParams::New(::google::protobuf::Arena* arena) const {
+  SlidingWindowsLayerParams* n = new SlidingWindowsLayerParams;
   if (arena != NULL) {
     arena->Own(n);
   }
   return n;
 }
 
-void GeluLayerParams::Clear() {
-// @@protoc_insertion_point(message_clear_start:CoreML.Specification.GeluLayerParams)
-  mode_ = 0;
+void SlidingWindowsLayerParams::Clear() {
+// @@protoc_insertion_point(message_clear_start:CoreML.Specification.SlidingWindowsLayerParams)
+  ::memset(&axis_, 0, reinterpret_cast<char*>(&step_) -
+    reinterpret_cast<char*>(&axis_) + sizeof(step_));
 }
 
-bool GeluLayerParams::MergePartialFromCodedStream(
+bool SlidingWindowsLayerParams::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
 #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure
   ::google::protobuf::uint32 tag;
-  // @@protoc_insertion_point(parse_start:CoreML.Specification.GeluLayerParams)
+  // @@protoc_insertion_point(parse_start:CoreML.Specification.SlidingWindowsLayerParams)
   for (;;) {
     ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
     tag = p.first;
     if (!p.second) goto handle_unusual;
     switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
-      // .CoreML.Specification.GeluLayerParams.GeluMode mode = 1;
+      // int64 axis = 1;
       case 1: {
         if (static_cast< ::google::protobuf::uint8>(tag) ==
            static_cast< ::google::protobuf::uint8>(8u)) {
-          int value;
+
           DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
-                 int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>(
-               input, &value)));
-          set_mode(static_cast< ::CoreML::Specification::GeluLayerParams_GeluMode >(value));
+                 ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>(
+               input, &axis_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      // uint64 windowSize = 2;
+      case 2: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(16u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>(
+               input, &windowsize_)));
+        } else {
+          goto handle_unusual;
+        }
+        break;
+      }
+
+      // uint64 step = 3;
+      case 3: {
+        if (static_cast< ::google::protobuf::uint8>(tag) ==
+            static_cast< ::google::protobuf::uint8>(24u)) {
+
+          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
+                 ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>(
+               input, &step_)));
         } else {
          goto handle_unusual;
        }
@@ -64574,37 +68981,61 @@ bool GeluLayerParams::MergePartialFromCodedStream(
      }
    }
  }
 success:
-  // @@protoc_insertion_point(parse_success:CoreML.Specification.GeluLayerParams)
+  // @@protoc_insertion_point(parse_success:CoreML.Specification.SlidingWindowsLayerParams)
   return true;
 failure:
-  // @@protoc_insertion_point(parse_failure:CoreML.Specification.GeluLayerParams)
+  // @@protoc_insertion_point(parse_failure:CoreML.Specification.SlidingWindowsLayerParams)
   return false;
 #undef DO_
 }
 
-void GeluLayerParams::SerializeWithCachedSizes(
+void SlidingWindowsLayerParams::SerializeWithCachedSizes(
     ::google::protobuf::io::CodedOutputStream* output) const {
-  // @@protoc_insertion_point(serialize_start:CoreML.Specification.GeluLayerParams)
+  // @@protoc_insertion_point(serialize_start:CoreML.Specification.SlidingWindowsLayerParams)
   ::google::protobuf::uint32 cached_has_bits = 0;
   (void) cached_has_bits;
 
-  // .CoreML.Specification.GeluLayerParams.GeluMode mode = 1;
-  if (this->mode() != 0) {
-    ::google::protobuf::internal::WireFormatLite::WriteEnum(
-      1, this->mode(), output);
+  // int64 axis = 1;
+  if (this->axis() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteInt64(1, this->axis(), output);
   }
 
-  // @@protoc_insertion_point(serialize_end:CoreML.Specification.GeluLayerParams)
+  // uint64 windowSize = 2;
+  if (this->windowsize() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteUInt64(2, this->windowsize(), output);
+  }
+
+  // uint64 step = 3;
+  if (this->step() != 0) {
+    ::google::protobuf::internal::WireFormatLite::WriteUInt64(3, this->step(), output);
+  }
+
+  // @@protoc_insertion_point(serialize_end:CoreML.Specification.SlidingWindowsLayerParams)
 }
 
-size_t GeluLayerParams::ByteSizeLong() const {
-// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.GeluLayerParams)
+size_t SlidingWindowsLayerParams::ByteSizeLong() const {
+// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.SlidingWindowsLayerParams)
   size_t total_size = 0;
 
-  // .CoreML.Specification.GeluLayerParams.GeluMode mode = 1;
-  if (this->mode() != 0) {
+  // int64 axis = 1;
+  if (this->axis() != 0) {
     total_size += 1 +
-      ::google::protobuf::internal::WireFormatLite::EnumSize(this->mode());
+      ::google::protobuf::internal::WireFormatLite::Int64Size(
+        this->axis());
+  }
+
+  // uint64 windowSize = 2;
+  if (this->windowsize() != 0) {
+    total_size += 1 +
+      ::google::protobuf::internal::WireFormatLite::UInt64Size(
+        this->windowsize());
+  }
+
+  // uint64 step = 3;
+  if (this->step() != 0) {
+    total_size += 1 +
+      ::google::protobuf::internal::WireFormatLite::UInt64Size(
+        this->step());
   }
 
   int cached_size = ::google::protobuf::internal::ToCachedSize(total_size);
@@ -64614,62 +69045,98 @@ size_t GeluLayerParams::ByteSizeLong() const {
   return total_size;
 }
 
-void GeluLayerParams::CheckTypeAndMergeFrom(
+void SlidingWindowsLayerParams::CheckTypeAndMergeFrom(
     const ::google::protobuf::MessageLite& from) {
-  MergeFrom(*::google::protobuf::down_cast<const GeluLayerParams*>(&from));
+  MergeFrom(*::google::protobuf::down_cast<const SlidingWindowsLayerParams*>(&from));
 }
 
-void GeluLayerParams::MergeFrom(const GeluLayerParams& from) {
-// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.GeluLayerParams)
+void
SlidingWindowsLayerParams::MergeFrom(const SlidingWindowsLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.SlidingWindowsLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.mode() != 0) { - set_mode(from.mode()); + if (from.axis() != 0) { + set_axis(from.axis()); + } + if (from.windowsize() != 0) { + set_windowsize(from.windowsize()); + } + if (from.step() != 0) { + set_step(from.step()); } } -void GeluLayerParams::CopyFrom(const GeluLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.GeluLayerParams) +void SlidingWindowsLayerParams::CopyFrom(const SlidingWindowsLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.SlidingWindowsLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool GeluLayerParams::IsInitialized() const { +bool SlidingWindowsLayerParams::IsInitialized() const { return true; } -void GeluLayerParams::Swap(GeluLayerParams* other) { +void SlidingWindowsLayerParams::Swap(SlidingWindowsLayerParams* other) { if (other == this) return; InternalSwap(other); } -void GeluLayerParams::InternalSwap(GeluLayerParams* other) { - std::swap(mode_, other->mode_); +void SlidingWindowsLayerParams::InternalSwap(SlidingWindowsLayerParams* other) { + std::swap(axis_, other->axis_); + std::swap(windowsize_, other->windowsize_); + std::swap(step_, other->step_); std::swap(_cached_size_, other->_cached_size_); } -::std::string GeluLayerParams::GetTypeName() const { - return "CoreML.Specification.GeluLayerParams"; +::std::string SlidingWindowsLayerParams::GetTypeName() const { + return "CoreML.Specification.SlidingWindowsLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// GeluLayerParams +// SlidingWindowsLayerParams -// .CoreML.Specification.GeluLayerParams.GeluMode mode = 1; -void GeluLayerParams::clear_mode() { - mode_ = 0; +// int64 axis = 1; +void SlidingWindowsLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); } -::CoreML::Specification::GeluLayerParams_GeluMode GeluLayerParams::mode() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.GeluLayerParams.mode) - return static_cast< ::CoreML::Specification::GeluLayerParams_GeluMode >(mode_); +::google::protobuf::int64 SlidingWindowsLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SlidingWindowsLayerParams.axis) + return axis_; } -void GeluLayerParams::set_mode(::CoreML::Specification::GeluLayerParams_GeluMode value) { +void SlidingWindowsLayerParams::set_axis(::google::protobuf::int64 value) { - mode_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.GeluLayerParams.mode) + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SlidingWindowsLayerParams.axis) +} + +// uint64 windowSize = 2; +void SlidingWindowsLayerParams::clear_windowsize() { + windowsize_ = GOOGLE_ULONGLONG(0); +} +::google::protobuf::uint64 SlidingWindowsLayerParams::windowsize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SlidingWindowsLayerParams.windowSize) + return windowsize_; +} +void SlidingWindowsLayerParams::set_windowsize(::google::protobuf::uint64 value) { + + windowsize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SlidingWindowsLayerParams.windowSize) +} + +// uint64 step = 3; +void 
SlidingWindowsLayerParams::clear_step() { + step_ = GOOGLE_ULONGLONG(0); +} +::google::protobuf::uint64 SlidingWindowsLayerParams::step() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SlidingWindowsLayerParams.step) + return step_; +} +void SlidingWindowsLayerParams::set_step(::google::protobuf::uint64 value) { + + step_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SlidingWindowsLayerParams.step) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -64677,114 +69144,152 @@ void GeluLayerParams::set_mode(::CoreML::Specification::GeluLayerParams_GeluMode // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int RangeStaticLayerParams::kEndValueFieldNumber; -const int RangeStaticLayerParams::kStartValueFieldNumber; -const int RangeStaticLayerParams::kStepSizeValueFieldNumber; +const int LayerNormalizationLayerParams::kNormalizedShapeFieldNumber; +const int LayerNormalizationLayerParams::kEpsFieldNumber; +const int LayerNormalizationLayerParams::kGammaFieldNumber; +const int LayerNormalizationLayerParams::kBetaFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -RangeStaticLayerParams::RangeStaticLayerParams() +LayerNormalizationLayerParams::LayerNormalizationLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.RangeStaticLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.LayerNormalizationLayerParams) } -RangeStaticLayerParams::RangeStaticLayerParams(const RangeStaticLayerParams& from) +LayerNormalizationLayerParams::LayerNormalizationLayerParams(const LayerNormalizationLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), + normalizedshape_(from.normalizedshape_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - ::memcpy(&endvalue_, &from.endvalue_, - reinterpret_cast<char*>(&stepsizevalue_) - - reinterpret_cast<char*>(&endvalue_) + sizeof(stepsizevalue_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.RangeStaticLayerParams) + if (from.has_gamma()) { + gamma_ = new ::CoreML::Specification::WeightParams(*from.gamma_); + } else { + gamma_ = NULL; + } + if (from.has_beta()) { + beta_ = new ::CoreML::Specification::WeightParams(*from.beta_); + } else { + beta_ = NULL; + } + eps_ = from.eps_; + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.LayerNormalizationLayerParams) } -void RangeStaticLayerParams::SharedCtor() { - ::memset(&endvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) - - reinterpret_cast<char*>(&endvalue_) + sizeof(stepsizevalue_)); +void LayerNormalizationLayerParams::SharedCtor() { + ::memset(&gamma_, 0, reinterpret_cast<char*>(&eps_) - + reinterpret_cast<char*>(&gamma_) + sizeof(eps_)); _cached_size_ = 0; } -RangeStaticLayerParams::~RangeStaticLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.RangeStaticLayerParams) +LayerNormalizationLayerParams::~LayerNormalizationLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.RangeStaticLayerParams) + // @@protoc_insertion_point(destructor:CoreML.Specification.LayerNormalizationLayerParams) SharedDtor(); } -void RangeStaticLayerParams::SharedDtor() { +void LayerNormalizationLayerParams::SharedDtor() { + if (this != internal_default_instance()) { + delete gamma_; + } + if (this != internal_default_instance()) { + delete beta_; + } }
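// ---- Editorial usage sketch (not part of the protoc-generated diff). ----
// A minimal, assumed example of driving the SlidingWindowsLayerParams lite
// message whose implementation is completed above. The header name is
// inferred from the generated NeuralNetwork.pb.cc/.h pair; nothing below is
// claimed to appear in the PR itself.
#include <iostream>
#include <string>
#include "NeuralNetwork.pb.h"

int main() {
  CoreML::Specification::SlidingWindowsLayerParams params;
  params.set_axis(-1);       // int64 axis = 1
  params.set_windowsize(5);  // uint64 windowSize = 2
  params.set_step(2);        // uint64 step = 3
  // Serialization skips fields equal to 0, so only the non-zero varint
  // fields set above contribute to ByteSizeLong().
  std::string wire = params.SerializeAsString();
  std::cout << params.GetTypeName() << " -> " << wire.size() << " bytes\n";
  return 0;
}
// ---- End editorial sketch; the generated RangeStatic -> LayerNormalization diff resumes below. ----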
-void RangeStaticLayerParams::SetCachedSize(int size) const { +void LayerNormalizationLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const RangeStaticLayerParams& RangeStaticLayerParams::default_instance() { +const LayerNormalizationLayerParams& LayerNormalizationLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -RangeStaticLayerParams* RangeStaticLayerParams::New(::google::protobuf::Arena* arena) const { - RangeStaticLayerParams* n = new RangeStaticLayerParams; +LayerNormalizationLayerParams* LayerNormalizationLayerParams::New(::google::protobuf::Arena* arena) const { + LayerNormalizationLayerParams* n = new LayerNormalizationLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void RangeStaticLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.RangeStaticLayerParams) - ::memset(&endvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) - - reinterpret_cast<char*>(&endvalue_) + sizeof(stepsizevalue_)); +void LayerNormalizationLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.LayerNormalizationLayerParams) + normalizedshape_.Clear(); + if (GetArenaNoVirtual() == NULL && gamma_ != NULL) { + delete gamma_; + } + gamma_ = NULL; + if (GetArenaNoVirtual() == NULL && beta_ != NULL) { + delete beta_; + } + beta_ = NULL; + eps_ = 0; } -bool RangeStaticLayerParams::MergePartialFromCodedStream( +bool LayerNormalizationLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.RangeStaticLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.LayerNormalizationLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // float endValue = 1; + // repeated int64 normalizedShape = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(13u)) { - - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &endvalue_))); + static_cast< ::google::protobuf::uint8>(10u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< + ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( + input, this->mutable_normalizedshape()))); + } else if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(8u)) { + DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< + ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( + 1, 10u, input, this->mutable_normalizedshape()))); } else { goto handle_unusual; } break; } - // float startValue = 2; + // float eps = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(21u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &startvalue_))); + input, &eps_))); } else { goto handle_unusual; } break; } - // float 
stepSizeValue = 3; + // .CoreML.Specification.WeightParams gamma = 3; case 3: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(29u)) { + static_cast< ::google::protobuf::uint8>(26u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_gamma())); + } else { + goto handle_unusual; + } + break; + } - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &stepsizevalue_))); + // .CoreML.Specification.WeightParams beta = 4; + case 4: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(34u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_beta())); } else { goto handle_unusual; } @@ -64804,54 +69309,85 @@ bool RangeStaticLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.RangeStaticLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.LayerNormalizationLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.RangeStaticLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.LayerNormalizationLayerParams) return false; #undef DO_ } -void RangeStaticLayerParams::SerializeWithCachedSizes( +void LayerNormalizationLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.RangeStaticLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.LayerNormalizationLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // float endValue = 1; - if (this->endvalue() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->endvalue(), output); + // repeated int64 normalizedShape = 1; + if (this->normalizedshape_size() > 0) { + ::google::protobuf::internal::WireFormatLite::WriteTag(1, ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); + output->WriteVarint32(_normalizedshape_cached_byte_size_); + } + for (int i = 0, n = this->normalizedshape_size(); i < n; i++) { + ::google::protobuf::internal::WireFormatLite::WriteInt64NoTag( + this->normalizedshape(i), output); } - // float startValue = 2; - if (this->startvalue() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->startvalue(), output); + // float eps = 2; + if (this->eps() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->eps(), output); } - // float stepSizeValue = 3; - if (this->stepsizevalue() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->stepsizevalue(), output); + // .CoreML.Specification.WeightParams gamma = 3; + if (this->has_gamma()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 3, *this->gamma_, output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.RangeStaticLayerParams) + // .CoreML.Specification.WeightParams beta = 4; + if (this->has_beta()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 4, *this->beta_, output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.LayerNormalizationLayerParams) } -size_t RangeStaticLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.RangeStaticLayerParams) +size_t 
LayerNormalizationLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.LayerNormalizationLayerParams) size_t total_size = 0; - // float endValue = 1; - if (this->endvalue() != 0) { - total_size += 1 + 4; + // repeated int64 normalizedShape = 1; + { + size_t data_size = ::google::protobuf::internal::WireFormatLite:: + Int64Size(this->normalizedshape_); + if (data_size > 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size(data_size); + } + int cached_size = ::google::protobuf::internal::ToCachedSize(data_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _normalizedshape_cached_byte_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + total_size += data_size; } - // float startValue = 2; - if (this->startvalue() != 0) { - total_size += 1 + 4; + // .CoreML.Specification.WeightParams gamma = 3; + if (this->has_gamma()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *this->gamma_); } - // float stepSizeValue = 3; - if (this->stepsizevalue() != 0) { + // .CoreML.Specification.WeightParams beta = 4; + if (this->has_beta()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *this->beta_); + } + + // float eps = 2; + if (this->eps() != 0) { total_size += 1 + 4; } @@ -64862,98 +69398,180 @@ size_t RangeStaticLayerParams::ByteSizeLong() const { return total_size; } -void RangeStaticLayerParams::CheckTypeAndMergeFrom( +void LayerNormalizationLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const RangeStaticLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const LayerNormalizationLayerParams*>(&from)); } -void RangeStaticLayerParams::MergeFrom(const RangeStaticLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.RangeStaticLayerParams) +void LayerNormalizationLayerParams::MergeFrom(const LayerNormalizationLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.LayerNormalizationLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.endvalue() != 0) { - set_endvalue(from.endvalue()); + normalizedshape_.MergeFrom(from.normalizedshape_); + if (from.has_gamma()) { + mutable_gamma()->::CoreML::Specification::WeightParams::MergeFrom(from.gamma()); } - if (from.startvalue() != 0) { - set_startvalue(from.startvalue()); + if (from.has_beta()) { + mutable_beta()->::CoreML::Specification::WeightParams::MergeFrom(from.beta()); } - if (from.stepsizevalue() != 0) { - set_stepsizevalue(from.stepsizevalue()); + if (from.eps() != 0) { + set_eps(from.eps()); } } -void RangeStaticLayerParams::CopyFrom(const RangeStaticLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.RangeStaticLayerParams) +void LayerNormalizationLayerParams::CopyFrom(const LayerNormalizationLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.LayerNormalizationLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool RangeStaticLayerParams::IsInitialized() const { +bool LayerNormalizationLayerParams::IsInitialized() const { return true; } -void RangeStaticLayerParams::Swap(RangeStaticLayerParams* other) { +void LayerNormalizationLayerParams::Swap(LayerNormalizationLayerParams* other) { if (other == this) 
return; InternalSwap(other); } -void RangeStaticLayerParams::InternalSwap(RangeStaticLayerParams* other) { - std::swap(endvalue_, other->endvalue_); - std::swap(startvalue_, other->startvalue_); - std::swap(stepsizevalue_, other->stepsizevalue_); +void LayerNormalizationLayerParams::InternalSwap(LayerNormalizationLayerParams* other) { + normalizedshape_.InternalSwap(&other->normalizedshape_); + std::swap(gamma_, other->gamma_); + std::swap(beta_, other->beta_); + std::swap(eps_, other->eps_); std::swap(_cached_size_, other->_cached_size_); } -::std::string RangeStaticLayerParams::GetTypeName() const { - return "CoreML.Specification.RangeStaticLayerParams"; +::std::string LayerNormalizationLayerParams::GetTypeName() const { + return "CoreML.Specification.LayerNormalizationLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// RangeStaticLayerParams +// LayerNormalizationLayerParams -// float endValue = 1; -void RangeStaticLayerParams::clear_endvalue() { - endvalue_ = 0; +// repeated int64 normalizedShape = 1; +int LayerNormalizationLayerParams::normalizedshape_size() const { + return normalizedshape_.size(); } -float RangeStaticLayerParams::endvalue() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.RangeStaticLayerParams.endValue) - return endvalue_; +void LayerNormalizationLayerParams::clear_normalizedshape() { + normalizedshape_.Clear(); } -void RangeStaticLayerParams::set_endvalue(float value) { +::google::protobuf::int64 LayerNormalizationLayerParams::normalizedshape(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) + return normalizedshape_.Get(index); +} +void LayerNormalizationLayerParams::set_normalizedshape(int index, ::google::protobuf::int64 value) { + normalizedshape_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) +} +void LayerNormalizationLayerParams::add_normalizedshape(::google::protobuf::int64 value) { + normalizedshape_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) +} +const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >& +LayerNormalizationLayerParams::normalizedshape() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) + return normalizedshape_; +} +::google::protobuf::RepeatedField< ::google::protobuf::int64 >* +LayerNormalizationLayerParams::mutable_normalizedshape() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) + return &normalizedshape_; +} + +// float eps = 2; +void LayerNormalizationLayerParams::clear_eps() { + eps_ = 0; +} +float LayerNormalizationLayerParams::eps() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.eps) + return eps_; +} +void LayerNormalizationLayerParams::set_eps(float value) { - endvalue_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.RangeStaticLayerParams.endValue) + eps_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.LayerNormalizationLayerParams.eps) } -// float startValue = 2; -void RangeStaticLayerParams::clear_startvalue() { - startvalue_ = 0; +// .CoreML.Specification.WeightParams gamma = 3; +bool LayerNormalizationLayerParams::has_gamma() const { + return this != internal_default_instance() && gamma_ != NULL; } -float 
RangeStaticLayerParams::startvalue() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.RangeStaticLayerParams.startValue) - return startvalue_; +void LayerNormalizationLayerParams::clear_gamma() { + if (GetArenaNoVirtual() == NULL && gamma_ != NULL) delete gamma_; + gamma_ = NULL; } -void RangeStaticLayerParams::set_startvalue(float value) { +const ::CoreML::Specification::WeightParams& LayerNormalizationLayerParams::gamma() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.gamma) + return gamma_ != NULL ? *gamma_ + : *::CoreML::Specification::WeightParams::internal_default_instance(); +} +::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::mutable_gamma() { - startvalue_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.RangeStaticLayerParams.startValue) + if (gamma_ == NULL) { + gamma_ = new ::CoreML::Specification::WeightParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.LayerNormalizationLayerParams.gamma) + return gamma_; +} +::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::release_gamma() { + // @@protoc_insertion_point(field_release:CoreML.Specification.LayerNormalizationLayerParams.gamma) + + ::CoreML::Specification::WeightParams* temp = gamma_; + gamma_ = NULL; + return temp; +} +void LayerNormalizationLayerParams::set_allocated_gamma(::CoreML::Specification::WeightParams* gamma) { + delete gamma_; + gamma_ = gamma; + if (gamma) { + + } else { + + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.LayerNormalizationLayerParams.gamma) } -// float stepSizeValue = 3; -void RangeStaticLayerParams::clear_stepsizevalue() { - stepsizevalue_ = 0; +// .CoreML.Specification.WeightParams beta = 4; +bool LayerNormalizationLayerParams::has_beta() const { + return this != internal_default_instance() && beta_ != NULL; } -float RangeStaticLayerParams::stepsizevalue() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.RangeStaticLayerParams.stepSizeValue) - return stepsizevalue_; +void LayerNormalizationLayerParams::clear_beta() { + if (GetArenaNoVirtual() == NULL && beta_ != NULL) delete beta_; + beta_ = NULL; } -void RangeStaticLayerParams::set_stepsizevalue(float value) { +const ::CoreML::Specification::WeightParams& LayerNormalizationLayerParams::beta() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.beta) + return beta_ != NULL ? 
*beta_ + : *::CoreML::Specification::WeightParams::internal_default_instance(); +} +::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::mutable_beta() { - stepsizevalue_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.RangeStaticLayerParams.stepSizeValue) + if (beta_ == NULL) { + beta_ = new ::CoreML::Specification::WeightParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.LayerNormalizationLayerParams.beta) + return beta_; +} +::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::release_beta() { + // @@protoc_insertion_point(field_release:CoreML.Specification.LayerNormalizationLayerParams.beta) + + ::CoreML::Specification::WeightParams* temp = beta_; + beta_ = NULL; + return temp; +} +void LayerNormalizationLayerParams::set_allocated_beta(::CoreML::Specification::WeightParams* beta) { + delete beta_; + beta_ = beta; + if (beta) { + + } else { + + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.LayerNormalizationLayerParams.beta) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -64961,99 +69579,129 @@ void RangeStaticLayerParams::set_stepsizevalue(float value) { // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int RangeDynamicLayerParams::kStartValueFieldNumber; -const int RangeDynamicLayerParams::kStepSizeValueFieldNumber; +const int NonMaximumSuppressionLayerParams::kIouThresholdFieldNumber; +const int NonMaximumSuppressionLayerParams::kScoreThresholdFieldNumber; +const int NonMaximumSuppressionLayerParams::kMaxBoxesFieldNumber; +const int NonMaximumSuppressionLayerParams::kPerClassSuppressionFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -RangeDynamicLayerParams::RangeDynamicLayerParams() +NonMaximumSuppressionLayerParams::NonMaximumSuppressionLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.RangeDynamicLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.NonMaximumSuppressionLayerParams) } -RangeDynamicLayerParams::RangeDynamicLayerParams(const RangeDynamicLayerParams& from) +NonMaximumSuppressionLayerParams::NonMaximumSuppressionLayerParams(const NonMaximumSuppressionLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - ::memcpy(&startvalue_, &from.startvalue_, - reinterpret_cast<char*>(&stepsizevalue_) - - reinterpret_cast<char*>(&startvalue_) + sizeof(stepsizevalue_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.RangeDynamicLayerParams) + ::memcpy(&iouthreshold_, &from.iouthreshold_, + reinterpret_cast<char*>(&perclasssuppression_) - + reinterpret_cast<char*>(&iouthreshold_) + sizeof(perclasssuppression_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.NonMaximumSuppressionLayerParams) } -void RangeDynamicLayerParams::SharedCtor() { - ::memset(&startvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) - - reinterpret_cast<char*>(&startvalue_) + sizeof(stepsizevalue_)); +void NonMaximumSuppressionLayerParams::SharedCtor() { + ::memset(&iouthreshold_, 0, reinterpret_cast<char*>(&perclasssuppression_) - + reinterpret_cast<char*>(&iouthreshold_) + sizeof(perclasssuppression_)); _cached_size_ = 0; }
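// ---- Editorial ownership sketch (assumed usage, not generated code). ----
// The gamma/beta accessors of LayerNormalizationLayerParams above follow the
// standard protobuf singular-submessage pattern: mutable_*() lazily
// allocates, release_*() transfers ownership out, and set_allocated_*()
// transfers it in. The header name is inferred from the generated pair.
#include <memory>
#include "NeuralNetwork.pb.h"

void layer_norm_ownership_example() {
  CoreML::Specification::LayerNormalizationLayerParams p;
  p.add_normalizedshape(128);  // repeated int64 normalizedShape = 1
  p.set_eps(1e-5f);            // float eps = 2
  p.mutable_gamma();           // lazily allocates, so has_gamma() is now true
  // release_gamma() hands the heap pointer to the caller and nulls the field.
  std::unique_ptr<CoreML::Specification::WeightParams> gamma(p.release_gamma());
  // set_allocated_beta() adopts a caller-allocated submessage; the message
  // deletes it in SharedDtor(), as the destructor logic above shows.
  p.set_allocated_beta(new CoreML::Specification::WeightParams);
}
// ---- End editorial sketch; the generated RangeDynamic -> NonMaximumSuppression diff resumes below. ----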
-RangeDynamicLayerParams::~RangeDynamicLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.RangeDynamicLayerParams) +NonMaximumSuppressionLayerParams::~NonMaximumSuppressionLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.NonMaximumSuppressionLayerParams) SharedDtor(); } -void RangeDynamicLayerParams::SharedDtor() { +void NonMaximumSuppressionLayerParams::SharedDtor() { } -void RangeDynamicLayerParams::SetCachedSize(int size) const { +void NonMaximumSuppressionLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const RangeDynamicLayerParams& RangeDynamicLayerParams::default_instance() { +const NonMaximumSuppressionLayerParams& NonMaximumSuppressionLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -RangeDynamicLayerParams* RangeDynamicLayerParams::New(::google::protobuf::Arena* arena) const { - RangeDynamicLayerParams* n = new RangeDynamicLayerParams; +NonMaximumSuppressionLayerParams* NonMaximumSuppressionLayerParams::New(::google::protobuf::Arena* arena) const { + NonMaximumSuppressionLayerParams* n = new NonMaximumSuppressionLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void RangeDynamicLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.RangeDynamicLayerParams) - ::memset(&startvalue_, 0, reinterpret_cast<char*>(&stepsizevalue_) - - reinterpret_cast<char*>(&startvalue_) + sizeof(stepsizevalue_)); +void NonMaximumSuppressionLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.NonMaximumSuppressionLayerParams) + ::memset(&iouthreshold_, 0, reinterpret_cast<char*>(&perclasssuppression_) - + reinterpret_cast<char*>(&iouthreshold_) + sizeof(perclasssuppression_)); } -bool RangeDynamicLayerParams::MergePartialFromCodedStream( +bool NonMaximumSuppressionLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.RangeDynamicLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.NonMaximumSuppressionLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // float startValue = 2; + // float iouThreshold = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(13u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &iouthreshold_))); + } else { + goto handle_unusual; + } + break; + } + + // float scoreThreshold = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(21u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &startvalue_))); + input, &scorethreshold_))); } else { goto handle_unusual; } break; } - // float stepSizeValue = 3; + // uint64 maxBoxes = 3; case 3: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(29u)) { + static_cast< ::google::protobuf::uint8>(24u)) { 
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &stepsizevalue_))); + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + input, &maxboxes_))); + } else { + goto handle_unusual; + } + break; + } + + // bool perClassSuppression = 4; + case 4: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(32u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &perclasssuppression_))); } else { goto handle_unusual; } @@ -65073,47 +69721,69 @@ bool RangeDynamicLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.RangeDynamicLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.NonMaximumSuppressionLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.RangeDynamicLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.NonMaximumSuppressionLayerParams) return false; #undef DO_ } -void RangeDynamicLayerParams::SerializeWithCachedSizes( +void NonMaximumSuppressionLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.RangeDynamicLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.NonMaximumSuppressionLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // float startValue = 2; - if (this->startvalue() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->startvalue(), output); + // float iouThreshold = 1; + if (this->iouthreshold() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->iouthreshold(), output); } - // float stepSizeValue = 3; - if (this->stepsizevalue() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->stepsizevalue(), output); + // float scoreThreshold = 2; + if (this->scorethreshold() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->scorethreshold(), output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.RangeDynamicLayerParams) + // uint64 maxBoxes = 3; + if (this->maxboxes() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(3, this->maxboxes(), output); + } + + // bool perClassSuppression = 4; + if (this->perclasssuppression() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(4, this->perclasssuppression(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.NonMaximumSuppressionLayerParams) } -size_t RangeDynamicLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.RangeDynamicLayerParams) +size_t NonMaximumSuppressionLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.NonMaximumSuppressionLayerParams) size_t total_size = 0; - // float startValue = 2; - if (this->startvalue() != 0) { + // float iouThreshold = 1; + if (this->iouthreshold() != 0) { total_size += 1 + 4; } - // float stepSizeValue = 3; - if (this->stepsizevalue() != 0) { + // float scoreThreshold = 2; + if (this->scorethreshold() != 0) { total_size += 1 + 4; } + // uint64 maxBoxes = 3; + if (this->maxboxes() != 0) { + total_size += 1 + + 
::google::protobuf::internal::WireFormatLite::UInt64Size( + this->maxboxes()); + } + + // bool perClassSuppression = 4; + if (this->perclasssuppression() != 0) { + total_size += 1 + 1; + } + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = cached_size; @@ -65121,80 +69791,116 @@ size_t RangeDynamicLayerParams::ByteSizeLong() const { return total_size; } -void RangeDynamicLayerParams::CheckTypeAndMergeFrom( +void NonMaximumSuppressionLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const RangeDynamicLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const NonMaximumSuppressionLayerParams*>(&from)); } -void RangeDynamicLayerParams::MergeFrom(const RangeDynamicLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.RangeDynamicLayerParams) +void NonMaximumSuppressionLayerParams::MergeFrom(const NonMaximumSuppressionLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.NonMaximumSuppressionLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.startvalue() != 0) { - set_startvalue(from.startvalue()); + if (from.iouthreshold() != 0) { + set_iouthreshold(from.iouthreshold()); } - if (from.stepsizevalue() != 0) { - set_stepsizevalue(from.stepsizevalue()); + if (from.scorethreshold() != 0) { + set_scorethreshold(from.scorethreshold()); + } + if (from.maxboxes() != 0) { + set_maxboxes(from.maxboxes()); + } + if (from.perclasssuppression() != 0) { + set_perclasssuppression(from.perclasssuppression()); } } -void RangeDynamicLayerParams::CopyFrom(const RangeDynamicLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.RangeDynamicLayerParams) +void NonMaximumSuppressionLayerParams::CopyFrom(const NonMaximumSuppressionLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.NonMaximumSuppressionLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool RangeDynamicLayerParams::IsInitialized() const { +bool NonMaximumSuppressionLayerParams::IsInitialized() const { return true; } -void RangeDynamicLayerParams::Swap(RangeDynamicLayerParams* other) { +void NonMaximumSuppressionLayerParams::Swap(NonMaximumSuppressionLayerParams* other) { if (other == this) return; InternalSwap(other); } -void RangeDynamicLayerParams::InternalSwap(RangeDynamicLayerParams* other) { - std::swap(startvalue_, other->startvalue_); - std::swap(stepsizevalue_, other->stepsizevalue_); +void NonMaximumSuppressionLayerParams::InternalSwap(NonMaximumSuppressionLayerParams* other) { + std::swap(iouthreshold_, other->iouthreshold_); + std::swap(scorethreshold_, other->scorethreshold_); + std::swap(maxboxes_, other->maxboxes_); + std::swap(perclasssuppression_, other->perclasssuppression_); std::swap(_cached_size_, other->_cached_size_); } -::std::string RangeDynamicLayerParams::GetTypeName() const { - return "CoreML.Specification.RangeDynamicLayerParams"; +::std::string NonMaximumSuppressionLayerParams::GetTypeName() const { + return "CoreML.Specification.NonMaximumSuppressionLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// RangeDynamicLayerParams +// NonMaximumSuppressionLayerParams -// float startValue = 2; -void RangeDynamicLayerParams::clear_startvalue() { - startvalue_ = 0; +// float iouThreshold = 1; 
+void NonMaximumSuppressionLayerParams::clear_iouthreshold() { + iouthreshold_ = 0; } -float RangeDynamicLayerParams::startvalue() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.RangeDynamicLayerParams.startValue) - return startvalue_; +float NonMaximumSuppressionLayerParams::iouthreshold() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.iouThreshold) + return iouthreshold_; } -void RangeDynamicLayerParams::set_startvalue(float value) { +void NonMaximumSuppressionLayerParams::set_iouthreshold(float value) { - startvalue_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.RangeDynamicLayerParams.startValue) + iouthreshold_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.iouThreshold) } -// float stepSizeValue = 3; -void RangeDynamicLayerParams::clear_stepsizevalue() { - stepsizevalue_ = 0; +// float scoreThreshold = 2; +void NonMaximumSuppressionLayerParams::clear_scorethreshold() { + scorethreshold_ = 0; } -float RangeDynamicLayerParams::stepsizevalue() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.RangeDynamicLayerParams.stepSizeValue) - return stepsizevalue_; +float NonMaximumSuppressionLayerParams::scorethreshold() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.scoreThreshold) + return scorethreshold_; } -void RangeDynamicLayerParams::set_stepsizevalue(float value) { +void NonMaximumSuppressionLayerParams::set_scorethreshold(float value) { - stepsizevalue_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.RangeDynamicLayerParams.stepSizeValue) + scorethreshold_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.scoreThreshold) +} + +// uint64 maxBoxes = 3; +void NonMaximumSuppressionLayerParams::clear_maxboxes() { + maxboxes_ = GOOGLE_ULONGLONG(0); +} +::google::protobuf::uint64 NonMaximumSuppressionLayerParams::maxboxes() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.maxBoxes) + return maxboxes_; +} +void NonMaximumSuppressionLayerParams::set_maxboxes(::google::protobuf::uint64 value) { + + maxboxes_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.maxBoxes) +} + +// bool perClassSuppression = 4; +void NonMaximumSuppressionLayerParams::clear_perclasssuppression() { + perclasssuppression_ = false; +} +bool NonMaximumSuppressionLayerParams::perclasssuppression() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.perClassSuppression) + return perclasssuppression_; +} +void NonMaximumSuppressionLayerParams::set_perclasssuppression(bool value) { + + perclasssuppression_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.perClassSuppression) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -65202,114 +69908,99 @@ void RangeDynamicLayerParams::set_stepsizevalue(float value) { // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int SlidingWindowsLayerParams::kAxisFieldNumber; -const int SlidingWindowsLayerParams::kWindowSizeFieldNumber; -const int SlidingWindowsLayerParams::kStepFieldNumber; +const int ClampedReLULayerParams::kAlphaFieldNumber; +const int ClampedReLULayerParams::kBetaFieldNumber; #endif // !defined(_MSC_VER) 
|| _MSC_VER >= 1900 -SlidingWindowsLayerParams::SlidingWindowsLayerParams() +ClampedReLULayerParams::ClampedReLULayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.SlidingWindowsLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.ClampedReLULayerParams) } -SlidingWindowsLayerParams::SlidingWindowsLayerParams(const SlidingWindowsLayerParams& from) +ClampedReLULayerParams::ClampedReLULayerParams(const ClampedReLULayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - ::memcpy(&axis_, &from.axis_, - reinterpret_cast<char*>(&step_) - - reinterpret_cast<char*>(&axis_) + sizeof(step_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.SlidingWindowsLayerParams) + ::memcpy(&alpha_, &from.alpha_, + reinterpret_cast<char*>(&beta_) - + reinterpret_cast<char*>(&alpha_) + sizeof(beta_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.ClampedReLULayerParams) } -void SlidingWindowsLayerParams::SharedCtor() { - ::memset(&axis_, 0, reinterpret_cast<char*>(&step_) - - reinterpret_cast<char*>(&axis_) + sizeof(step_)); +void ClampedReLULayerParams::SharedCtor() { + ::memset(&alpha_, 0, reinterpret_cast<char*>(&beta_) - + reinterpret_cast<char*>(&alpha_) + sizeof(beta_)); _cached_size_ = 0; } -SlidingWindowsLayerParams::~SlidingWindowsLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.SlidingWindowsLayerParams) +ClampedReLULayerParams::~ClampedReLULayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.ClampedReLULayerParams) SharedDtor(); } -void SlidingWindowsLayerParams::SharedDtor() { +void ClampedReLULayerParams::SharedDtor() { } -void SlidingWindowsLayerParams::SetCachedSize(int size) const { +void ClampedReLULayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const SlidingWindowsLayerParams& SlidingWindowsLayerParams::default_instance() { +const ClampedReLULayerParams& ClampedReLULayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -SlidingWindowsLayerParams* SlidingWindowsLayerParams::New(::google::protobuf::Arena* arena) const { - SlidingWindowsLayerParams* n = new SlidingWindowsLayerParams; +ClampedReLULayerParams* ClampedReLULayerParams::New(::google::protobuf::Arena* arena) const { + ClampedReLULayerParams* n = new ClampedReLULayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void SlidingWindowsLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.SlidingWindowsLayerParams) - ::memset(&axis_, 0, reinterpret_cast<char*>(&step_) - - reinterpret_cast<char*>(&axis_) + sizeof(step_)); +void ClampedReLULayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.ClampedReLULayerParams) + ::memset(&alpha_, 0, reinterpret_cast<char*>(&beta_) - + reinterpret_cast<char*>(&alpha_) + sizeof(beta_)); } -bool SlidingWindowsLayerParams::MergePartialFromCodedStream( +bool ClampedReLULayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // 
@@protoc_insertion_point(parse_start:CoreML.Specification.SlidingWindowsLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.ClampedReLULayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // int64 axis = 1; + // float alpha = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(8u)) { + static_cast< ::google::protobuf::uint8>(13u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( - input, &axis_))); + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &alpha_))); } else { goto handle_unusual; } break; } - // uint64 windowSize = 2; + // float beta = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(16u)) { - - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &windowsize_))); - } else { - goto handle_unusual; - } - break; - } - - // uint64 step = 3; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(24u)) { + static_cast< ::google::protobuf::uint8>(21u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &step_))); + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &beta_))); } else { goto handle_unusual; } @@ -65329,61 +70020,45 @@ bool SlidingWindowsLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.SlidingWindowsLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.ClampedReLULayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.SlidingWindowsLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.ClampedReLULayerParams) return false; #undef DO_ } -void SlidingWindowsLayerParams::SerializeWithCachedSizes( +void ClampedReLULayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.SlidingWindowsLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.ClampedReLULayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // int64 axis = 1; - if (this->axis() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt64(1, this->axis(), output); - } - - // uint64 windowSize = 2; - if (this->windowsize() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(2, this->windowsize(), output); + // float alpha = 1; + if (this->alpha() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->alpha(), output); } - // uint64 step = 3; - if (this->step() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(3, this->step(), output); + // float beta = 2; + if (this->beta() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->beta(), output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.SlidingWindowsLayerParams) + // 
@@protoc_insertion_point(serialize_end:CoreML.Specification.ClampedReLULayerParams) } -size_t SlidingWindowsLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.SlidingWindowsLayerParams) +size_t ClampedReLULayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.ClampedReLULayerParams) size_t total_size = 0; - // int64 axis = 1; - if (this->axis() != 0) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int64Size( - this->axis()); - } - - // uint64 windowSize = 2; - if (this->windowsize() != 0) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->windowsize()); + // float alpha = 1; + if (this->alpha() != 0) { + total_size += 1 + 4; } - // uint64 step = 3; - if (this->step() != 0) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->step()); + // float beta = 2; + if (this->beta() != 0) { + total_size += 1 + 4; } int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); @@ -65393,98 +70068,80 @@ size_t SlidingWindowsLayerParams::ByteSizeLong() const { return total_size; } -void SlidingWindowsLayerParams::CheckTypeAndMergeFrom( +void ClampedReLULayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const SlidingWindowsLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const ClampedReLULayerParams*>(&from)); } -void SlidingWindowsLayerParams::MergeFrom(const SlidingWindowsLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.SlidingWindowsLayerParams) +void ClampedReLULayerParams::MergeFrom(const ClampedReLULayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.ClampedReLULayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.axis() != 0) { - set_axis(from.axis()); - } - if (from.windowsize() != 0) { - set_windowsize(from.windowsize()); + if (from.alpha() != 0) { + set_alpha(from.alpha()); } - if (from.step() != 0) { - set_step(from.step()); + if (from.beta() != 0) { + set_beta(from.beta()); } } -void SlidingWindowsLayerParams::CopyFrom(const SlidingWindowsLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.SlidingWindowsLayerParams) +void ClampedReLULayerParams::CopyFrom(const ClampedReLULayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.ClampedReLULayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool SlidingWindowsLayerParams::IsInitialized() const { +bool ClampedReLULayerParams::IsInitialized() const { return true; } -void SlidingWindowsLayerParams::Swap(SlidingWindowsLayerParams* other) { +void ClampedReLULayerParams::Swap(ClampedReLULayerParams* other) { if (other == this) return; InternalSwap(other); } -void SlidingWindowsLayerParams::InternalSwap(SlidingWindowsLayerParams* other) { - std::swap(axis_, other->axis_); - std::swap(windowsize_, other->windowsize_); - std::swap(step_, other->step_); +void ClampedReLULayerParams::InternalSwap(ClampedReLULayerParams* other) { + std::swap(alpha_, other->alpha_); + std::swap(beta_, other->beta_); std::swap(_cached_size_, other->_cached_size_); } -::std::string SlidingWindowsLayerParams::GetTypeName() const { - return "CoreML.Specification.SlidingWindowsLayerParams"; 
+::std::string ClampedReLULayerParams::GetTypeName() const { + return "CoreML.Specification.ClampedReLULayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// SlidingWindowsLayerParams - -// int64 axis = 1; -void SlidingWindowsLayerParams::clear_axis() { - axis_ = GOOGLE_LONGLONG(0); -} -::google::protobuf::int64 SlidingWindowsLayerParams::axis() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.SlidingWindowsLayerParams.axis) - return axis_; -} -void SlidingWindowsLayerParams::set_axis(::google::protobuf::int64 value) { - - axis_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.SlidingWindowsLayerParams.axis) -} +// ClampedReLULayerParams -// uint64 windowSize = 2; -void SlidingWindowsLayerParams::clear_windowsize() { - windowsize_ = GOOGLE_ULONGLONG(0); +// float alpha = 1; +void ClampedReLULayerParams::clear_alpha() { + alpha_ = 0; } -::google::protobuf::uint64 SlidingWindowsLayerParams::windowsize() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.SlidingWindowsLayerParams.windowSize) - return windowsize_; +float ClampedReLULayerParams::alpha() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ClampedReLULayerParams.alpha) + return alpha_; } -void SlidingWindowsLayerParams::set_windowsize(::google::protobuf::uint64 value) { +void ClampedReLULayerParams::set_alpha(float value) { - windowsize_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.SlidingWindowsLayerParams.windowSize) + alpha_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ClampedReLULayerParams.alpha) } -// uint64 step = 3; -void SlidingWindowsLayerParams::clear_step() { - step_ = GOOGLE_ULONGLONG(0); +// float beta = 2; +void ClampedReLULayerParams::clear_beta() { + beta_ = 0; } -::google::protobuf::uint64 SlidingWindowsLayerParams::step() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.SlidingWindowsLayerParams.step) - return step_; +float ClampedReLULayerParams::beta() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ClampedReLULayerParams.beta) + return beta_; } -void SlidingWindowsLayerParams::set_step(::google::protobuf::uint64 value) { +void ClampedReLULayerParams::set_beta(float value) { - step_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.SlidingWindowsLayerParams.step) + beta_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ClampedReLULayerParams.beta) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -65492,152 +70149,99 @@ void SlidingWindowsLayerParams::set_step(::google::protobuf::uint64 value) { // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int LayerNormalizationLayerParams::kNormalizedShapeFieldNumber; -const int LayerNormalizationLayerParams::kEpsFieldNumber; -const int LayerNormalizationLayerParams::kGammaFieldNumber; -const int LayerNormalizationLayerParams::kBetaFieldNumber; +const int ArgSortLayerParams::kAxisFieldNumber; +const int ArgSortLayerParams::kDescendingFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -LayerNormalizationLayerParams::LayerNormalizationLayerParams() +ArgSortLayerParams::ArgSortLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.LayerNormalizationLayerParams) + // 
@@protoc_insertion_point(constructor:CoreML.Specification.ArgSortLayerParams) } -LayerNormalizationLayerParams::LayerNormalizationLayerParams(const LayerNormalizationLayerParams& from) +ArgSortLayerParams::ArgSortLayerParams(const ArgSortLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), - normalizedshape_(from.normalizedshape_), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - if (from.has_gamma()) { - gamma_ = new ::CoreML::Specification::WeightParams(*from.gamma_); - } else { - gamma_ = NULL; - } - if (from.has_beta()) { - beta_ = new ::CoreML::Specification::WeightParams(*from.beta_); - } else { - beta_ = NULL; - } - eps_ = from.eps_; - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.LayerNormalizationLayerParams) + ::memcpy(&axis_, &from.axis_, + reinterpret_cast<char*>(&descending_) - + reinterpret_cast<char*>(&axis_) + sizeof(descending_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.ArgSortLayerParams) } -void LayerNormalizationLayerParams::SharedCtor() { - ::memset(&gamma_, 0, reinterpret_cast<char*>(&eps_) - - reinterpret_cast<char*>(&gamma_) + sizeof(eps_)); +void ArgSortLayerParams::SharedCtor() { + ::memset(&axis_, 0, reinterpret_cast<char*>(&descending_) - + reinterpret_cast<char*>(&axis_) + sizeof(descending_)); _cached_size_ = 0; } -LayerNormalizationLayerParams::~LayerNormalizationLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.LayerNormalizationLayerParams) +ArgSortLayerParams::~ArgSortLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.ArgSortLayerParams) SharedDtor(); } -void LayerNormalizationLayerParams::SharedDtor() { - if (this != internal_default_instance()) { - delete gamma_; - } - if (this != internal_default_instance()) { - delete beta_; - } +void ArgSortLayerParams::SharedDtor() { } -void LayerNormalizationLayerParams::SetCachedSize(int size) const { +void ArgSortLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const LayerNormalizationLayerParams& LayerNormalizationLayerParams::default_instance() { +const ArgSortLayerParams& ArgSortLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -LayerNormalizationLayerParams* LayerNormalizationLayerParams::New(::google::protobuf::Arena* arena) const { - LayerNormalizationLayerParams* n = new LayerNormalizationLayerParams; +ArgSortLayerParams* ArgSortLayerParams::New(::google::protobuf::Arena* arena) const { + ArgSortLayerParams* n = new ArgSortLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void LayerNormalizationLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.LayerNormalizationLayerParams) - normalizedshape_.Clear(); - if (GetArenaNoVirtual() == NULL && gamma_ != NULL) { - delete gamma_; - } - gamma_ = NULL; - if (GetArenaNoVirtual() == NULL && beta_ != NULL) { - delete beta_; - } - beta_ = NULL; - eps_ = 0; +void ArgSortLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.ArgSortLayerParams) + ::memset(&axis_, 0, reinterpret_cast<char*>(&descending_) - + reinterpret_cast<char*>(&axis_) + sizeof(descending_)); } -bool LayerNormalizationLayerParams::MergePartialFromCodedStream( +bool ArgSortLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure 
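// ---- Editorial note (not protoc output): decoding the tag literals below.
// A protobuf field tag byte is (field_number << 3) | wire_type. For
// ArgSortLayerParams, axis is field 1 on the varint wire type (0), so its
// tag byte is (1 << 3) | 0 == 8u; descending is field 2 as a varint, giving
// (2 << 3) | 0 == 16u. These are exactly the 8u/16u values compared against
// the low byte of `tag` in the switch cases that follow. ----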
::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.LayerNormalizationLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.ArgSortLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // repeated int64 normalizedShape = 1; + // int64 axis = 1; case 1: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive< - ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( - input, this->mutable_normalizedshape()))); - } else if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(8u)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline< + static_cast< ::google::protobuf::uint8>(8u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( - 1, 10u, input, this->mutable_normalizedshape()))); + input, &axis_))); } else { goto handle_unusual; } break; } - // float eps = 2; + // bool descending = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(21u)) { + static_cast< ::google::protobuf::uint8>(16u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &eps_))); - } else { - goto handle_unusual; - } - break; - } - - // .CoreML.Specification.WeightParams gamma = 3; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(26u)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( - input, mutable_gamma())); - } else { - goto handle_unusual; - } - break; - } - - // .CoreML.Specification.WeightParams beta = 4; - case 4: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(34u)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( - input, mutable_beta())); + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &descending_))); } else { goto handle_unusual; } @@ -65657,86 +70261,47 @@ bool LayerNormalizationLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.LayerNormalizationLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.ArgSortLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.LayerNormalizationLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.ArgSortLayerParams) return false; #undef DO_ } -void LayerNormalizationLayerParams::SerializeWithCachedSizes( +void ArgSortLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.LayerNormalizationLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.ArgSortLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // repeated int64 normalizedShape = 1; - if (this->normalizedshape_size() > 0) { - ::google::protobuf::internal::WireFormatLite::WriteTag(1, 
::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); - output->WriteVarint32(_normalizedshape_cached_byte_size_); - } - for (int i = 0, n = this->normalizedshape_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteInt64NoTag( - this->normalizedshape(i), output); - } - - // float eps = 2; - if (this->eps() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->eps(), output); - } - - // .CoreML.Specification.WeightParams gamma = 3; - if (this->has_gamma()) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 3, *this->gamma_, output); + // int64 axis = 1; + if (this->axis() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt64(1, this->axis(), output); } - // .CoreML.Specification.WeightParams beta = 4; - if (this->has_beta()) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 4, *this->beta_, output); + // bool descending = 2; + if (this->descending() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(2, this->descending(), output); } - // @@protoc_insertion_point(serialize_end:CoreML.Specification.LayerNormalizationLayerParams) + // @@protoc_insertion_point(serialize_end:CoreML.Specification.ArgSortLayerParams) } -size_t LayerNormalizationLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.LayerNormalizationLayerParams) +size_t ArgSortLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.ArgSortLayerParams) size_t total_size = 0; - // repeated int64 normalizedShape = 1; - { - size_t data_size = ::google::protobuf::internal::WireFormatLite:: - Int64Size(this->normalizedshape_); - if (data_size > 0) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size(data_size); - } - int cached_size = ::google::protobuf::internal::ToCachedSize(data_size); - GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); - _normalizedshape_cached_byte_size_ = cached_size; - GOOGLE_SAFE_CONCURRENT_WRITES_END(); - total_size += data_size; - } - - // .CoreML.Specification.WeightParams gamma = 3; - if (this->has_gamma()) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( - *this->gamma_); - } - - // .CoreML.Specification.WeightParams beta = 4; - if (this->has_beta()) { + // int64 axis = 1; + if (this->axis() != 0) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( - *this->beta_); + ::google::protobuf::internal::WireFormatLite::Int64Size( + this->axis()); } - // float eps = 2; - if (this->eps() != 0) { - total_size += 1 + 4; + // bool descending = 2; + if (this->descending() != 0) { + total_size += 1 + 1; } int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); @@ -65746,180 +70311,80 @@ size_t LayerNormalizationLayerParams::ByteSizeLong() const { return total_size; } -void LayerNormalizationLayerParams::CheckTypeAndMergeFrom( +void ArgSortLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const LayerNormalizationLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const ArgSortLayerParams*>(&from)); } -void LayerNormalizationLayerParams::MergeFrom(const LayerNormalizationLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.LayerNormalizationLayerParams) +void ArgSortLayerParams::MergeFrom(const ArgSortLayerParams& from) { +//
@@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.ArgSortLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - normalizedshape_.MergeFrom(from.normalizedshape_); - if (from.has_gamma()) { - mutable_gamma()->::CoreML::Specification::WeightParams::MergeFrom(from.gamma()); - } - if (from.has_beta()) { - mutable_beta()->::CoreML::Specification::WeightParams::MergeFrom(from.beta()); + if (from.axis() != 0) { + set_axis(from.axis()); } - if (from.eps() != 0) { - set_eps(from.eps()); + if (from.descending() != 0) { + set_descending(from.descending()); } } -void LayerNormalizationLayerParams::CopyFrom(const LayerNormalizationLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.LayerNormalizationLayerParams) +void ArgSortLayerParams::CopyFrom(const ArgSortLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.ArgSortLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool LayerNormalizationLayerParams::IsInitialized() const { +bool ArgSortLayerParams::IsInitialized() const { return true; } -void LayerNormalizationLayerParams::Swap(LayerNormalizationLayerParams* other) { +void ArgSortLayerParams::Swap(ArgSortLayerParams* other) { if (other == this) return; InternalSwap(other); } -void LayerNormalizationLayerParams::InternalSwap(LayerNormalizationLayerParams* other) { - normalizedshape_.InternalSwap(&other->normalizedshape_); - std::swap(gamma_, other->gamma_); - std::swap(beta_, other->beta_); - std::swap(eps_, other->eps_); +void ArgSortLayerParams::InternalSwap(ArgSortLayerParams* other) { + std::swap(axis_, other->axis_); + std::swap(descending_, other->descending_); std::swap(_cached_size_, other->_cached_size_); } -::std::string LayerNormalizationLayerParams::GetTypeName() const { - return "CoreML.Specification.LayerNormalizationLayerParams"; +::std::string ArgSortLayerParams::GetTypeName() const { + return "CoreML.Specification.ArgSortLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// LayerNormalizationLayerParams - -// repeated int64 normalizedShape = 1; -int LayerNormalizationLayerParams::normalizedshape_size() const { - return normalizedshape_.size(); -} -void LayerNormalizationLayerParams::clear_normalizedshape() { - normalizedshape_.Clear(); -} -::google::protobuf::int64 LayerNormalizationLayerParams::normalizedshape(int index) const { - // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) - return normalizedshape_.Get(index); -} -void LayerNormalizationLayerParams::set_normalizedshape(int index, ::google::protobuf::int64 value) { - normalizedshape_.Set(index, value); - // @@protoc_insertion_point(field_set:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) -} -void LayerNormalizationLayerParams::add_normalizedshape(::google::protobuf::int64 value) { - normalizedshape_.Add(value); - // @@protoc_insertion_point(field_add:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) -} -const ::google::protobuf::RepeatedField< ::google::protobuf::int64 >& -LayerNormalizationLayerParams::normalizedshape() const { - // @@protoc_insertion_point(field_list:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) - return normalizedshape_; -} -::google::protobuf::RepeatedField< ::google::protobuf::int64 >* 
-LayerNormalizationLayerParams::mutable_normalizedshape() { - // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.LayerNormalizationLayerParams.normalizedShape) - return &normalizedshape_; -} - -// float eps = 2; -void LayerNormalizationLayerParams::clear_eps() { - eps_ = 0; -} -float LayerNormalizationLayerParams::eps() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.eps) - return eps_; -} -void LayerNormalizationLayerParams::set_eps(float value) { - - eps_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.LayerNormalizationLayerParams.eps) -} +// ArgSortLayerParams -// .CoreML.Specification.WeightParams gamma = 3; -bool LayerNormalizationLayerParams::has_gamma() const { - return this != internal_default_instance() && gamma_ != NULL; -} -void LayerNormalizationLayerParams::clear_gamma() { - if (GetArenaNoVirtual() == NULL && gamma_ != NULL) delete gamma_; - gamma_ = NULL; -} -const ::CoreML::Specification::WeightParams& LayerNormalizationLayerParams::gamma() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.gamma) - return gamma_ != NULL ? *gamma_ - : *::CoreML::Specification::WeightParams::internal_default_instance(); +// int64 axis = 1; +void ArgSortLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); } -::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::mutable_gamma() { - - if (gamma_ == NULL) { - gamma_ = new ::CoreML::Specification::WeightParams; - } - // @@protoc_insertion_point(field_mutable:CoreML.Specification.LayerNormalizationLayerParams.gamma) - return gamma_; +::google::protobuf::int64 ArgSortLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArgSortLayerParams.axis) + return axis_; } -::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::release_gamma() { - // @@protoc_insertion_point(field_release:CoreML.Specification.LayerNormalizationLayerParams.gamma) +void ArgSortLayerParams::set_axis(::google::protobuf::int64 value) { - ::CoreML::Specification::WeightParams* temp = gamma_; - gamma_ = NULL; - return temp; -} -void LayerNormalizationLayerParams::set_allocated_gamma(::CoreML::Specification::WeightParams* gamma) { - delete gamma_; - gamma_ = gamma; - if (gamma) { - - } else { - - } - // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.LayerNormalizationLayerParams.gamma) + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArgSortLayerParams.axis) } -// .CoreML.Specification.WeightParams beta = 4; -bool LayerNormalizationLayerParams::has_beta() const { - return this != internal_default_instance() && beta_ != NULL; -} -void LayerNormalizationLayerParams::clear_beta() { - if (GetArenaNoVirtual() == NULL && beta_ != NULL) delete beta_; - beta_ = NULL; -} -const ::CoreML::Specification::WeightParams& LayerNormalizationLayerParams::beta() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.LayerNormalizationLayerParams.beta) - return beta_ != NULL ? 
*beta_ - : *::CoreML::Specification::WeightParams::internal_default_instance(); +// bool descending = 2; +void ArgSortLayerParams::clear_descending() { + descending_ = false; } -::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::mutable_beta() { - - if (beta_ == NULL) { - beta_ = new ::CoreML::Specification::WeightParams; - } - // @@protoc_insertion_point(field_mutable:CoreML.Specification.LayerNormalizationLayerParams.beta) - return beta_; +bool ArgSortLayerParams::descending() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArgSortLayerParams.descending) + return descending_; } -::CoreML::Specification::WeightParams* LayerNormalizationLayerParams::release_beta() { - // @@protoc_insertion_point(field_release:CoreML.Specification.LayerNormalizationLayerParams.beta) +void ArgSortLayerParams::set_descending(bool value) { - ::CoreML::Specification::WeightParams* temp = beta_; - beta_ = NULL; - return temp; -} -void LayerNormalizationLayerParams::set_allocated_beta(::CoreML::Specification::WeightParams* beta) { - delete beta_; - beta_ = beta; - if (beta) { - - } else { - - } - // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.LayerNormalizationLayerParams.beta) + descending_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArgSortLayerParams.descending) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -65927,129 +70392,99 @@ void LayerNormalizationLayerParams::set_allocated_beta(::CoreML::Specification:: // =================================================================== #if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int NonMaximumSuppressionLayerParams::kIouThresholdFieldNumber; -const int NonMaximumSuppressionLayerParams::kScoreThresholdFieldNumber; -const int NonMaximumSuppressionLayerParams::kMaxBoxesFieldNumber; -const int NonMaximumSuppressionLayerParams::kPerClassSuppressionFieldNumber; +const int SliceBySizeLayerParams::kSizeFieldNumber; +const int SliceBySizeLayerParams::kAxisFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 -NonMaximumSuppressionLayerParams::NonMaximumSuppressionLayerParams() +SliceBySizeLayerParams::SliceBySizeLayerParams() : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { protobuf_NeuralNetwork_2eproto::InitDefaults(); } SharedCtor(); - // @@protoc_insertion_point(constructor:CoreML.Specification.NonMaximumSuppressionLayerParams) + // @@protoc_insertion_point(constructor:CoreML.Specification.SliceBySizeLayerParams) } -NonMaximumSuppressionLayerParams::NonMaximumSuppressionLayerParams(const NonMaximumSuppressionLayerParams& from) +SliceBySizeLayerParams::SliceBySizeLayerParams(const SliceBySizeLayerParams& from) : ::google::protobuf::MessageLite(), _internal_metadata_(NULL), _cached_size_(0) { _internal_metadata_.MergeFrom(from._internal_metadata_); - ::memcpy(&iouthreshold_, &from.iouthreshold_, - reinterpret_cast<char*>(&perclasssuppression_) - - reinterpret_cast<char*>(&iouthreshold_) + sizeof(perclasssuppression_)); - // @@protoc_insertion_point(copy_constructor:CoreML.Specification.NonMaximumSuppressionLayerParams) + ::memcpy(&size_, &from.size_, + reinterpret_cast<char*>(&axis_) - + reinterpret_cast<char*>(&size_) + sizeof(axis_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.SliceBySizeLayerParams) } -void NonMaximumSuppressionLayerParams::SharedCtor() { - ::memset(&iouthreshold_, 0, reinterpret_cast<char*>(&perclasssuppression_) - - reinterpret_cast<char*>(&iouthreshold_) + sizeof(perclasssuppression_));
+void SliceBySizeLayerParams::SharedCtor() { + ::memset(&size_, 0, reinterpret_cast<char*>(&axis_) - + reinterpret_cast<char*>(&size_) + sizeof(axis_)); _cached_size_ = 0; } -NonMaximumSuppressionLayerParams::~NonMaximumSuppressionLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.NonMaximumSuppressionLayerParams) +SliceBySizeLayerParams::~SliceBySizeLayerParams() { - // @@protoc_insertion_point(destructor:CoreML.Specification.SliceBySizeLayerParams) SharedDtor(); } -void NonMaximumSuppressionLayerParams::SharedDtor() { +void SliceBySizeLayerParams::SharedDtor() { } -void NonMaximumSuppressionLayerParams::SetCachedSize(int size) const { +void SliceBySizeLayerParams::SetCachedSize(int size) const { GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); _cached_size_ = size; GOOGLE_SAFE_CONCURRENT_WRITES_END(); } -const NonMaximumSuppressionLayerParams& NonMaximumSuppressionLayerParams::default_instance() { +const SliceBySizeLayerParams& SliceBySizeLayerParams::default_instance() { protobuf_NeuralNetwork_2eproto::InitDefaults(); return *internal_default_instance(); } -NonMaximumSuppressionLayerParams* NonMaximumSuppressionLayerParams::New(::google::protobuf::Arena* arena) const { - NonMaximumSuppressionLayerParams* n = new NonMaximumSuppressionLayerParams; +SliceBySizeLayerParams* SliceBySizeLayerParams::New(::google::protobuf::Arena* arena) const { + SliceBySizeLayerParams* n = new SliceBySizeLayerParams; if (arena != NULL) { arena->Own(n); } return n; } -void NonMaximumSuppressionLayerParams::Clear() { -// @@protoc_insertion_point(message_clear_start:CoreML.Specification.NonMaximumSuppressionLayerParams) - ::memset(&iouthreshold_, 0, reinterpret_cast<char*>(&perclasssuppression_) - - reinterpret_cast<char*>(&iouthreshold_) + sizeof(perclasssuppression_)); +void SliceBySizeLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.SliceBySizeLayerParams) + ::memset(&size_, 0, reinterpret_cast<char*>(&axis_) - + reinterpret_cast<char*>(&size_) + sizeof(axis_)); } -bool NonMaximumSuppressionLayerParams::MergePartialFromCodedStream( +bool SliceBySizeLayerParams::MergePartialFromCodedStream( ::google::protobuf::io::CodedInputStream* input) { #define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:CoreML.Specification.NonMaximumSuppressionLayerParams) + // @@protoc_insertion_point(parse_start:CoreML.Specification.SliceBySizeLayerParams) for (;;) { ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); tag = p.first; if (!p.second) goto handle_unusual; switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // float iouThreshold = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(13u)) { - - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &iouthreshold_))); - } else { - goto handle_unusual; - } - break; - } - - // float scoreThreshold = 2; + // int64 size = 2; case 2: { if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(21u)) { + static_cast< ::google::protobuf::uint8>(16u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &scorethreshold_))); + ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( + input, &size_))); }
else { goto handle_unusual; } break; } - // uint64 maxBoxes = 3; + // int64 axis = 3; case 3: { if (static_cast< ::google::protobuf::uint8>(tag) == static_cast< ::google::protobuf::uint8>(24u)) { DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( - input, &maxboxes_))); - } else { - goto handle_unusual; - } - break; - } - - // bool perClassSuppression = 4; - case 4: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(32u)) { - - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &perclasssuppression_))); + ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( + input, &axis_))); } else { goto handle_unusual; } @@ -66069,67 +70504,49 @@ bool NonMaximumSuppressionLayerParams::MergePartialFromCodedStream( } } success: - // @@protoc_insertion_point(parse_success:CoreML.Specification.NonMaximumSuppressionLayerParams) + // @@protoc_insertion_point(parse_success:CoreML.Specification.SliceBySizeLayerParams) return true; failure: - // @@protoc_insertion_point(parse_failure:CoreML.Specification.NonMaximumSuppressionLayerParams) + // @@protoc_insertion_point(parse_failure:CoreML.Specification.SliceBySizeLayerParams) return false; #undef DO_ } -void NonMaximumSuppressionLayerParams::SerializeWithCachedSizes( +void SliceBySizeLayerParams::SerializeWithCachedSizes( ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:CoreML.Specification.NonMaximumSuppressionLayerParams) + // @@protoc_insertion_point(serialize_start:CoreML.Specification.SliceBySizeLayerParams) ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - // float iouThreshold = 1; - if (this->iouthreshold() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(1, this->iouthreshold(), output); - } - - // float scoreThreshold = 2; - if (this->scorethreshold() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->scorethreshold(), output); - } - - // uint64 maxBoxes = 3; - if (this->maxboxes() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteUInt64(3, this->maxboxes(), output); - } - - // bool perClassSuppression = 4; - if (this->perclasssuppression() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteBool(4, this->perclasssuppression(), output); - } - - // @@protoc_insertion_point(serialize_end:CoreML.Specification.NonMaximumSuppressionLayerParams) -} - -size_t NonMaximumSuppressionLayerParams::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.NonMaximumSuppressionLayerParams) - size_t total_size = 0; - - // float iouThreshold = 1; - if (this->iouthreshold() != 0) { - total_size += 1 + 4; + // int64 size = 2; + if (this->size() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt64(2, this->size(), output); } - - // float scoreThreshold = 2; - if (this->scorethreshold() != 0) { - total_size += 1 + 4; + + // int64 axis = 3; + if (this->axis() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt64(3, this->axis(), output); } - // uint64 maxBoxes = 3; - if (this->maxboxes() != 0) { + // @@protoc_insertion_point(serialize_end:CoreML.Specification.SliceBySizeLayerParams) +} + +size_t SliceBySizeLayerParams::ByteSizeLong() const { +// 
@@protoc_insertion_point(message_byte_size_start:CoreML.Specification.SliceBySizeLayerParams) + size_t total_size = 0; + + // int64 size = 2; + if (this->size() != 0) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt64Size( - this->maxboxes()); + ::google::protobuf::internal::WireFormatLite::Int64Size( + this->size()); } - // bool perClassSuppression = 4; - if (this->perclasssuppression() != 0) { - total_size += 1 + 1; + // int64 axis = 3; + if (this->axis() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int64Size( + this->axis()); } int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); @@ -66139,116 +70556,80 @@ size_t NonMaximumSuppressionLayerParams::ByteSizeLong() const { -void NonMaximumSuppressionLayerParams::CheckTypeAndMergeFrom( +void SliceBySizeLayerParams::CheckTypeAndMergeFrom( const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast<const NonMaximumSuppressionLayerParams*>(&from)); + MergeFrom(*::google::protobuf::down_cast<const SliceBySizeLayerParams*>(&from)); } -void NonMaximumSuppressionLayerParams::MergeFrom(const NonMaximumSuppressionLayerParams& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.NonMaximumSuppressionLayerParams) +void SliceBySizeLayerParams::MergeFrom(const SliceBySizeLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.SliceBySizeLayerParams) GOOGLE_DCHECK_NE(&from, this); _internal_metadata_.MergeFrom(from._internal_metadata_); ::google::protobuf::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.iouthreshold() != 0) { - set_iouthreshold(from.iouthreshold()); + if (from.size() != 0) { + set_size(from.size()); } - if (from.scorethreshold() != 0) { - set_scorethreshold(from.scorethreshold()); - } - if (from.maxboxes() != 0) { - set_maxboxes(from.maxboxes()); - } - if (from.perclasssuppression() != 0) { - set_perclasssuppression(from.perclasssuppression()); + if (from.axis() != 0) { + set_axis(from.axis()); } } -void NonMaximumSuppressionLayerParams::CopyFrom(const NonMaximumSuppressionLayerParams& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.NonMaximumSuppressionLayerParams) +void SliceBySizeLayerParams::CopyFrom(const SliceBySizeLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.SliceBySizeLayerParams) if (&from == this) return; Clear(); MergeFrom(from); } -bool NonMaximumSuppressionLayerParams::IsInitialized() const { +bool SliceBySizeLayerParams::IsInitialized() const { return true; } -void NonMaximumSuppressionLayerParams::Swap(NonMaximumSuppressionLayerParams* other) { +void SliceBySizeLayerParams::Swap(SliceBySizeLayerParams* other) { if (other == this) return; InternalSwap(other); } -void NonMaximumSuppressionLayerParams::InternalSwap(NonMaximumSuppressionLayerParams* other) { - std::swap(iouthreshold_, other->iouthreshold_); - std::swap(scorethreshold_, other->scorethreshold_); - std::swap(maxboxes_, other->maxboxes_); - std::swap(perclasssuppression_, other->perclasssuppression_); +void SliceBySizeLayerParams::InternalSwap(SliceBySizeLayerParams* other) { + std::swap(size_, other->size_); + std::swap(axis_, other->axis_); std::swap(_cached_size_, other->_cached_size_); } -::std::string NonMaximumSuppressionLayerParams::GetTypeName() const { - return "CoreML.Specification.NonMaximumSuppressionLayerParams"; +::std::string SliceBySizeLayerParams::GetTypeName() const { + return
"CoreML.Specification.SliceBySizeLayerParams"; } #if PROTOBUF_INLINE_NOT_IN_HEADERS -// NonMaximumSuppressionLayerParams - -// float iouThreshold = 1; -void NonMaximumSuppressionLayerParams::clear_iouthreshold() { - iouthreshold_ = 0; -} -float NonMaximumSuppressionLayerParams::iouthreshold() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.iouThreshold) - return iouthreshold_; -} -void NonMaximumSuppressionLayerParams::set_iouthreshold(float value) { - - iouthreshold_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.iouThreshold) -} - -// float scoreThreshold = 2; -void NonMaximumSuppressionLayerParams::clear_scorethreshold() { - scorethreshold_ = 0; -} -float NonMaximumSuppressionLayerParams::scorethreshold() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.scoreThreshold) - return scorethreshold_; -} -void NonMaximumSuppressionLayerParams::set_scorethreshold(float value) { - - scorethreshold_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.scoreThreshold) -} +// SliceBySizeLayerParams -// uint64 maxBoxes = 3; -void NonMaximumSuppressionLayerParams::clear_maxboxes() { - maxboxes_ = GOOGLE_ULONGLONG(0); +// int64 size = 2; +void SliceBySizeLayerParams::clear_size() { + size_ = GOOGLE_LONGLONG(0); } -::google::protobuf::uint64 NonMaximumSuppressionLayerParams::maxboxes() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.maxBoxes) - return maxboxes_; +::google::protobuf::int64 SliceBySizeLayerParams::size() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceBySizeLayerParams.size) + return size_; } -void NonMaximumSuppressionLayerParams::set_maxboxes(::google::protobuf::uint64 value) { +void SliceBySizeLayerParams::set_size(::google::protobuf::int64 value) { - maxboxes_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.maxBoxes) + size_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceBySizeLayerParams.size) } -// bool perClassSuppression = 4; -void NonMaximumSuppressionLayerParams::clear_perclasssuppression() { - perclasssuppression_ = false; +// int64 axis = 3; +void SliceBySizeLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); } -bool NonMaximumSuppressionLayerParams::perclasssuppression() const { - // @@protoc_insertion_point(field_get:CoreML.Specification.NonMaximumSuppressionLayerParams.perClassSuppression) - return perclasssuppression_; +::google::protobuf::int64 SliceBySizeLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceBySizeLayerParams.axis) + return axis_; } -void NonMaximumSuppressionLayerParams::set_perclasssuppression(bool value) { +void SliceBySizeLayerParams::set_axis(::google::protobuf::int64 value) { - perclasssuppression_ = value; - // @@protoc_insertion_point(field_set:CoreML.Specification.NonMaximumSuppressionLayerParams.perClassSuppression) + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceBySizeLayerParams.axis) } #endif // PROTOBUF_INLINE_NOT_IN_HEADERS @@ -67025,6 +71406,623 @@ NeuralNetworkClassifier::ClassLabelsCase NeuralNetworkClassifier::ClassLabels_ca // =================================================================== +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int 
OneHotLayerParams::kOneHotVectorSizeFieldNumber; +const int OneHotLayerParams::kAxisFieldNumber; +const int OneHotLayerParams::kOnValueFieldNumber; +const int OneHotLayerParams::kOffValueFieldNumber; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +OneHotLayerParams::OneHotLayerParams() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.OneHotLayerParams) +} +OneHotLayerParams::OneHotLayerParams(const OneHotLayerParams& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::memcpy(&onehotvectorsize_, &from.onehotvectorsize_, + reinterpret_cast<char*>(&offvalue_) - + reinterpret_cast<char*>(&onehotvectorsize_) + sizeof(offvalue_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.OneHotLayerParams) +} + +void OneHotLayerParams::SharedCtor() { + ::memset(&onehotvectorsize_, 0, reinterpret_cast<char*>(&offvalue_) - + reinterpret_cast<char*>(&onehotvectorsize_) + sizeof(offvalue_)); + _cached_size_ = 0; +} + +OneHotLayerParams::~OneHotLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.OneHotLayerParams) + SharedDtor(); +} + +void OneHotLayerParams::SharedDtor() { +} + +void OneHotLayerParams::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const OneHotLayerParams& OneHotLayerParams::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +OneHotLayerParams* OneHotLayerParams::New(::google::protobuf::Arena* arena) const { + OneHotLayerParams* n = new OneHotLayerParams; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void OneHotLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.OneHotLayerParams) + ::memset(&onehotvectorsize_, 0, reinterpret_cast<char*>(&offvalue_) - + reinterpret_cast<char*>(&onehotvectorsize_) + sizeof(offvalue_)); +} + +bool OneHotLayerParams::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.OneHotLayerParams) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // uint64 oneHotVectorSize = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(8u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::uint64, ::google::protobuf::internal::WireFormatLite::TYPE_UINT64>( + input, &onehotvectorsize_))); + } else { + goto handle_unusual; + } + break; + } + + // int64 axis = 2; + case 2: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(16u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( + input, &axis_))); + } else { + goto handle_unusual; + } + break; + } + + // float onValue = 3; + case 3: { + if (static_cast<
::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(29u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &onvalue_))); + } else { + goto handle_unusual; + } + break; + } + + // float offValue = 4; + case 4: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(37u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( + input, &offvalue_))); + } else { + goto handle_unusual; + } + break; + } + + default: { + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } + } + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.OneHotLayerParams) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.OneHotLayerParams) + return false; +#undef DO_ +} + +void OneHotLayerParams::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.OneHotLayerParams) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // uint64 oneHotVectorSize = 1; + if (this->onehotvectorsize() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteUInt64(1, this->onehotvectorsize(), output); + } + + // int64 axis = 2; + if (this->axis() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt64(2, this->axis(), output); + } + + // float onValue = 3; + if (this->onvalue() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->onvalue(), output); + } + + // float offValue = 4; + if (this->offvalue() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteFloat(4, this->offvalue(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.OneHotLayerParams) +} + +size_t OneHotLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.OneHotLayerParams) + size_t total_size = 0; + + // uint64 oneHotVectorSize = 1; + if (this->onehotvectorsize() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::UInt64Size( + this->onehotvectorsize()); + } + + // int64 axis = 2; + if (this->axis() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int64Size( + this->axis()); + } + + // float onValue = 3; + if (this->onvalue() != 0) { + total_size += 1 + 4; + } + + // float offValue = 4; + if (this->offvalue() != 0) { + total_size += 1 + 4; + } + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void OneHotLayerParams::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const OneHotLayerParams*>(&from)); +} + +void OneHotLayerParams::MergeFrom(const OneHotLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.OneHotLayerParams) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void)
cached_has_bits; + + if (from.onehotvectorsize() != 0) { + set_onehotvectorsize(from.onehotvectorsize()); + } + if (from.axis() != 0) { + set_axis(from.axis()); + } + if (from.onvalue() != 0) { + set_onvalue(from.onvalue()); + } + if (from.offvalue() != 0) { + set_offvalue(from.offvalue()); + } +} + +void OneHotLayerParams::CopyFrom(const OneHotLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.OneHotLayerParams) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool OneHotLayerParams::IsInitialized() const { + return true; +} + +void OneHotLayerParams::Swap(OneHotLayerParams* other) { + if (other == this) return; + InternalSwap(other); +} +void OneHotLayerParams::InternalSwap(OneHotLayerParams* other) { + std::swap(onehotvectorsize_, other->onehotvectorsize_); + std::swap(axis_, other->axis_); + std::swap(onvalue_, other->onvalue_); + std::swap(offvalue_, other->offvalue_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string OneHotLayerParams::GetTypeName() const { + return "CoreML.Specification.OneHotLayerParams"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// OneHotLayerParams + +// uint64 oneHotVectorSize = 1; +void OneHotLayerParams::clear_onehotvectorsize() { + onehotvectorsize_ = GOOGLE_ULONGLONG(0); +} +::google::protobuf::uint64 OneHotLayerParams::onehotvectorsize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.oneHotVectorSize) + return onehotvectorsize_; +} +void OneHotLayerParams::set_onehotvectorsize(::google::protobuf::uint64 value) { + + onehotvectorsize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.oneHotVectorSize) +} + +// int64 axis = 2; +void OneHotLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); +} +::google::protobuf::int64 OneHotLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.axis) + return axis_; +} +void OneHotLayerParams::set_axis(::google::protobuf::int64 value) { + + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.axis) +} + +// float onValue = 3; +void OneHotLayerParams::clear_onvalue() { + onvalue_ = 0; +} +float OneHotLayerParams::onvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.onValue) + return onvalue_; +} +void OneHotLayerParams::set_onvalue(float value) { + + onvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.onValue) +} + +// float offValue = 4; +void OneHotLayerParams::clear_offvalue() { + offvalue_ = 0; +} +float OneHotLayerParams::offvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.offValue) + return offvalue_; +} +void OneHotLayerParams::set_offvalue(float value) { + + offvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.offValue) +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int CumSumLayerParams::kAxisFieldNumber; +const int CumSumLayerParams::kExcludeFinalSumFieldNumber; +const int CumSumLayerParams::kReverseFieldNumber; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +CumSumLayerParams::CumSumLayerParams() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + 
protobuf_NeuralNetwork_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.CumSumLayerParams) +} +CumSumLayerParams::CumSumLayerParams(const CumSumLayerParams& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::memcpy(&axis_, &from.axis_, + reinterpret_cast<char*>(&reverse_) - + reinterpret_cast<char*>(&axis_) + sizeof(reverse_)); + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.CumSumLayerParams) +} + +void CumSumLayerParams::SharedCtor() { + ::memset(&axis_, 0, reinterpret_cast<char*>(&reverse_) - + reinterpret_cast<char*>(&axis_) + sizeof(reverse_)); + _cached_size_ = 0; +} + +CumSumLayerParams::~CumSumLayerParams() { + // @@protoc_insertion_point(destructor:CoreML.Specification.CumSumLayerParams) + SharedDtor(); +} + +void CumSumLayerParams::SharedDtor() { +} + +void CumSumLayerParams::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const CumSumLayerParams& CumSumLayerParams::default_instance() { + protobuf_NeuralNetwork_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +CumSumLayerParams* CumSumLayerParams::New(::google::protobuf::Arena* arena) const { + CumSumLayerParams* n = new CumSumLayerParams; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void CumSumLayerParams::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.CumSumLayerParams) + ::memset(&axis_, 0, reinterpret_cast<char*>(&reverse_) - + reinterpret_cast<char*>(&axis_) + sizeof(reverse_)); +} + +bool CumSumLayerParams::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.CumSumLayerParams) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // int64 axis = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(8u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( + input, &axis_))); + } else { + goto handle_unusual; + } + break; + } + + // bool excludeFinalSum = 2; + case 2: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(16u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &excludefinalsum_))); + } else { + goto handle_unusual; + } + break; + } + + // bool reverse = 3; + case 3: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(24u)) { + + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( + input, &reverse_))); + } else { + goto handle_unusual; + } + break; + } + + default: { + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } +
DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } + } + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.CumSumLayerParams) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.CumSumLayerParams) + return false; +#undef DO_ +} + +void CumSumLayerParams::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.CumSumLayerParams) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // int64 axis = 1; + if (this->axis() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteInt64(1, this->axis(), output); + } + + // bool excludeFinalSum = 2; + if (this->excludefinalsum() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(2, this->excludefinalsum(), output); + } + + // bool reverse = 3; + if (this->reverse() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteBool(3, this->reverse(), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.CumSumLayerParams) +} + +size_t CumSumLayerParams::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.CumSumLayerParams) + size_t total_size = 0; + + // int64 axis = 1; + if (this->axis() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int64Size( + this->axis()); + } + + // bool excludeFinalSum = 2; + if (this->excludefinalsum() != 0) { + total_size += 1 + 1; + } + + // bool reverse = 3; + if (this->reverse() != 0) { + total_size += 1 + 1; + } + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void CumSumLayerParams::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const CumSumLayerParams*>(&from)); +} + +void CumSumLayerParams::MergeFrom(const CumSumLayerParams& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.CumSumLayerParams) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + if (from.axis() != 0) { + set_axis(from.axis()); + } + if (from.excludefinalsum() != 0) { + set_excludefinalsum(from.excludefinalsum()); + } + if (from.reverse() != 0) { + set_reverse(from.reverse()); + } +} + +void CumSumLayerParams::CopyFrom(const CumSumLayerParams& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.CumSumLayerParams) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool CumSumLayerParams::IsInitialized() const { + return true; +} + +void CumSumLayerParams::Swap(CumSumLayerParams* other) { + if (other == this) return; + InternalSwap(other); +} +void CumSumLayerParams::InternalSwap(CumSumLayerParams* other) { + std::swap(axis_, other->axis_); + std::swap(excludefinalsum_, other->excludefinalsum_); + std::swap(reverse_, other->reverse_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string CumSumLayerParams::GetTypeName() const { + return "CoreML.Specification.CumSumLayerParams"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// CumSumLayerParams + +// int64 axis = 1; +void CumSumLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); +} +::google::protobuf::int64 CumSumLayerParams::axis()
const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CumSumLayerParams.axis) + return axis_; +} +void CumSumLayerParams::set_axis(::google::protobuf::int64 value) { + + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CumSumLayerParams.axis) +} + +// bool excludeFinalSum = 2; +void CumSumLayerParams::clear_excludefinalsum() { + excludefinalsum_ = false; +} +bool CumSumLayerParams::excludefinalsum() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CumSumLayerParams.excludeFinalSum) + return excludefinalsum_; +} +void CumSumLayerParams::set_excludefinalsum(bool value) { + + excludefinalsum_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CumSumLayerParams.excludeFinalSum) +} + +// bool reverse = 3; +void CumSumLayerParams::clear_reverse() { + reverse_ = false; +} +bool CumSumLayerParams::reverse() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CumSumLayerParams.reverse) + return reverse_; +} +void CumSumLayerParams::set_reverse(bool value) { + + reverse_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CumSumLayerParams.reverse) +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// =================================================================== + #if !defined(_MSC_VER) || _MSC_VER >= 1900 const int NeuralNetworkRegressor::kLayersFieldNumber; const int NeuralNetworkRegressor::kPreprocessingFieldNumber; diff --git a/mlmodel/build/format/NeuralNetwork.pb.h b/mlmodel/build/format/NeuralNetwork.pb.h index ac4c90758..173cc1a11 100644 --- a/mlmodel/build/format/NeuralNetwork.pb.h +++ b/mlmodel/build/format/NeuralNetwork.pb.h @@ -99,6 +99,9 @@ extern ArgMaxLayerParamsDefaultTypeInternal _ArgMaxLayerParams_default_instance_ class ArgMinLayerParams; class ArgMinLayerParamsDefaultTypeInternal; extern ArgMinLayerParamsDefaultTypeInternal _ArgMinLayerParams_default_instance_; +class ArgSortLayerParams; +class ArgSortLayerParamsDefaultTypeInternal; +extern ArgSortLayerParamsDefaultTypeInternal _ArgSortLayerParams_default_instance_; class ArrayFeatureType; class ArrayFeatureTypeDefaultTypeInternal; extern ArrayFeatureTypeDefaultTypeInternal _ArrayFeatureType_default_instance_; @@ -171,6 +174,9 @@ extern CategoricalDistributionLayerParamsDefaultTypeInternal _CategoricalDistrib class CeilLayerParams; class CeilLayerParamsDefaultTypeInternal; extern CeilLayerParamsDefaultTypeInternal _CeilLayerParams_default_instance_; +class ClampedReLULayerParams; +class ClampedReLULayerParamsDefaultTypeInternal; +extern ClampedReLULayerParamsDefaultTypeInternal _ClampedReLULayerParams_default_instance_; class ClipLayerParams; class ClipLayerParamsDefaultTypeInternal; extern ClipLayerParamsDefaultTypeInternal _ClipLayerParams_default_instance_; @@ -183,6 +189,9 @@ extern ConcatNDLayerParamsDefaultTypeInternal _ConcatNDLayerParams_default_insta class ConstantPaddingLayerParams; class ConstantPaddingLayerParamsDefaultTypeInternal; extern ConstantPaddingLayerParamsDefaultTypeInternal _ConstantPaddingLayerParams_default_instance_; +class Convolution3DLayerParams; +class Convolution3DLayerParamsDefaultTypeInternal; +extern Convolution3DLayerParamsDefaultTypeInternal _Convolution3DLayerParams_default_instance_; class ConvolutionLayerParams; class ConvolutionLayerParamsDefaultTypeInternal; extern ConvolutionLayerParamsDefaultTypeInternal _ConvolutionLayerParams_default_instance_; @@ -201,6 +210,9 @@ extern CropLayerParamsDefaultTypeInternal _CropLayerParams_default_instance_; class 
CropResizeLayerParams; class CropResizeLayerParamsDefaultTypeInternal; extern CropResizeLayerParamsDefaultTypeInternal _CropResizeLayerParams_default_instance_; +class CumSumLayerParams; +class CumSumLayerParamsDefaultTypeInternal; +extern CumSumLayerParamsDefaultTypeInternal _CumSumLayerParams_default_instance_; class CustomLayerParams; class CustomLayerParamsDefaultTypeInternal; extern CustomLayerParamsDefaultTypeInternal _CustomLayerParams_default_instance_; @@ -294,6 +306,9 @@ extern GeluLayerParamsDefaultTypeInternal _GeluLayerParams_default_instance_; class GetShapeLayerParams; class GetShapeLayerParamsDefaultTypeInternal; extern GetShapeLayerParamsDefaultTypeInternal _GetShapeLayerParams_default_instance_; +class GlobalPooling3DLayerParams; +class GlobalPooling3DLayerParamsDefaultTypeInternal; +extern GlobalPooling3DLayerParamsDefaultTypeInternal _GlobalPooling3DLayerParams_default_instance_; class GreaterEqualLayerParams; class GreaterEqualLayerParamsDefaultTypeInternal; extern GreaterEqualLayerParamsDefaultTypeInternal _GreaterEqualLayerParams_default_instance_; @@ -462,6 +477,9 @@ extern NonMaximumSuppressionLayerParamsDefaultTypeInternal _NonMaximumSuppressio class NotEqualLayerParams; class NotEqualLayerParamsDefaultTypeInternal; extern NotEqualLayerParamsDefaultTypeInternal _NotEqualLayerParams_default_instance_; +class OneHotLayerParams; +class OneHotLayerParamsDefaultTypeInternal; +extern OneHotLayerParamsDefaultTypeInternal _OneHotLayerParams_default_instance_; class Optimizer; class OptimizerDefaultTypeInternal; extern OptimizerDefaultTypeInternal _Optimizer_default_instance_; @@ -480,6 +498,9 @@ extern PaddingLayerParams_PaddingReplicationDefaultTypeInternal _PaddingLayerPar class PermuteLayerParams; class PermuteLayerParamsDefaultTypeInternal; extern PermuteLayerParamsDefaultTypeInternal _PermuteLayerParams_default_instance_; +class Pooling3DLayerParams; +class Pooling3DLayerParamsDefaultTypeInternal; +extern Pooling3DLayerParamsDefaultTypeInternal _Pooling3DLayerParams_default_instance_; class PoolingLayerParams; class PoolingLayerParamsDefaultTypeInternal; extern PoolingLayerParamsDefaultTypeInternal _PoolingLayerParams_default_instance_; @@ -630,6 +651,9 @@ extern SinhLayerParamsDefaultTypeInternal _SinhLayerParams_default_instance_; class SizeRange; class SizeRangeDefaultTypeInternal; extern SizeRangeDefaultTypeInternal _SizeRange_default_instance_; +class SliceBySizeLayerParams; +class SliceBySizeLayerParamsDefaultTypeInternal; +extern SliceBySizeLayerParamsDefaultTypeInternal _SliceBySizeLayerParams_default_instance_; class SliceDynamicLayerParams; class SliceDynamicLayerParamsDefaultTypeInternal; extern SliceDynamicLayerParamsDefaultTypeInternal _SliceDynamicLayerParams_default_instance_; @@ -783,6 +807,18 @@ const BoxCoordinatesMode_Coordinates BoxCoordinatesMode_Coordinates_Coordinates_ const BoxCoordinatesMode_Coordinates BoxCoordinatesMode_Coordinates_Coordinates_MAX = BoxCoordinatesMode_Coordinates_CENTER_SIZE_WIDTH_FIRST; const int BoxCoordinatesMode_Coordinates_Coordinates_ARRAYSIZE = BoxCoordinatesMode_Coordinates_Coordinates_MAX + 1; +enum Convolution3DLayerParams_PaddingType { + Convolution3DLayerParams_PaddingType_CUSTOM = 0, + Convolution3DLayerParams_PaddingType_VALID = 1, + Convolution3DLayerParams_PaddingType_SAME = 2, + Convolution3DLayerParams_PaddingType_Convolution3DLayerParams_PaddingType_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, + 
Convolution3DLayerParams_PaddingType_Convolution3DLayerParams_PaddingType_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max +}; +bool Convolution3DLayerParams_PaddingType_IsValid(int value); +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams_PaddingType_PaddingType_MIN = Convolution3DLayerParams_PaddingType_CUSTOM; +const Convolution3DLayerParams_PaddingType Convolution3DLayerParams_PaddingType_PaddingType_MAX = Convolution3DLayerParams_PaddingType_SAME; +const int Convolution3DLayerParams_PaddingType_PaddingType_ARRAYSIZE = Convolution3DLayerParams_PaddingType_PaddingType_MAX + 1; + enum PoolingLayerParams_PoolingType { PoolingLayerParams_PoolingType_MAX = 0, PoolingLayerParams_PoolingType_AVERAGE = 1, @@ -795,6 +831,40 @@ const PoolingLayerParams_PoolingType PoolingLayerParams_PoolingType_PoolingType_ const PoolingLayerParams_PoolingType PoolingLayerParams_PoolingType_PoolingType_MAX = PoolingLayerParams_PoolingType_L2; const int PoolingLayerParams_PoolingType_PoolingType_ARRAYSIZE = PoolingLayerParams_PoolingType_PoolingType_MAX + 1; +enum Pooling3DLayerParams_PoolingType3D { + Pooling3DLayerParams_PoolingType3D_MAX = 0, + Pooling3DLayerParams_PoolingType3D_AVERAGE = 1, + Pooling3DLayerParams_PoolingType3D_Pooling3DLayerParams_PoolingType3D_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, + Pooling3DLayerParams_PoolingType3D_Pooling3DLayerParams_PoolingType3D_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max +}; +bool Pooling3DLayerParams_PoolingType3D_IsValid(int value); +const Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams_PoolingType3D_PoolingType3D_MIN = Pooling3DLayerParams_PoolingType3D_MAX; +const Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams_PoolingType3D_PoolingType3D_MAX = Pooling3DLayerParams_PoolingType3D_AVERAGE; +const int Pooling3DLayerParams_PoolingType3D_PoolingType3D_ARRAYSIZE = Pooling3DLayerParams_PoolingType3D_PoolingType3D_MAX + 1; + +enum Pooling3DLayerParams_Pooling3DPaddingType { + Pooling3DLayerParams_Pooling3DPaddingType_CUSTOM = 0, + Pooling3DLayerParams_Pooling3DPaddingType_VALID = 1, + Pooling3DLayerParams_Pooling3DPaddingType_SAME = 2, + Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DLayerParams_Pooling3DPaddingType_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, + Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DLayerParams_Pooling3DPaddingType_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max +}; +bool Pooling3DLayerParams_Pooling3DPaddingType_IsValid(int value); +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_MIN = Pooling3DLayerParams_Pooling3DPaddingType_CUSTOM; +const Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_MAX = Pooling3DLayerParams_Pooling3DPaddingType_SAME; +const int Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_ARRAYSIZE = Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_MAX + 1; + +enum GlobalPooling3DLayerParams_GlobalPoolingType3D { + GlobalPooling3DLayerParams_GlobalPoolingType3D_MAX = 0, + GlobalPooling3DLayerParams_GlobalPoolingType3D_AVERAGE = 1, + GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPooling3DLayerParams_GlobalPoolingType3D_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, + GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPooling3DLayerParams_GlobalPoolingType3D_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max +}; +bool 
GlobalPooling3DLayerParams_GlobalPoolingType3D_IsValid(int value); +const GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_MIN = GlobalPooling3DLayerParams_GlobalPoolingType3D_MAX; +const GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_MAX = GlobalPooling3DLayerParams_GlobalPoolingType3D_AVERAGE; +const int GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_ARRAYSIZE = GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_MAX + 1; + enum UnaryFunctionLayerParams_Operation { UnaryFunctionLayerParams_Operation_SQRT = 0, UnaryFunctionLayerParams_Operation_RSQRT = 1, @@ -823,6 +893,18 @@ const UpsampleLayerParams_InterpolationMode UpsampleLayerParams_InterpolationMod const UpsampleLayerParams_InterpolationMode UpsampleLayerParams_InterpolationMode_InterpolationMode_MAX = UpsampleLayerParams_InterpolationMode_BILINEAR; const int UpsampleLayerParams_InterpolationMode_InterpolationMode_ARRAYSIZE = UpsampleLayerParams_InterpolationMode_InterpolationMode_MAX + 1; +enum UpsampleLayerParams_LinearUpsampleMode { + UpsampleLayerParams_LinearUpsampleMode_DEFAULT = 0, + UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_TRUE = 1, + UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_FALSE = 2, + UpsampleLayerParams_LinearUpsampleMode_UpsampleLayerParams_LinearUpsampleMode_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, + UpsampleLayerParams_LinearUpsampleMode_UpsampleLayerParams_LinearUpsampleMode_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max +}; +bool UpsampleLayerParams_LinearUpsampleMode_IsValid(int value); +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_MIN = UpsampleLayerParams_LinearUpsampleMode_DEFAULT; +const UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_MAX = UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_FALSE; +const int UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_ARRAYSIZE = UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_MAX + 1; + enum FlattenLayerParams_FlattenOrder { FlattenLayerParams_FlattenOrder_CHANNEL_FIRST = 0, FlattenLayerParams_FlattenOrder_CHANNEL_LAST = 1, @@ -848,12 +930,13 @@ const int ReshapeLayerParams_ReshapeOrder_ReshapeOrder_ARRAYSIZE = ReshapeLayerP enum ReorganizeDataLayerParams_ReorganizationType { ReorganizeDataLayerParams_ReorganizationType_SPACE_TO_DEPTH = 0, ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE = 1, + ReorganizeDataLayerParams_ReorganizationType_PIXEL_SHUFFLE = 2, ReorganizeDataLayerParams_ReorganizationType_ReorganizeDataLayerParams_ReorganizationType_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, ReorganizeDataLayerParams_ReorganizationType_ReorganizeDataLayerParams_ReorganizationType_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max }; bool ReorganizeDataLayerParams_ReorganizationType_IsValid(int value); const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams_ReorganizationType_ReorganizationType_MIN = ReorganizeDataLayerParams_ReorganizationType_SPACE_TO_DEPTH; -const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams_ReorganizationType_ReorganizationType_MAX = ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE; +const ReorganizeDataLayerParams_ReorganizationType ReorganizeDataLayerParams_ReorganizationType_ReorganizationType_MAX = 
ReorganizeDataLayerParams_ReorganizationType_PIXEL_SHUFFLE; const int ReorganizeDataLayerParams_ReorganizationType_ReorganizationType_ARRAYSIZE = ReorganizeDataLayerParams_ReorganizationType_ReorganizationType_MAX + 1; enum SliceLayerParams_SliceAxis { @@ -2903,6 +2986,14 @@ class NeuralNetworkLayer : public ::google::protobuf::MessageLite /* @@protoc_in kWhereBroadcastable = 1330, kLayerNormalization = 1350, kNonMaximumSuppression = 1400, + kOneHot = 1450, + kCumSum = 1455, + kClampedReLU = 1460, + kArgSort = 1461, + kPooling3D = 1465, + kGlobalPooling3D = 1466, + kSliceBySize = 1470, + kConvolution3D = 1471, LAYER_NOT_SET = 0, }; @@ -4392,6 +4483,78 @@ class NeuralNetworkLayer : public ::google::protobuf::MessageLite /* @@protoc_in ::CoreML::Specification::NonMaximumSuppressionLayerParams* release_nonmaximumsuppression(); void set_allocated_nonmaximumsuppression(::CoreML::Specification::NonMaximumSuppressionLayerParams* nonmaximumsuppression); + // .CoreML.Specification.OneHotLayerParams oneHot = 1450; + bool has_onehot() const; + void clear_onehot(); + static const int kOneHotFieldNumber = 1450; + const ::CoreML::Specification::OneHotLayerParams& onehot() const; + ::CoreML::Specification::OneHotLayerParams* mutable_onehot(); + ::CoreML::Specification::OneHotLayerParams* release_onehot(); + void set_allocated_onehot(::CoreML::Specification::OneHotLayerParams* onehot); + + // .CoreML.Specification.CumSumLayerParams cumSum = 1455; + bool has_cumsum() const; + void clear_cumsum(); + static const int kCumSumFieldNumber = 1455; + const ::CoreML::Specification::CumSumLayerParams& cumsum() const; + ::CoreML::Specification::CumSumLayerParams* mutable_cumsum(); + ::CoreML::Specification::CumSumLayerParams* release_cumsum(); + void set_allocated_cumsum(::CoreML::Specification::CumSumLayerParams* cumsum); + + // .CoreML.Specification.ClampedReLULayerParams clampedReLU = 1460; + bool has_clampedrelu() const; + void clear_clampedrelu(); + static const int kClampedReLUFieldNumber = 1460; + const ::CoreML::Specification::ClampedReLULayerParams& clampedrelu() const; + ::CoreML::Specification::ClampedReLULayerParams* mutable_clampedrelu(); + ::CoreML::Specification::ClampedReLULayerParams* release_clampedrelu(); + void set_allocated_clampedrelu(::CoreML::Specification::ClampedReLULayerParams* clampedrelu); + + // .CoreML.Specification.ArgSortLayerParams argSort = 1461; + bool has_argsort() const; + void clear_argsort(); + static const int kArgSortFieldNumber = 1461; + const ::CoreML::Specification::ArgSortLayerParams& argsort() const; + ::CoreML::Specification::ArgSortLayerParams* mutable_argsort(); + ::CoreML::Specification::ArgSortLayerParams* release_argsort(); + void set_allocated_argsort(::CoreML::Specification::ArgSortLayerParams* argsort); + + // .CoreML.Specification.Pooling3DLayerParams pooling3d = 1465; + bool has_pooling3d() const; + void clear_pooling3d(); + static const int kPooling3DFieldNumber = 1465; + const ::CoreML::Specification::Pooling3DLayerParams& pooling3d() const; + ::CoreML::Specification::Pooling3DLayerParams* mutable_pooling3d(); + ::CoreML::Specification::Pooling3DLayerParams* release_pooling3d(); + void set_allocated_pooling3d(::CoreML::Specification::Pooling3DLayerParams* pooling3d); + + // .CoreML.Specification.GlobalPooling3DLayerParams globalPooling3d = 1466; + bool has_globalpooling3d() const; + void clear_globalpooling3d(); + static const int kGlobalPooling3DFieldNumber = 1466; + const ::CoreML::Specification::GlobalPooling3DLayerParams& globalpooling3d() const; + 
::CoreML::Specification::GlobalPooling3DLayerParams* mutable_globalpooling3d(); + ::CoreML::Specification::GlobalPooling3DLayerParams* release_globalpooling3d(); + void set_allocated_globalpooling3d(::CoreML::Specification::GlobalPooling3DLayerParams* globalpooling3d); + + // .CoreML.Specification.SliceBySizeLayerParams sliceBySize = 1470; + bool has_slicebysize() const; + void clear_slicebysize(); + static const int kSliceBySizeFieldNumber = 1470; + const ::CoreML::Specification::SliceBySizeLayerParams& slicebysize() const; + ::CoreML::Specification::SliceBySizeLayerParams* mutable_slicebysize(); + ::CoreML::Specification::SliceBySizeLayerParams* release_slicebysize(); + void set_allocated_slicebysize(::CoreML::Specification::SliceBySizeLayerParams* slicebysize); + + // .CoreML.Specification.Convolution3DLayerParams convolution3d = 1471; + bool has_convolution3d() const; + void clear_convolution3d(); + static const int kConvolution3DFieldNumber = 1471; + const ::CoreML::Specification::Convolution3DLayerParams& convolution3d() const; + ::CoreML::Specification::Convolution3DLayerParams* mutable_convolution3d(); + ::CoreML::Specification::Convolution3DLayerParams* release_convolution3d(); + void set_allocated_convolution3d(::CoreML::Specification::Convolution3DLayerParams* convolution3d); + LayerCase layer_case() const; // @@protoc_insertion_point(class_scope:CoreML.Specification.NeuralNetworkLayer) private: @@ -4545,6 +4708,14 @@ class NeuralNetworkLayer : public ::google::protobuf::MessageLite /* @@protoc_in void set_has_wherebroadcastable(); void set_has_layernormalization(); void set_has_nonmaximumsuppression(); + void set_has_onehot(); + void set_has_cumsum(); + void set_has_clampedrelu(); + void set_has_argsort(); + void set_has_pooling3d(); + void set_has_globalpooling3d(); + void set_has_slicebysize(); + void set_has_convolution3d(); inline bool has_layer() const; void clear_layer(); @@ -4709,6 +4880,14 @@ class NeuralNetworkLayer : public ::google::protobuf::MessageLite /* @@protoc_in ::CoreML::Specification::WhereBroadcastableLayerParams* wherebroadcastable_; ::CoreML::Specification::LayerNormalizationLayerParams* layernormalization_; ::CoreML::Specification::NonMaximumSuppressionLayerParams* nonmaximumsuppression_; + ::CoreML::Specification::OneHotLayerParams* onehot_; + ::CoreML::Specification::CumSumLayerParams* cumsum_; + ::CoreML::Specification::ClampedReLULayerParams* clampedrelu_; + ::CoreML::Specification::ArgSortLayerParams* argsort_; + ::CoreML::Specification::Pooling3DLayerParams* pooling3d_; + ::CoreML::Specification::GlobalPooling3DLayerParams* globalpooling3d_; + ::CoreML::Specification::SliceBySizeLayerParams* slicebysize_; + ::CoreML::Specification::Convolution3DLayerParams* convolution3d_; } layer_; mutable int _cached_size_; ::google::protobuf::uint32 _oneof_case_[1]; @@ -6527,6 +6706,20 @@ class WeightParams : public ::google::protobuf::MessageLite /* @@protoc_insertio ::std::string* release_rawvalue(); void set_allocated_rawvalue(::std::string* rawvalue); + // bytes int8RawValue = 31; + void clear_int8rawvalue(); + static const int kInt8RawValueFieldNumber = 31; + const ::std::string& int8rawvalue() const; + void set_int8rawvalue(const ::std::string& value); + #if LANG_CXX11 + void set_int8rawvalue(::std::string&& value); + #endif + void set_int8rawvalue(const char* value); + void set_int8rawvalue(const void* value, size_t size); + ::std::string* mutable_int8rawvalue(); + ::std::string* release_int8rawvalue(); + void set_allocated_int8rawvalue(::std::string* 
int8rawvalue); + + // .CoreML.Specification.QuantizationParams quantization = 40; bool has_quantization() const; void clear_quantization(); @@ -6550,6 +6743,7 @@ class WeightParams : public ::google::protobuf::MessageLite /* @@protoc_insertio mutable int _floatvalue_cached_byte_size_; ::google::protobuf::internal::ArenaStringPtr float16value_; ::google::protobuf::internal::ArenaStringPtr rawvalue_; + ::google::protobuf::internal::ArenaStringPtr int8rawvalue_; ::CoreML::Specification::QuantizationParams* quantization_; bool isupdatable_; mutable int _cached_size_; @@ -7077,6 +7271,275 @@ class ConvolutionLayerParams : public ::google::protobuf::MessageLite /* @@proto }; // ------------------------------------------------------------------- +class Convolution3DLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.Convolution3DLayerParams) */ { + public: + Convolution3DLayerParams(); + virtual ~Convolution3DLayerParams(); + + Convolution3DLayerParams(const Convolution3DLayerParams& from); + + inline Convolution3DLayerParams& operator=(const Convolution3DLayerParams& from) { + CopyFrom(from); + return *this; + } + + static const Convolution3DLayerParams& default_instance(); + + static inline const Convolution3DLayerParams* internal_default_instance() { + return reinterpret_cast<const Convolution3DLayerParams*>( + &_Convolution3DLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 46; + + void Swap(Convolution3DLayerParams* other); + + // implements Message ---------------------------------------------- + + inline Convolution3DLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } + + Convolution3DLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const Convolution3DLayerParams& from); + void MergeFrom(const Convolution3DLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(Convolution3DLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + typedef Convolution3DLayerParams_PaddingType PaddingType; + static const PaddingType CUSTOM = + Convolution3DLayerParams_PaddingType_CUSTOM; + static const PaddingType VALID = + Convolution3DLayerParams_PaddingType_VALID; + static const PaddingType SAME = + Convolution3DLayerParams_PaddingType_SAME; + static inline bool PaddingType_IsValid(int value) { + return Convolution3DLayerParams_PaddingType_IsValid(value); + } + static const PaddingType PaddingType_MIN = + Convolution3DLayerParams_PaddingType_PaddingType_MIN; + static const PaddingType PaddingType_MAX = + Convolution3DLayerParams_PaddingType_PaddingType_MAX;
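+ // Padding semantics follow the usual SAME/VALID convention: VALID applies no padding, SAME pads so each spatial output size is ceil(input / stride), and CUSTOM reads the customPadding* fields declared with the accessors below.
+ static const int PaddingType_ARRAYSIZE = +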
Convolution3DLayerParams_PaddingType_PaddingType_ARRAYSIZE; + + // accessors ------------------------------------------------------- + + // repeated uint64 outputShape = 87; + int outputshape_size() const; + void clear_outputshape(); + static const int kOutputShapeFieldNumber = 87; + ::google::protobuf::uint64 outputshape(int index) const; + void set_outputshape(int index, ::google::protobuf::uint64 value); + void add_outputshape(::google::protobuf::uint64 value); + const ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >& + outputshape() const; + ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >* + mutable_outputshape(); + + // .CoreML.Specification.WeightParams weights = 60; + bool has_weights() const; + void clear_weights(); + static const int kWeightsFieldNumber = 60; + const ::CoreML::Specification::WeightParams& weights() const; + ::CoreML::Specification::WeightParams* mutable_weights(); + ::CoreML::Specification::WeightParams* release_weights(); + void set_allocated_weights(::CoreML::Specification::WeightParams* weights); + + // .CoreML.Specification.WeightParams bias = 61; + bool has_bias() const; + void clear_bias(); + static const int kBiasFieldNumber = 61; + const ::CoreML::Specification::WeightParams& bias() const; + ::CoreML::Specification::WeightParams* mutable_bias(); + ::CoreML::Specification::WeightParams* release_bias(); + void set_allocated_bias(::CoreML::Specification::WeightParams* bias); + + // int32 outputChannels = 1; + void clear_outputchannels(); + static const int kOutputChannelsFieldNumber = 1; + ::google::protobuf::int32 outputchannels() const; + void set_outputchannels(::google::protobuf::int32 value); + + // int32 inputChannels = 2; + void clear_inputchannels(); + static const int kInputChannelsFieldNumber = 2; + ::google::protobuf::int32 inputchannels() const; + void set_inputchannels(::google::protobuf::int32 value); + + // int32 nGroups = 10; + void clear_ngroups(); + static const int kNGroupsFieldNumber = 10; + ::google::protobuf::int32 ngroups() const; + void set_ngroups(::google::protobuf::int32 value); + + // int32 kernelDepth = 20; + void clear_kerneldepth(); + static const int kKernelDepthFieldNumber = 20; + ::google::protobuf::int32 kerneldepth() const; + void set_kerneldepth(::google::protobuf::int32 value); + + // int32 kernelHeight = 21; + void clear_kernelheight(); + static const int kKernelHeightFieldNumber = 21; + ::google::protobuf::int32 kernelheight() const; + void set_kernelheight(::google::protobuf::int32 value); + + // int32 kernelWidth = 22; + void clear_kernelwidth(); + static const int kKernelWidthFieldNumber = 22; + ::google::protobuf::int32 kernelwidth() const; + void set_kernelwidth(::google::protobuf::int32 value); + + // int32 strideDepth = 31; + void clear_stridedepth(); + static const int kStrideDepthFieldNumber = 31; + ::google::protobuf::int32 stridedepth() const; + void set_stridedepth(::google::protobuf::int32 value); + + // int32 strideHeight = 32; + void clear_strideheight(); + static const int kStrideHeightFieldNumber = 32; + ::google::protobuf::int32 strideheight() const; + void set_strideheight(::google::protobuf::int32 value); + + // int32 strideWidth = 33; + void clear_stridewidth(); + static const int kStrideWidthFieldNumber = 33; + ::google::protobuf::int32 stridewidth() const; + void set_stridewidth(::google::protobuf::int32 value); + + // int32 dilationDepth = 40; + void clear_dilationdepth(); + static const int kDilationDepthFieldNumber = 40; + ::google::protobuf::int32 dilationdepth() 
const; + void set_dilationdepth(::google::protobuf::int32 value); + + // int32 dilationHeight = 41; + void clear_dilationheight(); + static const int kDilationHeightFieldNumber = 41; + ::google::protobuf::int32 dilationheight() const; + void set_dilationheight(::google::protobuf::int32 value); + + // int32 dilationWidth = 42; + void clear_dilationwidth(); + static const int kDilationWidthFieldNumber = 42; + ::google::protobuf::int32 dilationwidth() const; + void set_dilationwidth(::google::protobuf::int32 value); + + // bool hasBias = 50; + void clear_hasbias(); + static const int kHasBiasFieldNumber = 50; + bool hasbias() const; + void set_hasbias(bool value); + + // bool isDeconvolution = 86; + void clear_isdeconvolution(); + static const int kIsDeconvolutionFieldNumber = 86; + bool isdeconvolution() const; + void set_isdeconvolution(bool value); + + // .CoreML.Specification.Convolution3DLayerParams.PaddingType paddingType = 70; + void clear_paddingtype(); + static const int kPaddingTypeFieldNumber = 70; + ::CoreML::Specification::Convolution3DLayerParams_PaddingType paddingtype() const; + void set_paddingtype(::CoreML::Specification::Convolution3DLayerParams_PaddingType value); + + // int32 customPaddingFront = 80; + void clear_custompaddingfront(); + static const int kCustomPaddingFrontFieldNumber = 80; + ::google::protobuf::int32 custompaddingfront() const; + void set_custompaddingfront(::google::protobuf::int32 value); + + // int32 customPaddingBack = 81; + void clear_custompaddingback(); + static const int kCustomPaddingBackFieldNumber = 81; + ::google::protobuf::int32 custompaddingback() const; + void set_custompaddingback(::google::protobuf::int32 value); + + // int32 customPaddingTop = 82; + void clear_custompaddingtop(); + static const int kCustomPaddingTopFieldNumber = 82; + ::google::protobuf::int32 custompaddingtop() const; + void set_custompaddingtop(::google::protobuf::int32 value); + + // int32 customPaddingBottom = 83; + void clear_custompaddingbottom(); + static const int kCustomPaddingBottomFieldNumber = 83; + ::google::protobuf::int32 custompaddingbottom() const; + void set_custompaddingbottom(::google::protobuf::int32 value); + + // int32 customPaddingLeft = 84; + void clear_custompaddingleft(); + static const int kCustomPaddingLeftFieldNumber = 84; + ::google::protobuf::int32 custompaddingleft() const; + void set_custompaddingleft(::google::protobuf::int32 value); + + // int32 customPaddingRight = 85; + void clear_custompaddingright(); + static const int kCustomPaddingRightFieldNumber = 85; + ::google::protobuf::int32 custompaddingright() const; + void set_custompaddingright(::google::protobuf::int32 value); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.Convolution3DLayerParams) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::RepeatedField< ::google::protobuf::uint64 > outputshape_; + mutable int _outputshape_cached_byte_size_; + ::CoreML::Specification::WeightParams* weights_; + ::CoreML::Specification::WeightParams* bias_; + ::google::protobuf::int32 outputchannels_; + ::google::protobuf::int32 inputchannels_; + ::google::protobuf::int32 ngroups_; + ::google::protobuf::int32 kerneldepth_; + ::google::protobuf::int32 kernelheight_; + ::google::protobuf::int32 kernelwidth_; + ::google::protobuf::int32 stridedepth_; + ::google::protobuf::int32 strideheight_; + ::google::protobuf::int32 stridewidth_; + ::google::protobuf::int32 dilationdepth_; + ::google::protobuf::int32 
dilationheight_; + ::google::protobuf::int32 dilationwidth_; + bool hasbias_; + bool isdeconvolution_; + int paddingtype_; + ::google::protobuf::int32 custompaddingfront_; + ::google::protobuf::int32 custompaddingback_; + ::google::protobuf::int32 custompaddingtop_; + ::google::protobuf::int32 custompaddingbottom_; + ::google::protobuf::int32 custompaddingleft_; + ::google::protobuf::int32 custompaddingright_; + mutable int _cached_size_; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + class InnerProductLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.InnerProductLayerParams) */ { public: InnerProductLayerParams(); @@ -7096,7 +7559,7 @@ class InnerProductLayerParams : public ::google::protobuf::MessageLite /* @@prot &_InnerProductLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 46; + 47; void Swap(InnerProductLayerParams* other); @@ -7175,6 +7638,12 @@ class InnerProductLayerParams : public ::google::protobuf::MessageLite /* @@prot bool hasbias() const; void set_hasbias(bool value); + // bool int8DynamicQuantize = 22; + void clear_int8dynamicquantize(); + static const int kInt8DynamicQuantizeFieldNumber = 22; + bool int8dynamicquantize() const; + void set_int8dynamicquantize(bool value); + // @@protoc_insertion_point(class_scope:CoreML.Specification.InnerProductLayerParams) private: @@ -7184,6 +7653,7 @@ class InnerProductLayerParams : public ::google::protobuf::MessageLite /* @@prot ::google::protobuf::uint64 inputchannels_; ::google::protobuf::uint64 outputchannels_; bool hasbias_; + bool int8dynamicquantize_; mutable int _cached_size_; friend struct protobuf_NeuralNetwork_2eproto::TableStruct; }; @@ -7208,7 +7678,7 @@ class EmbeddingLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_EmbeddingLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 47; + 48; void Swap(EmbeddingLayerParams* other); @@ -7320,7 +7790,7 @@ class EmbeddingNDLayerParams : public ::google::protobuf::MessageLite /* @@proto &_EmbeddingNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 48; + 49; void Swap(EmbeddingNDLayerParams* other); @@ -7432,7 +7902,7 @@ class BatchnormLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_BatchnormLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 49; + 50; void Swap(BatchnormLayerParams* other); @@ -7571,7 +8041,7 @@ class PoolingLayerParams_ValidCompletePadding : public ::google::protobuf::Messa &_PoolingLayerParams_ValidCompletePadding_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 50; + 51; void Swap(PoolingLayerParams_ValidCompletePadding* other); @@ -7663,7 +8133,7 @@ class PoolingLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_PoolingLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 51; + 52; void Swap(PoolingLayerParams* other); @@ -7826,6 +8296,307 @@ class PoolingLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in }; // ------------------------------------------------------------------- +class Pooling3DLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.Pooling3DLayerParams) */ { + public: + Pooling3DLayerParams(); + virtual 
~Pooling3DLayerParams(); + + Pooling3DLayerParams(const Pooling3DLayerParams& from); + + inline Pooling3DLayerParams& operator=(const Pooling3DLayerParams& from) { + CopyFrom(from); + return *this; + } + + static const Pooling3DLayerParams& default_instance(); + + static inline const Pooling3DLayerParams* internal_default_instance() { + return reinterpret_cast<const Pooling3DLayerParams*>( + &_Pooling3DLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 53; + + void Swap(Pooling3DLayerParams* other); + + // implements Message ---------------------------------------------- + + inline Pooling3DLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } + + Pooling3DLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const Pooling3DLayerParams& from); + void MergeFrom(const Pooling3DLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(Pooling3DLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + typedef Pooling3DLayerParams_PoolingType3D PoolingType3D; + static const PoolingType3D MAX = + Pooling3DLayerParams_PoolingType3D_MAX; + static const PoolingType3D AVERAGE = + Pooling3DLayerParams_PoolingType3D_AVERAGE; + static inline bool PoolingType3D_IsValid(int value) { + return Pooling3DLayerParams_PoolingType3D_IsValid(value); + } + static const PoolingType3D PoolingType3D_MIN = + Pooling3DLayerParams_PoolingType3D_PoolingType3D_MIN; + static const PoolingType3D PoolingType3D_MAX = + Pooling3DLayerParams_PoolingType3D_PoolingType3D_MAX; + static const int PoolingType3D_ARRAYSIZE = + Pooling3DLayerParams_PoolingType3D_PoolingType3D_ARRAYSIZE; + + typedef Pooling3DLayerParams_Pooling3DPaddingType Pooling3DPaddingType; + static const Pooling3DPaddingType CUSTOM = + Pooling3DLayerParams_Pooling3DPaddingType_CUSTOM; + static const Pooling3DPaddingType VALID = + Pooling3DLayerParams_Pooling3DPaddingType_VALID; + static const Pooling3DPaddingType SAME = + Pooling3DLayerParams_Pooling3DPaddingType_SAME; + static inline bool Pooling3DPaddingType_IsValid(int value) { + return Pooling3DLayerParams_Pooling3DPaddingType_IsValid(value); + } + static const Pooling3DPaddingType Pooling3DPaddingType_MIN = + Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_MIN; + static const Pooling3DPaddingType Pooling3DPaddingType_MAX = + Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_MAX; + static const int Pooling3DPaddingType_ARRAYSIZE = + Pooling3DLayerParams_Pooling3DPaddingType_Pooling3DPaddingType_ARRAYSIZE; + + // accessors ------------------------------------------------------- + + // .CoreML.Specification.Pooling3DLayerParams.PoolingType3D type = 1; + void clear_type();
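+ // type selects MAX or AVERAGE pooling over each kernelDepth x kernelHeight x kernelWidth window; the customPadding* fields apply only when paddingType is CUSTOM, and countExcludePadding controls whether padded entries enter the AVERAGE denominator.
+ static 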
const int kTypeFieldNumber = 1; + ::CoreML::Specification::Pooling3DLayerParams_PoolingType3D type() const; + void set_type(::CoreML::Specification::Pooling3DLayerParams_PoolingType3D value); + + // int32 kernelDepth = 2; + void clear_kerneldepth(); + static const int kKernelDepthFieldNumber = 2; + ::google::protobuf::int32 kerneldepth() const; + void set_kerneldepth(::google::protobuf::int32 value); + + // int32 kernelHeight = 3; + void clear_kernelheight(); + static const int kKernelHeightFieldNumber = 3; + ::google::protobuf::int32 kernelheight() const; + void set_kernelheight(::google::protobuf::int32 value); + + // int32 kernelWidth = 4; + void clear_kernelwidth(); + static const int kKernelWidthFieldNumber = 4; + ::google::protobuf::int32 kernelwidth() const; + void set_kernelwidth(::google::protobuf::int32 value); + + // int32 strideDepth = 5; + void clear_stridedepth(); + static const int kStrideDepthFieldNumber = 5; + ::google::protobuf::int32 stridedepth() const; + void set_stridedepth(::google::protobuf::int32 value); + + // int32 strideHeight = 6; + void clear_strideheight(); + static const int kStrideHeightFieldNumber = 6; + ::google::protobuf::int32 strideheight() const; + void set_strideheight(::google::protobuf::int32 value); + + // int32 strideWidth = 7; + void clear_stridewidth(); + static const int kStrideWidthFieldNumber = 7; + ::google::protobuf::int32 stridewidth() const; + void set_stridewidth(::google::protobuf::int32 value); + + // int32 customPaddingFront = 8; + void clear_custompaddingfront(); + static const int kCustomPaddingFrontFieldNumber = 8; + ::google::protobuf::int32 custompaddingfront() const; + void set_custompaddingfront(::google::protobuf::int32 value); + + // int32 customPaddingBack = 9; + void clear_custompaddingback(); + static const int kCustomPaddingBackFieldNumber = 9; + ::google::protobuf::int32 custompaddingback() const; + void set_custompaddingback(::google::protobuf::int32 value); + + // int32 customPaddingTop = 10; + void clear_custompaddingtop(); + static const int kCustomPaddingTopFieldNumber = 10; + ::google::protobuf::int32 custompaddingtop() const; + void set_custompaddingtop(::google::protobuf::int32 value); + + // int32 customPaddingBottom = 11; + void clear_custompaddingbottom(); + static const int kCustomPaddingBottomFieldNumber = 11; + ::google::protobuf::int32 custompaddingbottom() const; + void set_custompaddingbottom(::google::protobuf::int32 value); + + // int32 customPaddingLeft = 12; + void clear_custompaddingleft(); + static const int kCustomPaddingLeftFieldNumber = 12; + ::google::protobuf::int32 custompaddingleft() const; + void set_custompaddingleft(::google::protobuf::int32 value); + + // int32 customPaddingRight = 13; + void clear_custompaddingright(); + static const int kCustomPaddingRightFieldNumber = 13; + ::google::protobuf::int32 custompaddingright() const; + void set_custompaddingright(::google::protobuf::int32 value); + + // bool countExcludePadding = 14; + void clear_countexcludepadding(); + static const int kCountExcludePaddingFieldNumber = 14; + bool countexcludepadding() const; + void set_countexcludepadding(bool value); + + // .CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType paddingType = 15; + void clear_paddingtype(); + static const int kPaddingTypeFieldNumber = 15; + ::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType paddingtype() const; + void set_paddingtype(::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType value); + + // 
@@protoc_insertion_point(class_scope:CoreML.Specification.Pooling3DLayerParams) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + int type_; + ::google::protobuf::int32 kerneldepth_; + ::google::protobuf::int32 kernelheight_; + ::google::protobuf::int32 kernelwidth_; + ::google::protobuf::int32 stridedepth_; + ::google::protobuf::int32 strideheight_; + ::google::protobuf::int32 stridewidth_; + ::google::protobuf::int32 custompaddingfront_; + ::google::protobuf::int32 custompaddingback_; + ::google::protobuf::int32 custompaddingtop_; + ::google::protobuf::int32 custompaddingbottom_; + ::google::protobuf::int32 custompaddingleft_; + ::google::protobuf::int32 custompaddingright_; + bool countexcludepadding_; + int paddingtype_; + mutable int _cached_size_; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + +class GlobalPooling3DLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.GlobalPooling3DLayerParams) */ { + public: + GlobalPooling3DLayerParams(); + virtual ~GlobalPooling3DLayerParams(); + + GlobalPooling3DLayerParams(const GlobalPooling3DLayerParams& from); + + inline GlobalPooling3DLayerParams& operator=(const GlobalPooling3DLayerParams& from) { + CopyFrom(from); + return *this; + } + + static const GlobalPooling3DLayerParams& default_instance(); + + static inline const GlobalPooling3DLayerParams* internal_default_instance() { + return reinterpret_cast<const GlobalPooling3DLayerParams*>( + &_GlobalPooling3DLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 54; + + void Swap(GlobalPooling3DLayerParams* other); + + // implements Message ---------------------------------------------- + + inline GlobalPooling3DLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } + + GlobalPooling3DLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const GlobalPooling3DLayerParams& from); + void MergeFrom(const GlobalPooling3DLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(GlobalPooling3DLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + typedef GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPoolingType3D; + static const GlobalPoolingType3D MAX = + GlobalPooling3DLayerParams_GlobalPoolingType3D_MAX; + static const GlobalPoolingType3D AVERAGE = + GlobalPooling3DLayerParams_GlobalPoolingType3D_AVERAGE; + static inline bool GlobalPoolingType3D_IsValid(int value) { + return GlobalPooling3DLayerParams_GlobalPoolingType3D_IsValid(value); + }
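+ // Global 3-D pooling collapses each channel's entire depth/height/width extent to a single value, so the pooling type is the layer's only parameter.
+ static const GlobalPoolingType3D 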
GlobalPoolingType3D_MIN = + GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_MIN; + static const GlobalPoolingType3D GlobalPoolingType3D_MAX = + GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_MAX; + static const int GlobalPoolingType3D_ARRAYSIZE = + GlobalPooling3DLayerParams_GlobalPoolingType3D_GlobalPoolingType3D_ARRAYSIZE; + + // accessors ------------------------------------------------------- + + // .CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D type = 1; + void clear_type(); + static const int kTypeFieldNumber = 1; + ::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D type() const; + void set_type(::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D value); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.GlobalPooling3DLayerParams) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + int type_; + mutable int _cached_size_; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + class PaddingLayerParams_PaddingConstant : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.PaddingLayerParams.PaddingConstant) */ { public: PaddingLayerParams_PaddingConstant(); @@ -7845,7 +8616,7 @@ class PaddingLayerParams_PaddingConstant : public ::google::protobuf::MessageLit &_PaddingLayerParams_PaddingConstant_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 52; + 55; void Swap(PaddingLayerParams_PaddingConstant* other); @@ -7923,7 +8694,7 @@ class PaddingLayerParams_PaddingReflection : public ::google::protobuf::MessageL &_PaddingLayerParams_PaddingReflection_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 53; + 56; void Swap(PaddingLayerParams_PaddingReflection* other); @@ -7994,7 +8765,7 @@ class PaddingLayerParams_PaddingReplication : public ::google::protobuf::Message &_PaddingLayerParams_PaddingReplication_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 54; + 57; void Swap(PaddingLayerParams_PaddingReplication* other); @@ -8072,7 +8843,7 @@ class PaddingLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_PaddingLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 55; + 58; void Swap(PaddingLayerParams* other); @@ -8200,7 +8971,7 @@ class ConcatLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ins &_ConcatLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 56; + 59; void Swap(ConcatLayerParams* other); @@ -8278,7 +9049,7 @@ class LRNLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_LRNLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 57; + 60; void Swap(LRNLayerParams* other); @@ -8377,7 +9148,7 @@ class SoftmaxLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_SoftmaxLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 58; + 61; void Swap(SoftmaxLayerParams* other); @@ -8448,7 +9219,7 @@ class SplitLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_SplitLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 59; + 62; void Swap(SplitLayerParams* other); @@ -8526,7 +9297,7 @@ class AddLayerParams : 
public ::google::protobuf::MessageLite /* @@protoc_insert &_AddLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 60; + 63; void Swap(AddLayerParams* other); @@ -8604,7 +9375,7 @@ class MultiplyLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_MultiplyLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 61; + 64; void Swap(MultiplyLayerParams* other); @@ -8682,7 +9453,7 @@ class UnaryFunctionLayerParams : public ::google::protobuf::MessageLite /* @@pro &_UnaryFunctionLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 62; + 65; void Swap(UnaryFunctionLayerParams* other); @@ -8815,7 +9586,7 @@ class UpsampleLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_UpsampleLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 63; + 66; void Swap(UpsampleLayerParams* other); @@ -8871,6 +9642,23 @@ class UpsampleLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i static const int InterpolationMode_ARRAYSIZE = UpsampleLayerParams_InterpolationMode_InterpolationMode_ARRAYSIZE; + typedef UpsampleLayerParams_LinearUpsampleMode LinearUpsampleMode; + static const LinearUpsampleMode DEFAULT = + UpsampleLayerParams_LinearUpsampleMode_DEFAULT; + static const LinearUpsampleMode ALIGN_CORNERS_TRUE = + UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_TRUE; + static const LinearUpsampleMode ALIGN_CORNERS_FALSE = + UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_FALSE; + static inline bool LinearUpsampleMode_IsValid(int value) { + return UpsampleLayerParams_LinearUpsampleMode_IsValid(value); + } + static const LinearUpsampleMode LinearUpsampleMode_MIN = + UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_MIN; + static const LinearUpsampleMode LinearUpsampleMode_MAX = + UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_MAX; + static const int LinearUpsampleMode_ARRAYSIZE = + UpsampleLayerParams_LinearUpsampleMode_LinearUpsampleMode_ARRAYSIZE; + // accessors ------------------------------------------------------- // repeated uint64 scalingFactor = 1; @@ -8885,19 +9673,40 @@ class UpsampleLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >* mutable_scalingfactor(); + // repeated float fractionalScalingFactor = 7; + int fractionalscalingfactor_size() const; + void clear_fractionalscalingfactor(); + static const int kFractionalScalingFactorFieldNumber = 7; + float fractionalscalingfactor(int index) const; + void set_fractionalscalingfactor(int index, float value); + void add_fractionalscalingfactor(float value); + const ::google::protobuf::RepeatedField< float >& + fractionalscalingfactor() const; + ::google::protobuf::RepeatedField< float >* + mutable_fractionalscalingfactor(); + // .CoreML.Specification.UpsampleLayerParams.InterpolationMode mode = 5; void clear_mode(); static const int kModeFieldNumber = 5; ::CoreML::Specification::UpsampleLayerParams_InterpolationMode mode() const; void set_mode(::CoreML::Specification::UpsampleLayerParams_InterpolationMode value); + // .CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode linearUpsampleMode = 6; + void clear_linearupsamplemode(); + static const int kLinearUpsampleModeFieldNumber = 6; + ::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode linearupsamplemode() const; + void 
set_linearupsamplemode(::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode value); + // @@protoc_insertion_point(class_scope:CoreML.Specification.UpsampleLayerParams) private: ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; ::google::protobuf::RepeatedField< ::google::protobuf::uint64 > scalingfactor_; mutable int _scalingfactor_cached_byte_size_; + ::google::protobuf::RepeatedField< float > fractionalscalingfactor_; + mutable int _fractionalscalingfactor_cached_byte_size_; int mode_; + int linearupsamplemode_; mutable int _cached_size_; friend struct protobuf_NeuralNetwork_2eproto::TableStruct; }; @@ -8922,7 +9731,7 @@ class ResizeBilinearLayerParams : public ::google::protobuf::MessageLite /* @@pr &_ResizeBilinearLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 64; + 67; void Swap(ResizeBilinearLayerParams* other); @@ -9017,7 +9826,7 @@ class CropResizeLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_CropResizeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 65; + 68; void Swap(CropResizeLayerParams* other); @@ -9136,7 +9945,7 @@ class BiasLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_BiasLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 66; + 69; void Swap(BiasLayerParams* other); @@ -9231,7 +10040,7 @@ class ScaleLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_ScaleLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 67; + 70; void Swap(ScaleLayerParams* other); @@ -9357,7 +10166,7 @@ class LoadConstantLayerParams : public ::google::protobuf::MessageLite /* @@prot &_LoadConstantLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 68; + 71; void Swap(LoadConstantLayerParams* other); @@ -9452,7 +10261,7 @@ class L2NormalizeLayerParams : public ::google::protobuf::MessageLite /* @@proto &_L2NormalizeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 69; + 72; void Swap(L2NormalizeLayerParams* other); @@ -9530,7 +10339,7 @@ class FlattenLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_FlattenLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 70; + 73; void Swap(FlattenLayerParams* other); @@ -9623,7 +10432,7 @@ class ReshapeLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_ReshapeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 71; + 74; void Swap(ReshapeLayerParams* other); @@ -9730,7 +10539,7 @@ class PermuteLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_PermuteLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 72; + 75; void Swap(PermuteLayerParams* other); @@ -9815,7 +10624,7 @@ class ReorganizeDataLayerParams : public ::google::protobuf::MessageLite /* @@pr &_ReorganizeDataLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 73; + 76; void Swap(ReorganizeDataLayerParams* other); @@ -9861,6 +10670,8 @@ class ReorganizeDataLayerParams : public ::google::protobuf::MessageLite /* @@pr ReorganizeDataLayerParams_ReorganizationType_SPACE_TO_DEPTH; static const ReorganizationType DEPTH_TO_SPACE = ReorganizeDataLayerParams_ReorganizationType_DEPTH_TO_SPACE; + static const 
ReorganizationType PIXEL_SHUFFLE = + ReorganizeDataLayerParams_ReorganizationType_PIXEL_SHUFFLE; static inline bool ReorganizationType_IsValid(int value) { return ReorganizeDataLayerParams_ReorganizationType_IsValid(value); } @@ -9915,7 +10726,7 @@ class SliceLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_SliceLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 74; + 77; void Swap(SliceLayerParams* other); @@ -10031,7 +10842,7 @@ class ReduceLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ins &_ReduceLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 75; + 78; void Swap(ReduceLayerParams* other); @@ -10175,7 +10986,7 @@ class CropLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_CropLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 76; + 79; void Swap(CropLayerParams* other); @@ -10270,7 +11081,7 @@ class AverageLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_AverageLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 77; + 80; void Swap(AverageLayerParams* other); @@ -10341,7 +11152,7 @@ class MaxLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_MaxLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 78; + 81; void Swap(MaxLayerParams* other); @@ -10412,7 +11223,7 @@ class MinLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_MinLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 79; + 82; void Swap(MinLayerParams* other); @@ -10483,7 +11294,7 @@ class DotProductLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_DotProductLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 80; + 83; void Swap(DotProductLayerParams* other); @@ -10561,7 +11372,7 @@ class MeanVarianceNormalizeLayerParams : public ::google::protobuf::MessageLite &_MeanVarianceNormalizeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 81; + 84; void Swap(MeanVarianceNormalizeLayerParams* other); @@ -10653,7 +11464,7 @@ class SequenceRepeatLayerParams : public ::google::protobuf::MessageLite /* @@pr &_SequenceRepeatLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 82; + 85; void Swap(SequenceRepeatLayerParams* other); @@ -10731,7 +11542,7 @@ class SimpleRecurrentLayerParams : public ::google::protobuf::MessageLite /* @@p &_SimpleRecurrentLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 83; + 86; void Swap(SimpleRecurrentLayerParams* other); @@ -10877,7 +11688,7 @@ class GRULayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_GRULayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 84; + 87; void Swap(GRULayerParams* other); @@ -11086,7 +11897,7 @@ class LSTMParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_ &_LSTMParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 85; + 88; void Swap(LSTMParams* other); @@ -11199,7 +12010,7 @@ class LSTMWeightParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_LSTMWeightParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 86; + 89; void 
Swap(LSTMWeightParams* other); @@ -11420,7 +12231,7 @@ class UniDirectionalLSTMLayerParams : public ::google::protobuf::MessageLite /* &_UniDirectionalLSTMLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 87; + 90; void Swap(UniDirectionalLSTMLayerParams* other); @@ -11545,7 +12356,7 @@ class BiDirectionalLSTMLayerParams : public ::google::protobuf::MessageLite /* @ &_BiDirectionalLSTMLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 88; + 91; void Swap(BiDirectionalLSTMLayerParams* other); @@ -11688,7 +12499,7 @@ class CustomLayerParams_CustomLayerParamValue : public ::google::protobuf::Messa &_CustomLayerParams_CustomLayerParamValue_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 89; + 92; void Swap(CustomLayerParams_CustomLayerParamValue* other); @@ -11835,7 +12646,7 @@ class CustomLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ins &_CustomLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 91; + 94; void Swap(CustomLayerParams* other); @@ -11974,7 +12785,7 @@ class TransposeLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_TransposeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 92; + 95; void Swap(TransposeLayerParams* other); @@ -12059,7 +12870,7 @@ class BatchedMatMulLayerParams : public ::google::protobuf::MessageLite /* @@pro &_BatchedMatMulLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 93; + 96; void Swap(BatchedMatMulLayerParams* other); @@ -12150,6 +12961,12 @@ class BatchedMatMulLayerParams : public ::google::protobuf::MessageLite /* @@pro bool hasbias() const; void set_hasbias(bool value); + // bool int8DynamicQuantize = 10; + void clear_int8dynamicquantize(); + static const int kInt8DynamicQuantizeFieldNumber = 10; + bool int8dynamicquantize() const; + void set_int8dynamicquantize(bool value); + // @@protoc_insertion_point(class_scope:CoreML.Specification.BatchedMatMulLayerParams) private: @@ -12161,6 +12978,7 @@ class BatchedMatMulLayerParams : public ::google::protobuf::MessageLite /* @@pro bool transposea_; bool transposeb_; bool hasbias_; + bool int8dynamicquantize_; mutable int _cached_size_; friend struct protobuf_NeuralNetwork_2eproto::TableStruct; }; @@ -12185,7 +13003,7 @@ class ConcatNDLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_ConcatNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 94; + 97; void Swap(ConcatNDLayerParams* other); @@ -12263,7 +13081,7 @@ class SoftmaxNDLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_SoftmaxNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 95; + 98; void Swap(SoftmaxNDLayerParams* other); @@ -12341,7 +13159,7 @@ class ReverseLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_ReverseLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 96; + 99; void Swap(ReverseLayerParams* other); @@ -12426,7 +13244,7 @@ class ReverseSeqLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_ReverseSeqLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 97; + 100; void Swap(ReverseSeqLayerParams* other); @@ -12511,7 +13329,7 @@ class LoadConstantNDLayerParams : public ::google::protobuf::MessageLite /* @@pr 
&_LoadConstantNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 98; + 101; void Swap(LoadConstantNDLayerParams* other); @@ -12606,7 +13424,7 @@ class FillLikeLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_FillLikeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 99; + 102; void Swap(FillLikeLayerParams* other); @@ -12684,7 +13502,7 @@ class FillStaticLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_FillStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 100; + 103; void Swap(FillStaticLayerParams* other); @@ -12776,7 +13594,7 @@ class FillDynamicLayerParams : public ::google::protobuf::MessageLite /* @@proto &_FillDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 101; + 104; void Swap(FillDynamicLayerParams* other); @@ -12854,7 +13672,7 @@ class WhereBroadcastableLayerParams : public ::google::protobuf::MessageLite /* &_WhereBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 102; + 105; void Swap(WhereBroadcastableLayerParams* other); @@ -12925,7 +13743,7 @@ class SinLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_SinLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 103; + 106; void Swap(SinLayerParams* other); @@ -12996,7 +13814,7 @@ class CosLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_CosLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 104; + 107; void Swap(CosLayerParams* other); @@ -13067,7 +13885,7 @@ class TanLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_TanLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 105; + 108; void Swap(TanLayerParams* other); @@ -13138,7 +13956,7 @@ class AsinLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_AsinLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 106; + 109; void Swap(AsinLayerParams* other); @@ -13209,7 +14027,7 @@ class AcosLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_AcosLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 107; + 110; void Swap(AcosLayerParams* other); @@ -13280,7 +14098,7 @@ class AtanLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_AtanLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 108; + 111; void Swap(AtanLayerParams* other); @@ -13351,7 +14169,7 @@ class SinhLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_SinhLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 109; + 112; void Swap(SinhLayerParams* other); @@ -13422,7 +14240,7 @@ class CoshLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_CoshLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 110; + 113; void Swap(CoshLayerParams* other); @@ -13493,7 +14311,7 @@ class TanhLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_TanhLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 111; + 114; void Swap(TanhLayerParams* other); @@ -13564,7 +14382,7 @@ class 
AsinhLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_AsinhLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 112; + 115; void Swap(AsinhLayerParams* other); @@ -13635,7 +14453,7 @@ class AcoshLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_AcoshLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 113; + 116; void Swap(AcoshLayerParams* other); @@ -13706,7 +14524,7 @@ class AtanhLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_AtanhLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 114; + 117; void Swap(AtanhLayerParams* other); @@ -13777,7 +14595,7 @@ class PowBroadcastableLayerParams : public ::google::protobuf::MessageLite /* @@ &_PowBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 115; + 118; void Swap(PowBroadcastableLayerParams* other); @@ -13848,7 +14666,7 @@ class Exp2LayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_Exp2LayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 116; + 119; void Swap(Exp2LayerParams* other); @@ -13919,7 +14737,7 @@ class WhereNonZeroLayerParams : public ::google::protobuf::MessageLite /* @@prot &_WhereNonZeroLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 117; + 120; void Swap(WhereNonZeroLayerParams* other); @@ -13990,7 +14808,7 @@ class MatrixBandPartLayerParams : public ::google::protobuf::MessageLite /* @@pr &_MatrixBandPartLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 118; + 121; void Swap(MatrixBandPartLayerParams* other); @@ -14075,7 +14893,7 @@ class UpperTriangularLayerParams : public ::google::protobuf::MessageLite /* @@p &_UpperTriangularLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 119; + 122; void Swap(UpperTriangularLayerParams* other); @@ -14153,7 +14971,7 @@ class LowerTriangularLayerParams : public ::google::protobuf::MessageLite /* @@p &_LowerTriangularLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 120; + 123; void Swap(LowerTriangularLayerParams* other); @@ -14231,7 +15049,7 @@ class BroadcastToLikeLayerParams : public ::google::protobuf::MessageLite /* @@p &_BroadcastToLikeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 121; + 124; void Swap(BroadcastToLikeLayerParams* other); @@ -14302,7 +15120,7 @@ class BroadcastToStaticLayerParams : public ::google::protobuf::MessageLite /* @ &_BroadcastToStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 122; + 125; void Swap(BroadcastToStaticLayerParams* other); @@ -14387,7 +15205,7 @@ class BroadcastToDynamicLayerParams : public ::google::protobuf::MessageLite /* &_BroadcastToDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 123; + 126; void Swap(BroadcastToDynamicLayerParams* other); @@ -14458,7 +15276,7 @@ class AddBroadcastableLayerParams : public ::google::protobuf::MessageLite /* @@ &_AddBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 124; + 127; void Swap(AddBroadcastableLayerParams* other); @@ -14529,7 +15347,7 @@ class MaxBroadcastableLayerParams : public 
::google::protobuf::MessageLite /* @@ &_MaxBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 125; + 128; void Swap(MaxBroadcastableLayerParams* other); @@ -14600,7 +15418,7 @@ class MinBroadcastableLayerParams : public ::google::protobuf::MessageLite /* @@ &_MinBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 126; + 129; void Swap(MinBroadcastableLayerParams* other); @@ -14671,7 +15489,7 @@ class ModBroadcastableLayerParams : public ::google::protobuf::MessageLite /* @@ &_ModBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 127; + 130; void Swap(ModBroadcastableLayerParams* other); @@ -14742,7 +15560,7 @@ class FloorDivBroadcastableLayerParams : public ::google::protobuf::MessageLite &_FloorDivBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 128; + 131; void Swap(FloorDivBroadcastableLayerParams* other); @@ -14813,7 +15631,7 @@ class SubtractBroadcastableLayerParams : public ::google::protobuf::MessageLite &_SubtractBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 129; + 132; void Swap(SubtractBroadcastableLayerParams* other); @@ -14884,7 +15702,7 @@ class MultiplyBroadcastableLayerParams : public ::google::protobuf::MessageLite &_MultiplyBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 130; + 133; void Swap(MultiplyBroadcastableLayerParams* other); @@ -14955,7 +15773,7 @@ class DivideBroadcastableLayerParams : public ::google::protobuf::MessageLite /* &_DivideBroadcastableLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 131; + 134; void Swap(DivideBroadcastableLayerParams* other); @@ -15026,7 +15844,7 @@ class GatherLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ins &_GatherLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 132; + 135; void Swap(GatherLayerParams* other); @@ -15104,7 +15922,7 @@ class ScatterLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_ScatterLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 133; + 136; void Swap(ScatterLayerParams* other); @@ -15189,7 +16007,7 @@ class GatherNDLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_GatherNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 134; + 137; void Swap(GatherNDLayerParams* other); @@ -15260,7 +16078,7 @@ class ScatterNDLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_ScatterNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 135; + 138; void Swap(ScatterNDLayerParams* other); @@ -15338,7 +16156,7 @@ class GatherAlongAxisLayerParams : public ::google::protobuf::MessageLite /* @@p &_GatherAlongAxisLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 136; + 139; void Swap(GatherAlongAxisLayerParams* other); @@ -15416,7 +16234,7 @@ class ScatterAlongAxisLayerParams : public ::google::protobuf::MessageLite /* @@ &_ScatterAlongAxisLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 137; + 140; void Swap(ScatterAlongAxisLayerParams* other); @@ -15501,7 +16319,7 @@ class StackLayerParams : public 
::google::protobuf::MessageLite /* @@protoc_inse &_StackLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 138; + 141; void Swap(StackLayerParams* other); @@ -15579,7 +16397,7 @@ class RankPreservingReshapeLayerParams : public ::google::protobuf::MessageLite &_RankPreservingReshapeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 139; + 142; void Swap(RankPreservingReshapeLayerParams* other); @@ -15664,7 +16482,7 @@ class ConstantPaddingLayerParams : public ::google::protobuf::MessageLite /* @@p &_ConstantPaddingLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 140; + 143; void Swap(ConstantPaddingLayerParams* other); @@ -15763,7 +16581,7 @@ class RandomNormalLikeLayerParams : public ::google::protobuf::MessageLite /* @@ &_RandomNormalLikeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 141; + 144; void Swap(RandomNormalLikeLayerParams* other); @@ -15855,7 +16673,7 @@ class RandomNormalStaticLayerParams : public ::google::protobuf::MessageLite /* &_RandomNormalStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 142; + 145; void Swap(RandomNormalStaticLayerParams* other); @@ -15961,7 +16779,7 @@ class RandomNormalDynamicLayerParams : public ::google::protobuf::MessageLite /* &_RandomNormalDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 143; + 146; void Swap(RandomNormalDynamicLayerParams* other); @@ -16053,7 +16871,7 @@ class RandomUniformLikeLayerParams : public ::google::protobuf::MessageLite /* @ &_RandomUniformLikeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 144; + 147; void Swap(RandomUniformLikeLayerParams* other); @@ -16145,7 +16963,7 @@ class RandomUniformStaticLayerParams : public ::google::protobuf::MessageLite /* &_RandomUniformStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 145; + 148; void Swap(RandomUniformStaticLayerParams* other); @@ -16251,7 +17069,7 @@ class RandomUniformDynamicLayerParams : public ::google::protobuf::MessageLite / &_RandomUniformDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 146; + 149; void Swap(RandomUniformDynamicLayerParams* other); @@ -16343,7 +17161,7 @@ class RandomBernoulliLikeLayerParams : public ::google::protobuf::MessageLite /* &_RandomBernoulliLikeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 147; + 150; void Swap(RandomBernoulliLikeLayerParams* other); @@ -16428,7 +17246,7 @@ class RandomBernoulliStaticLayerParams : public ::google::protobuf::MessageLite &_RandomBernoulliStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 148; + 151; void Swap(RandomBernoulliStaticLayerParams* other); @@ -16527,7 +17345,7 @@ class RandomBernoulliDynamicLayerParams : public ::google::protobuf::MessageLite &_RandomBernoulliDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 149; + 152; void Swap(RandomBernoulliDynamicLayerParams* other); @@ -16612,7 +17430,7 @@ class CategoricalDistributionLayerParams : public ::google::protobuf::MessageLit &_CategoricalDistributionLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 150; + 153; void 
Swap(CategoricalDistributionLayerParams* other); @@ -16718,7 +17536,7 @@ class ReduceL1LayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_ReduceL1LayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 151; + 154; void Swap(ReduceL1LayerParams* other); @@ -16817,7 +17635,7 @@ class ReduceL2LayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_ReduceL2LayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 152; + 155; void Swap(ReduceL2LayerParams* other); @@ -16916,7 +17734,7 @@ class ReduceMaxLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_ReduceMaxLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 153; + 156; void Swap(ReduceMaxLayerParams* other); @@ -17015,7 +17833,7 @@ class ReduceMinLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_ReduceMinLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 154; + 157; void Swap(ReduceMinLayerParams* other); @@ -17114,7 +17932,7 @@ class ReduceSumLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ &_ReduceSumLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 155; + 158; void Swap(ReduceSumLayerParams* other); @@ -17213,7 +18031,7 @@ class ReduceProdLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_ReduceProdLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 156; + 159; void Swap(ReduceProdLayerParams* other); @@ -17312,7 +18130,7 @@ class ReduceMeanLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_ReduceMeanLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 157; + 160; void Swap(ReduceMeanLayerParams* other); @@ -17411,7 +18229,7 @@ class ReduceLogSumLayerParams : public ::google::protobuf::MessageLite /* @@prot &_ReduceLogSumLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 158; + 161; void Swap(ReduceLogSumLayerParams* other); @@ -17510,7 +18328,7 @@ class ReduceSumSquareLayerParams : public ::google::protobuf::MessageLite /* @@p &_ReduceSumSquareLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 159; + 162; void Swap(ReduceSumSquareLayerParams* other); @@ -17609,7 +18427,7 @@ class ReduceLogSumExpLayerParams : public ::google::protobuf::MessageLite /* @@p &_ReduceLogSumExpLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 160; + 163; void Swap(ReduceLogSumExpLayerParams* other); @@ -17708,7 +18526,7 @@ class ExpandDimsLayerParams : public ::google::protobuf::MessageLite /* @@protoc &_ExpandDimsLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 161; + 164; void Swap(ExpandDimsLayerParams* other); @@ -17793,7 +18611,7 @@ class FlattenTo2DLayerParams : public ::google::protobuf::MessageLite /* @@proto &_FlattenTo2DLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 162; + 165; void Swap(FlattenTo2DLayerParams* other); @@ -17871,7 +18689,7 @@ class ReshapeStaticLayerParams : public ::google::protobuf::MessageLite /* @@pro &_ReshapeStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 163; + 166; void Swap(ReshapeStaticLayerParams* other); @@ -17956,7 +18774,7 @@ class 
ReshapeLikeLayerParams : public ::google::protobuf::MessageLite /* @@proto &_ReshapeLikeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 164; + 167; void Swap(ReshapeLikeLayerParams* other); @@ -18027,7 +18845,7 @@ class ReshapeDynamicLayerParams : public ::google::protobuf::MessageLite /* @@pr &_ReshapeDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 165; + 168; void Swap(ReshapeDynamicLayerParams* other); @@ -18098,7 +18916,7 @@ class SqueezeLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_SqueezeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 166; + 169; void Swap(SqueezeLayerParams* other); @@ -18190,7 +19008,7 @@ class TopKLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_TopKLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 167; + 170; void Swap(TopKLayerParams* other); @@ -18282,7 +19100,7 @@ class ArgMaxLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ins &_ArgMaxLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 168; + 171; void Swap(ArgMaxLayerParams* other); @@ -18367,7 +19185,7 @@ class ArgMinLayerParams : public ::google::protobuf::MessageLite /* @@protoc_ins &_ArgMinLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 169; + 172; void Swap(ArgMinLayerParams* other); @@ -18452,7 +19270,7 @@ class SplitNDLayerParams : public ::google::protobuf::MessageLite /* @@protoc_in &_SplitNDLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 170; + 173; void Swap(SplitNDLayerParams* other); @@ -18551,7 +19369,7 @@ class CeilLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_CeilLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 171; + 174; void Swap(CeilLayerParams* other); @@ -18622,7 +19440,7 @@ class RoundLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_RoundLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 172; + 175; void Swap(RoundLayerParams* other); @@ -18693,7 +19511,7 @@ class FloorLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inse &_FloorLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 173; + 176; void Swap(FloorLayerParams* other); @@ -18764,7 +19582,7 @@ class SignLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_SignLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 174; + 177; void Swap(SignLayerParams* other); @@ -18835,7 +19653,7 @@ class ClipLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_ClipLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 175; + 178; void Swap(ClipLayerParams* other); @@ -18920,7 +19738,7 @@ class SliceStaticLayerParams : public ::google::protobuf::MessageLite /* @@proto &_SliceStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 176; + 179; void Swap(SliceStaticLayerParams* other); @@ -19023,6 +19841,18 @@ class SliceStaticLayerParams : public ::google::protobuf::MessageLite /* @@proto ::google::protobuf::RepeatedField< ::google::protobuf::int64 >* mutable_strides(); + // repeated bool squeezeMasks = 
6; + int squeezemasks_size() const; + void clear_squeezemasks(); + static const int kSqueezeMasksFieldNumber = 6; + bool squeezemasks(int index) const; + void set_squeezemasks(int index, bool value); + void add_squeezemasks(bool value); + const ::google::protobuf::RepeatedField< bool >& + squeezemasks() const; + ::google::protobuf::RepeatedField< bool >* + mutable_squeezemasks(); + // @@protoc_insertion_point(class_scope:CoreML.Specification.SliceStaticLayerParams) private: @@ -19037,6 +19867,8 @@ class SliceStaticLayerParams : public ::google::protobuf::MessageLite /* @@proto mutable int _endmasks_cached_byte_size_; ::google::protobuf::RepeatedField< ::google::protobuf::int64 > strides_; mutable int _strides_cached_byte_size_; + ::google::protobuf::RepeatedField< bool > squeezemasks_; + mutable int _squeezemasks_cached_byte_size_; mutable int _cached_size_; friend struct protobuf_NeuralNetwork_2eproto::TableStruct; }; @@ -19061,7 +19893,7 @@ class SliceDynamicLayerParams : public ::google::protobuf::MessageLite /* @@prot &_SliceDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 177; + 180; void Swap(SliceDynamicLayerParams* other); @@ -19152,6 +19984,18 @@ class SliceDynamicLayerParams : public ::google::protobuf::MessageLite /* @@prot ::google::protobuf::RepeatedField< ::google::protobuf::int64 >* mutable_strides(); + // repeated bool squeezeMasks = 6; + int squeezemasks_size() const; + void clear_squeezemasks(); + static const int kSqueezeMasksFieldNumber = 6; + bool squeezemasks(int index) const; + void set_squeezemasks(int index, bool value); + void add_squeezemasks(bool value); + const ::google::protobuf::RepeatedField< bool >& + squeezemasks() const; + ::google::protobuf::RepeatedField< bool >* + mutable_squeezemasks(); + // @@protoc_insertion_point(class_scope:CoreML.Specification.SliceDynamicLayerParams) private: @@ -19164,6 +20008,8 @@ class SliceDynamicLayerParams : public ::google::protobuf::MessageLite /* @@prot mutable int _endmasks_cached_byte_size_; ::google::protobuf::RepeatedField< ::google::protobuf::int64 > strides_; mutable int _strides_cached_byte_size_; + ::google::protobuf::RepeatedField< bool > squeezemasks_; + mutable int _squeezemasks_cached_byte_size_; mutable int _cached_size_; friend struct protobuf_NeuralNetwork_2eproto::TableStruct; }; @@ -19188,7 +20034,7 @@ class TileLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_TileLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 178; + 181; void Swap(TileLayerParams* other); @@ -19273,7 +20119,7 @@ class GetShapeLayerParams : public ::google::protobuf::MessageLite /* @@protoc_i &_GetShapeLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 179; + 182; void Swap(GetShapeLayerParams* other); @@ -19344,7 +20190,7 @@ class ErfLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insert &_ErfLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 180; + 183; void Swap(ErfLayerParams* other); @@ -19415,7 +20261,7 @@ class GeluLayerParams : public ::google::protobuf::MessageLite /* @@protoc_inser &_GeluLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 181; + 184; void Swap(GeluLayerParams* other); @@ -19510,7 +20356,7 @@ class RangeStaticLayerParams : public ::google::protobuf::MessageLite /* @@proto &_RangeStaticLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR 
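For reference, the new `squeezeMasks` field added to both `SliceStaticLayerParams` and `SliceDynamicLayerParams` above is an ordinary repeated bool, so it is driven through the usual generated accessors. A minimal sketch, assuming the generated header is named `NeuralNetwork.pb.h` (inferred from the `protobuf_NeuralNetwork_2eproto` table struct) and that a true mask means the corresponding axis is squeezed out of the output:

```cpp
#include "NeuralNetwork.pb.h"  // assumed generated-header name

void MarkSqueezedAxes(CoreML::Specification::SliceStaticLayerParams* slice) {
  slice->clear_squeezemasks();
  slice->add_squeezemasks(false);  // keep the first sliced axis (assumed semantics)
  slice->add_squeezemasks(true);   // drop the second axis from the output (assumed)
  // Bulk access mirrors the existing strides/beginMasks/endMasks fields.
  google::protobuf::RepeatedField<bool>* masks = slice->mutable_squeezemasks();
  masks->Set(1, true);
}
```

The `_squeezemasks_cached_byte_size_` member added alongside the field is internal bookkeeping for the packed wire encoding; user code never touches it.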
int const kIndexInFileMessages = - 182; + 185; void Swap(RangeStaticLayerParams* other); @@ -19602,7 +20448,7 @@ class RangeDynamicLayerParams : public ::google::protobuf::MessageLite /* @@prot &_RangeDynamicLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 183; + 186; void Swap(RangeDynamicLayerParams* other); @@ -19687,7 +20533,7 @@ class SlidingWindowsLayerParams : public ::google::protobuf::MessageLite /* @@pr &_SlidingWindowsLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 184; + 187; void Swap(SlidingWindowsLayerParams* other); @@ -19779,7 +20625,7 @@ class LayerNormalizationLayerParams : public ::google::protobuf::MessageLite /* &_LayerNormalizationLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 185; + 188; void Swap(LayerNormalizationLayerParams* other); @@ -19891,7 +20737,7 @@ class NonMaximumSuppressionLayerParams : public ::google::protobuf::MessageLite &_NonMaximumSuppressionLayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 186; + 189; void Swap(NonMaximumSuppressionLayerParams* other); @@ -19971,44 +20817,38 @@ class NonMaximumSuppressionLayerParams : public ::google::protobuf::MessageLite }; // ------------------------------------------------------------------- -class NeuralNetworkClassifier : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.NeuralNetworkClassifier) */ { +class ClampedReLULayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.ClampedReLULayerParams) */ { public: - NeuralNetworkClassifier(); - virtual ~NeuralNetworkClassifier(); + ClampedReLULayerParams(); + virtual ~ClampedReLULayerParams(); - NeuralNetworkClassifier(const NeuralNetworkClassifier& from); + ClampedReLULayerParams(const ClampedReLULayerParams& from); - inline NeuralNetworkClassifier& operator=(const NeuralNetworkClassifier& from) { + inline ClampedReLULayerParams& operator=(const ClampedReLULayerParams& from) { CopyFrom(from); return *this; } - static const NeuralNetworkClassifier& default_instance(); - - enum ClassLabelsCase { - kStringClassLabels = 100, - kInt64ClassLabels = 101, - CLASSLABELS_NOT_SET = 0, - }; + static const ClampedReLULayerParams& default_instance(); - static inline const NeuralNetworkClassifier* internal_default_instance() { - return reinterpret_cast( - &_NeuralNetworkClassifier_default_instance_); + static inline const ClampedReLULayerParams* internal_default_instance() { + return reinterpret_cast( + &_ClampedReLULayerParams_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 187; + 190; - void Swap(NeuralNetworkClassifier* other); + void Swap(ClampedReLULayerParams* other); // implements Message ---------------------------------------------- - inline NeuralNetworkClassifier* New() const PROTOBUF_FINAL { return New(NULL); } + inline ClampedReLULayerParams* New() const PROTOBUF_FINAL { return New(NULL); } - NeuralNetworkClassifier* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + ClampedReLULayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) PROTOBUF_FINAL; - void CopyFrom(const NeuralNetworkClassifier& from); - void MergeFrom(const NeuralNetworkClassifier& from); + void CopyFrom(const ClampedReLULayerParams& from); + void MergeFrom(const 
ClampedReLULayerParams& from); void Clear() PROTOBUF_FINAL; bool IsInitialized() const PROTOBUF_FINAL; @@ -20023,7 +20863,7 @@ class NeuralNetworkClassifier : public ::google::protobuf::MessageLite /* @@prot void SharedCtor(); void SharedDtor(); void SetCachedSize(int size) const; - void InternalSwap(NeuralNetworkClassifier* other); + void InternalSwap(ClampedReLULayerParams* other); private: inline ::google::protobuf::Arena* GetArenaNoVirtual() const { return NULL; @@ -20039,144 +20879,237 @@ class NeuralNetworkClassifier : public ::google::protobuf::MessageLite /* @@prot // accessors ------------------------------------------------------- - // repeated .CoreML.Specification.NeuralNetworkLayer layers = 1; - int layers_size() const; - void clear_layers(); - static const int kLayersFieldNumber = 1; - const ::CoreML::Specification::NeuralNetworkLayer& layers(int index) const; - ::CoreML::Specification::NeuralNetworkLayer* mutable_layers(int index); - ::CoreML::Specification::NeuralNetworkLayer* add_layers(); - ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkLayer >* - mutable_layers(); - const ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkLayer >& - layers() const; + // float alpha = 1; + void clear_alpha(); + static const int kAlphaFieldNumber = 1; + float alpha() const; + void set_alpha(float value); - // repeated .CoreML.Specification.NeuralNetworkPreprocessing preprocessing = 2; - int preprocessing_size() const; - void clear_preprocessing(); - static const int kPreprocessingFieldNumber = 2; - const ::CoreML::Specification::NeuralNetworkPreprocessing& preprocessing(int index) const; - ::CoreML::Specification::NeuralNetworkPreprocessing* mutable_preprocessing(int index); - ::CoreML::Specification::NeuralNetworkPreprocessing* add_preprocessing(); - ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkPreprocessing >* - mutable_preprocessing(); - const ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkPreprocessing >& - preprocessing() const; + // float beta = 2; + void clear_beta(); + static const int kBetaFieldNumber = 2; + float beta() const; + void set_beta(float value); - // string labelProbabilityLayerName = 200; - void clear_labelprobabilitylayername(); - static const int kLabelProbabilityLayerNameFieldNumber = 200; - const ::std::string& labelprobabilitylayername() const; - void set_labelprobabilitylayername(const ::std::string& value); - #if LANG_CXX11 - void set_labelprobabilitylayername(::std::string&& value); - #endif - void set_labelprobabilitylayername(const char* value); - void set_labelprobabilitylayername(const char* value, size_t size); - ::std::string* mutable_labelprobabilitylayername(); - ::std::string* release_labelprobabilitylayername(); - void set_allocated_labelprobabilitylayername(::std::string* labelprobabilitylayername); + // @@protoc_insertion_point(class_scope:CoreML.Specification.ClampedReLULayerParams) + private: - // .CoreML.Specification.NetworkUpdateParameters updateParams = 10; - bool has_updateparams() const; - void clear_updateparams(); - static const int kUpdateParamsFieldNumber = 10; - const ::CoreML::Specification::NetworkUpdateParameters& updateparams() const; - ::CoreML::Specification::NetworkUpdateParameters* mutable_updateparams(); - ::CoreML::Specification::NetworkUpdateParameters* release_updateparams(); - void set_allocated_updateparams(::CoreML::Specification::NetworkUpdateParameters* updateparams); + 
::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + float alpha_; + float beta_; + mutable int _cached_size_; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- - // .CoreML.Specification.NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; - void clear_arrayinputshapemapping(); - static const int kArrayInputShapeMappingFieldNumber = 5; - ::CoreML::Specification::NeuralNetworkMultiArrayShapeMapping arrayinputshapemapping() const; - void set_arrayinputshapemapping(::CoreML::Specification::NeuralNetworkMultiArrayShapeMapping value); +class ArgSortLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.ArgSortLayerParams) */ { + public: + ArgSortLayerParams(); + virtual ~ArgSortLayerParams(); - // .CoreML.Specification.NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; - void clear_imageinputshapemapping(); - static const int kImageInputShapeMappingFieldNumber = 6; - ::CoreML::Specification::NeuralNetworkImageShapeMapping imageinputshapemapping() const; - void set_imageinputshapemapping(::CoreML::Specification::NeuralNetworkImageShapeMapping value); + ArgSortLayerParams(const ArgSortLayerParams& from); - // .CoreML.Specification.StringVector stringClassLabels = 100; - bool has_stringclasslabels() const; - void clear_stringclasslabels(); - static const int kStringClassLabelsFieldNumber = 100; - const ::CoreML::Specification::StringVector& stringclasslabels() const; - ::CoreML::Specification::StringVector* mutable_stringclasslabels(); - ::CoreML::Specification::StringVector* release_stringclasslabels(); - void set_allocated_stringclasslabels(::CoreML::Specification::StringVector* stringclasslabels); + inline ArgSortLayerParams& operator=(const ArgSortLayerParams& from) { + CopyFrom(from); + return *this; + } - // .CoreML.Specification.Int64Vector int64ClassLabels = 101; - bool has_int64classlabels() const; - void clear_int64classlabels(); - static const int kInt64ClassLabelsFieldNumber = 101; - const ::CoreML::Specification::Int64Vector& int64classlabels() const; - ::CoreML::Specification::Int64Vector* mutable_int64classlabels(); - ::CoreML::Specification::Int64Vector* release_int64classlabels(); - void set_allocated_int64classlabels(::CoreML::Specification::Int64Vector* int64classlabels); + static const ArgSortLayerParams& default_instance(); - ClassLabelsCase ClassLabels_case() const; - // @@protoc_insertion_point(class_scope:CoreML.Specification.NeuralNetworkClassifier) - private: - void set_has_stringclasslabels(); - void set_has_int64classlabels(); + static inline const ArgSortLayerParams* internal_default_instance() { + return reinterpret_cast( + &_ArgSortLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 191; - inline bool has_ClassLabels() const; - void clear_ClassLabels(); - inline void clear_has_ClassLabels(); + void Swap(ArgSortLayerParams* other); + + // implements Message ---------------------------------------------- + + inline ArgSortLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } + + ArgSortLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const ArgSortLayerParams& from); + void MergeFrom(const ArgSortLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const 
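`ClampedReLULayerParams` above carries just two floats, `alpha` (field 1) and `beta` (field 2). A sketch of constructing one; the comments on what the fields mean (negative-input slope and upper clamp) are assumptions about the layer semantics, not something this header states:

```cpp
#include "NeuralNetwork.pb.h"  // assumed generated-header name

CoreML::Specification::ClampedReLULayerParams MakeClampedReLU() {
  CoreML::Specification::ClampedReLULayerParams p;
  p.set_alpha(0.1f);  // assumed: slope applied to negative inputs
  p.set_beta(6.0f);   // assumed: upper clamp, giving a ReLU6-style cutoff
  return p;
}
```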
PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(ArgSortLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // int64 axis = 1; + void clear_axis(); + static const int kAxisFieldNumber = 1; + ::google::protobuf::int64 axis() const; + void set_axis(::google::protobuf::int64 value); + + // bool descending = 2; + void clear_descending(); + static const int kDescendingFieldNumber = 2; + bool descending() const; + void set_descending(bool value); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.ArgSortLayerParams) + private: ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkLayer > layers_; - ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkPreprocessing > preprocessing_; - ::google::protobuf::internal::ArenaStringPtr labelprobabilitylayername_; - ::CoreML::Specification::NetworkUpdateParameters* updateparams_; - int arrayinputshapemapping_; - int imageinputshapemapping_; - union ClassLabelsUnion { - ClassLabelsUnion() {} - ::CoreML::Specification::StringVector* stringclasslabels_; - ::CoreML::Specification::Int64Vector* int64classlabels_; - } ClassLabels_; + ::google::protobuf::int64 axis_; + bool descending_; mutable int _cached_size_; - ::google::protobuf::uint32 _oneof_case_[1]; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + +class SliceBySizeLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.SliceBySizeLayerParams) */ { + public: + SliceBySizeLayerParams(); + virtual ~SliceBySizeLayerParams(); + + SliceBySizeLayerParams(const SliceBySizeLayerParams& from); + + inline SliceBySizeLayerParams& operator=(const SliceBySizeLayerParams& from) { + CopyFrom(from); + return *this; + } + + static const SliceBySizeLayerParams& default_instance(); + + static inline const SliceBySizeLayerParams* internal_default_instance() { + return reinterpret_cast( + &_SliceBySizeLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 192; + + void Swap(SliceBySizeLayerParams* other); + + // implements Message ---------------------------------------------- + + inline SliceBySizeLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } + + SliceBySizeLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const SliceBySizeLayerParams& from); + void MergeFrom(const SliceBySizeLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t 
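The `ArgSortLayerParams` accessors above reduce to one int64 and one bool, so configuring the message is a two-setter sketch (same assumed header as before):

```cpp
#include "NeuralNetwork.pb.h"  // assumed generated-header name

CoreML::Specification::ArgSortLayerParams MakeArgSort() {
  CoreML::Specification::ArgSortLayerParams p;
  p.set_axis(-1);          // int64 axis = 1
  p.set_descending(true);  // bool descending = 2
  return p;
}
```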
ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(SliceBySizeLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // int64 size = 2; + void clear_size(); + static const int kSizeFieldNumber = 2; + ::google::protobuf::int64 size() const; + void set_size(::google::protobuf::int64 value); + + // int64 axis = 3; + void clear_axis(); + static const int kAxisFieldNumber = 3; + ::google::protobuf::int64 axis() const; + void set_axis(::google::protobuf::int64 value); + // @@protoc_insertion_point(class_scope:CoreML.Specification.SliceBySizeLayerParams) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::int64 size_; + ::google::protobuf::int64 axis_; + mutable int _cached_size_; friend struct protobuf_NeuralNetwork_2eproto::TableStruct; }; // ------------------------------------------------------------------- -class NeuralNetworkRegressor : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.NeuralNetworkRegressor) */ { +class NeuralNetworkClassifier : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.NeuralNetworkClassifier) */ { public: - NeuralNetworkRegressor(); - virtual ~NeuralNetworkRegressor(); + NeuralNetworkClassifier(); + virtual ~NeuralNetworkClassifier(); - NeuralNetworkRegressor(const NeuralNetworkRegressor& from); + NeuralNetworkClassifier(const NeuralNetworkClassifier& from); - inline NeuralNetworkRegressor& operator=(const NeuralNetworkRegressor& from) { + inline NeuralNetworkClassifier& operator=(const NeuralNetworkClassifier& from) { CopyFrom(from); return *this; } - static const NeuralNetworkRegressor& default_instance(); + static const NeuralNetworkClassifier& default_instance(); - static inline const NeuralNetworkRegressor* internal_default_instance() { - return reinterpret_cast( - &_NeuralNetworkRegressor_default_instance_); + enum ClassLabelsCase { + kStringClassLabels = 100, + kInt64ClassLabels = 101, + CLASSLABELS_NOT_SET = 0, + }; + + static inline const NeuralNetworkClassifier* internal_default_instance() { + return reinterpret_cast( + &_NeuralNetworkClassifier_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 188; + 193; - void Swap(NeuralNetworkRegressor* other); + void Swap(NeuralNetworkClassifier* other); // implements Message ---------------------------------------------- - inline NeuralNetworkRegressor* New() const PROTOBUF_FINAL { return New(NULL); } + inline NeuralNetworkClassifier* New() const PROTOBUF_FINAL { return New(NULL); } - NeuralNetworkRegressor* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + NeuralNetworkClassifier* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; void 
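`SliceBySizeLayerParams` above exposes only `size` (field 2) and `axis` (field 3) in this excerpt. A sketch, with the field meanings (slice length taken along the given axis) noted as assumptions:

```cpp
#include "NeuralNetwork.pb.h"  // assumed generated-header name

CoreML::Specification::SliceBySizeLayerParams MakeSliceBySize() {
  CoreML::Specification::SliceBySizeLayerParams p;
  p.set_size(4);  // assumed: number of elements kept along the axis
  p.set_axis(0);  // assumed: axis being sliced
  return p;
}
```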
CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) PROTOBUF_FINAL; - void CopyFrom(const NeuralNetworkRegressor& from); - void MergeFrom(const NeuralNetworkRegressor& from); + void CopyFrom(const NeuralNetworkClassifier& from); + void MergeFrom(const NeuralNetworkClassifier& from); void Clear() PROTOBUF_FINAL; bool IsInitialized() const PROTOBUF_FINAL; @@ -20191,7 +21124,366 @@ class NeuralNetworkRegressor : public ::google::protobuf::MessageLite /* @@proto void SharedCtor(); void SharedDtor(); void SetCachedSize(int size) const; - void InternalSwap(NeuralNetworkRegressor* other); + void InternalSwap(NeuralNetworkClassifier* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // repeated .CoreML.Specification.NeuralNetworkLayer layers = 1; + int layers_size() const; + void clear_layers(); + static const int kLayersFieldNumber = 1; + const ::CoreML::Specification::NeuralNetworkLayer& layers(int index) const; + ::CoreML::Specification::NeuralNetworkLayer* mutable_layers(int index); + ::CoreML::Specification::NeuralNetworkLayer* add_layers(); + ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkLayer >* + mutable_layers(); + const ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkLayer >& + layers() const; + + // repeated .CoreML.Specification.NeuralNetworkPreprocessing preprocessing = 2; + int preprocessing_size() const; + void clear_preprocessing(); + static const int kPreprocessingFieldNumber = 2; + const ::CoreML::Specification::NeuralNetworkPreprocessing& preprocessing(int index) const; + ::CoreML::Specification::NeuralNetworkPreprocessing* mutable_preprocessing(int index); + ::CoreML::Specification::NeuralNetworkPreprocessing* add_preprocessing(); + ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkPreprocessing >* + mutable_preprocessing(); + const ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkPreprocessing >& + preprocessing() const; + + // string labelProbabilityLayerName = 200; + void clear_labelprobabilitylayername(); + static const int kLabelProbabilityLayerNameFieldNumber = 200; + const ::std::string& labelprobabilitylayername() const; + void set_labelprobabilitylayername(const ::std::string& value); + #if LANG_CXX11 + void set_labelprobabilitylayername(::std::string&& value); + #endif + void set_labelprobabilitylayername(const char* value); + void set_labelprobabilitylayername(const char* value, size_t size); + ::std::string* mutable_labelprobabilitylayername(); + ::std::string* release_labelprobabilitylayername(); + void set_allocated_labelprobabilitylayername(::std::string* labelprobabilitylayername); + + // .CoreML.Specification.NetworkUpdateParameters updateParams = 10; + bool has_updateparams() const; + void clear_updateparams(); + static const int kUpdateParamsFieldNumber = 10; + const ::CoreML::Specification::NetworkUpdateParameters& updateparams() const; + ::CoreML::Specification::NetworkUpdateParameters* mutable_updateparams(); + ::CoreML::Specification::NetworkUpdateParameters* release_updateparams(); + void set_allocated_updateparams(::CoreML::Specification::NetworkUpdateParameters* updateparams); + + // 
.CoreML.Specification.NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; + void clear_arrayinputshapemapping(); + static const int kArrayInputShapeMappingFieldNumber = 5; + ::CoreML::Specification::NeuralNetworkMultiArrayShapeMapping arrayinputshapemapping() const; + void set_arrayinputshapemapping(::CoreML::Specification::NeuralNetworkMultiArrayShapeMapping value); + + // .CoreML.Specification.NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; + void clear_imageinputshapemapping(); + static const int kImageInputShapeMappingFieldNumber = 6; + ::CoreML::Specification::NeuralNetworkImageShapeMapping imageinputshapemapping() const; + void set_imageinputshapemapping(::CoreML::Specification::NeuralNetworkImageShapeMapping value); + + // .CoreML.Specification.StringVector stringClassLabels = 100; + bool has_stringclasslabels() const; + void clear_stringclasslabels(); + static const int kStringClassLabelsFieldNumber = 100; + const ::CoreML::Specification::StringVector& stringclasslabels() const; + ::CoreML::Specification::StringVector* mutable_stringclasslabels(); + ::CoreML::Specification::StringVector* release_stringclasslabels(); + void set_allocated_stringclasslabels(::CoreML::Specification::StringVector* stringclasslabels); + + // .CoreML.Specification.Int64Vector int64ClassLabels = 101; + bool has_int64classlabels() const; + void clear_int64classlabels(); + static const int kInt64ClassLabelsFieldNumber = 101; + const ::CoreML::Specification::Int64Vector& int64classlabels() const; + ::CoreML::Specification::Int64Vector* mutable_int64classlabels(); + ::CoreML::Specification::Int64Vector* release_int64classlabels(); + void set_allocated_int64classlabels(::CoreML::Specification::Int64Vector* int64classlabels); + + ClassLabelsCase ClassLabels_case() const; + // @@protoc_insertion_point(class_scope:CoreML.Specification.NeuralNetworkClassifier) + private: + void set_has_stringclasslabels(); + void set_has_int64classlabels(); + + inline bool has_ClassLabels() const; + void clear_ClassLabels(); + inline void clear_has_ClassLabels(); + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkLayer > layers_; + ::google::protobuf::RepeatedPtrField< ::CoreML::Specification::NeuralNetworkPreprocessing > preprocessing_; + ::google::protobuf::internal::ArenaStringPtr labelprobabilitylayername_; + ::CoreML::Specification::NetworkUpdateParameters* updateparams_; + int arrayinputshapemapping_; + int imageinputshapemapping_; + union ClassLabelsUnion { + ClassLabelsUnion() {} + ::CoreML::Specification::StringVector* stringclasslabels_; + ::CoreML::Specification::Int64Vector* int64classlabels_; + } ClassLabels_; + mutable int _cached_size_; + ::google::protobuf::uint32 _oneof_case_[1]; + + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + +class OneHotLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.OneHotLayerParams) */ { + public: + OneHotLayerParams(); + virtual ~OneHotLayerParams(); + + OneHotLayerParams(const OneHotLayerParams& from); + + inline OneHotLayerParams& operator=(const OneHotLayerParams& from) { + CopyFrom(from); + return *this; + } + + static const OneHotLayerParams& default_instance(); + + static inline const OneHotLayerParams* internal_default_instance() { + return reinterpret_cast( + 
&_OneHotLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 194; + + void Swap(OneHotLayerParams* other); + + // implements Message ---------------------------------------------- + + inline OneHotLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } + + OneHotLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const OneHotLayerParams& from); + void MergeFrom(const OneHotLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(OneHotLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // uint64 oneHotVectorSize = 1; + void clear_onehotvectorsize(); + static const int kOneHotVectorSizeFieldNumber = 1; + ::google::protobuf::uint64 onehotvectorsize() const; + void set_onehotvectorsize(::google::protobuf::uint64 value); + + // int64 axis = 2; + void clear_axis(); + static const int kAxisFieldNumber = 2; + ::google::protobuf::int64 axis() const; + void set_axis(::google::protobuf::int64 value); + + // float onValue = 3; + void clear_onvalue(); + static const int kOnValueFieldNumber = 3; + float onvalue() const; + void set_onvalue(float value); + + // float offValue = 4; + void clear_offvalue(); + static const int kOffValueFieldNumber = 4; + float offvalue() const; + void set_offvalue(float value); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.OneHotLayerParams) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::uint64 onehotvectorsize_; + ::google::protobuf::int64 axis_; + float onvalue_; + float offvalue_; + mutable int _cached_size_; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + +class CumSumLayerParams : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.CumSumLayerParams) */ { + public: + CumSumLayerParams(); + virtual ~CumSumLayerParams(); + + CumSumLayerParams(const CumSumLayerParams& from); + + inline CumSumLayerParams& operator=(const CumSumLayerParams& from) { + CopyFrom(from); + return *this; + } + + static const CumSumLayerParams& default_instance(); + + static inline const CumSumLayerParams* internal_default_instance() { + return reinterpret_cast( + &_CumSumLayerParams_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 195; + + void Swap(CumSumLayerParams* other); + + // implements Message ---------------------------------------------- + + inline CumSumLayerParams* New() const PROTOBUF_FINAL { return New(NULL); } 
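`OneHotLayerParams` above is a plain four-field message. A sketch of a typical configuration; reading `axis` as the insertion point of the new one-hot dimension is an assumption, not stated by this header:

```cpp
#include "NeuralNetwork.pb.h"  // assumed generated-header name

CoreML::Specification::OneHotLayerParams MakeOneHot() {
  CoreML::Specification::OneHotLayerParams p;
  p.set_onehotvectorsize(10);  // uint64 oneHotVectorSize = 1
  p.set_axis(-1);              // int64 axis = 2 (assumed: where the new dim goes)
  p.set_onvalue(1.0f);         // float onValue = 3
  p.set_offvalue(0.0f);        // float offValue = 4
  return p;
}
```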
+ + CumSumLayerParams* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const CumSumLayerParams& from); + void MergeFrom(const CumSumLayerParams& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(CumSumLayerParams* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // int64 axis = 1; + void clear_axis(); + static const int kAxisFieldNumber = 1; + ::google::protobuf::int64 axis() const; + void set_axis(::google::protobuf::int64 value); + + // bool excludeFinalSum = 2; + void clear_excludefinalsum(); + static const int kExcludeFinalSumFieldNumber = 2; + bool excludefinalsum() const; + void set_excludefinalsum(bool value); + + // bool reverse = 3; + void clear_reverse(); + static const int kReverseFieldNumber = 3; + bool reverse() const; + void set_reverse(bool value); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.CumSumLayerParams) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::int64 axis_; + bool excludefinalsum_; + bool reverse_; + mutable int _cached_size_; + friend struct protobuf_NeuralNetwork_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + +class NeuralNetworkRegressor : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.NeuralNetworkRegressor) */ { + public: + NeuralNetworkRegressor(); + virtual ~NeuralNetworkRegressor(); + + NeuralNetworkRegressor(const NeuralNetworkRegressor& from); + + inline NeuralNetworkRegressor& operator=(const NeuralNetworkRegressor& from) { + CopyFrom(from); + return *this; + } + + static const NeuralNetworkRegressor& default_instance(); + + static inline const NeuralNetworkRegressor* internal_default_instance() { + return reinterpret_cast( + &_NeuralNetworkRegressor_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 196; + + void Swap(NeuralNetworkRegressor* other); + + // implements Message ---------------------------------------------- + + inline NeuralNetworkRegressor* New() const PROTOBUF_FINAL { return New(NULL); } + + NeuralNetworkRegressor* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const NeuralNetworkRegressor& from); + void MergeFrom(const NeuralNetworkRegressor& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* 
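Likewise for `CumSumLayerParams`; the comments on `excludeFinalSum` and `reverse` describe assumed semantics (exclusive scan and reversed accumulation), not something taken from this header:

```cpp
#include "NeuralNetwork.pb.h"  // assumed generated-header name

CoreML::Specification::CumSumLayerParams MakeCumSum() {
  CoreML::Specification::CumSumLayerParams p;
  p.set_axis(0);                // int64 axis = 1
  p.set_excludefinalsum(true);  // assumed: exclusive rather than inclusive scan
  p.set_reverse(false);         // assumed: accumulate front-to-back
  return p;
}
```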
input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(NeuralNetworkRegressor* other); private: inline ::google::protobuf::Arena* GetArenaNoVirtual() const { return NULL; @@ -20285,7 +21577,7 @@ class NetworkUpdateParameters : public ::google::protobuf::MessageLite /* @@prot &_NetworkUpdateParameters_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 189; + 197; void Swap(NetworkUpdateParameters* other); @@ -20415,7 +21707,7 @@ class LossLayer : public ::google::protobuf::MessageLite /* @@protoc_insertion_p &_LossLayer_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 190; + 198; void Swap(LossLayer* other); @@ -20533,7 +21825,7 @@ class CategoricalCrossEntropyLossLayer : public ::google::protobuf::MessageLite &_CategoricalCrossEntropyLossLayer_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 191; + 199; void Swap(CategoricalCrossEntropyLossLayer* other); @@ -20634,7 +21926,7 @@ class MeanSquaredErrorLossLayer : public ::google::protobuf::MessageLite /* @@pr &_MeanSquaredErrorLossLayer_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 192; + 200; void Swap(MeanSquaredErrorLossLayer* other); @@ -20741,7 +22033,7 @@ class Optimizer : public ::google::protobuf::MessageLite /* @@protoc_insertion_p &_Optimizer_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 193; + 201; void Swap(Optimizer* other); @@ -20844,7 +22136,7 @@ class SGDOptimizer : public ::google::protobuf::MessageLite /* @@protoc_insertio &_SGDOptimizer_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 194; + 202; void Swap(SGDOptimizer* other); @@ -20945,7 +22237,7 @@ class AdamOptimizer : public ::google::protobuf::MessageLite /* @@protoc_inserti &_AdamOptimizer_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 195; + 203; void Swap(AdamOptimizer* other); @@ -29899,6 +31191,390 @@ inline void NeuralNetworkLayer::set_allocated_nonmaximumsuppression(::CoreML::Sp // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.NonMaximumSuppression) } +// .CoreML.Specification.OneHotLayerParams oneHot = 1450; +inline bool NeuralNetworkLayer::has_onehot() const { + return layer_case() == kOneHot; +} +inline void NeuralNetworkLayer::set_has_onehot() { + _oneof_case_[0] = kOneHot; +} +inline void NeuralNetworkLayer::clear_onehot() { + if (has_onehot()) { + delete layer_.onehot_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::OneHotLayerParams& NeuralNetworkLayer::onehot() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.oneHot) + return has_onehot() + ? 
*layer_.onehot_ + : ::CoreML::Specification::OneHotLayerParams::default_instance(); +} +inline ::CoreML::Specification::OneHotLayerParams* NeuralNetworkLayer::mutable_onehot() { + if (!has_onehot()) { + clear_layer(); + set_has_onehot(); + layer_.onehot_ = new ::CoreML::Specification::OneHotLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.oneHot) + return layer_.onehot_; +} +inline ::CoreML::Specification::OneHotLayerParams* NeuralNetworkLayer::release_onehot() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.oneHot) + if (has_onehot()) { + clear_has_layer(); + ::CoreML::Specification::OneHotLayerParams* temp = layer_.onehot_; + layer_.onehot_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_onehot(::CoreML::Specification::OneHotLayerParams* onehot) { + clear_layer(); + if (onehot) { + set_has_onehot(); + layer_.onehot_ = onehot; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.oneHot) +} + +// .CoreML.Specification.CumSumLayerParams cumSum = 1455; +inline bool NeuralNetworkLayer::has_cumsum() const { + return layer_case() == kCumSum; +} +inline void NeuralNetworkLayer::set_has_cumsum() { + _oneof_case_[0] = kCumSum; +} +inline void NeuralNetworkLayer::clear_cumsum() { + if (has_cumsum()) { + delete layer_.cumsum_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::CumSumLayerParams& NeuralNetworkLayer::cumsum() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.cumSum) + return has_cumsum() + ? *layer_.cumsum_ + : ::CoreML::Specification::CumSumLayerParams::default_instance(); +} +inline ::CoreML::Specification::CumSumLayerParams* NeuralNetworkLayer::mutable_cumsum() { + if (!has_cumsum()) { + clear_layer(); + set_has_cumsum(); + layer_.cumsum_ = new ::CoreML::Specification::CumSumLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.cumSum) + return layer_.cumsum_; +} +inline ::CoreML::Specification::CumSumLayerParams* NeuralNetworkLayer::release_cumsum() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.cumSum) + if (has_cumsum()) { + clear_has_layer(); + ::CoreML::Specification::CumSumLayerParams* temp = layer_.cumsum_; + layer_.cumsum_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_cumsum(::CoreML::Specification::CumSumLayerParams* cumsum) { + clear_layer(); + if (cumsum) { + set_has_cumsum(); + layer_.cumsum_ = cumsum; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.cumSum) +} + +// .CoreML.Specification.ClampedReLULayerParams clampedReLU = 1460; +inline bool NeuralNetworkLayer::has_clampedrelu() const { + return layer_case() == kClampedReLU; +} +inline void NeuralNetworkLayer::set_has_clampedrelu() { + _oneof_case_[0] = kClampedReLU; +} +inline void NeuralNetworkLayer::clear_clampedrelu() { + if (has_clampedrelu()) { + delete layer_.clampedrelu_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::ClampedReLULayerParams& NeuralNetworkLayer::clampedrelu() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.clampedReLU) + return has_clampedrelu() + ? 
*layer_.clampedrelu_ + : ::CoreML::Specification::ClampedReLULayerParams::default_instance(); +} +inline ::CoreML::Specification::ClampedReLULayerParams* NeuralNetworkLayer::mutable_clampedrelu() { + if (!has_clampedrelu()) { + clear_layer(); + set_has_clampedrelu(); + layer_.clampedrelu_ = new ::CoreML::Specification::ClampedReLULayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.clampedReLU) + return layer_.clampedrelu_; +} +inline ::CoreML::Specification::ClampedReLULayerParams* NeuralNetworkLayer::release_clampedrelu() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.clampedReLU) + if (has_clampedrelu()) { + clear_has_layer(); + ::CoreML::Specification::ClampedReLULayerParams* temp = layer_.clampedrelu_; + layer_.clampedrelu_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_clampedrelu(::CoreML::Specification::ClampedReLULayerParams* clampedrelu) { + clear_layer(); + if (clampedrelu) { + set_has_clampedrelu(); + layer_.clampedrelu_ = clampedrelu; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.clampedReLU) +} + +// .CoreML.Specification.ArgSortLayerParams argSort = 1461; +inline bool NeuralNetworkLayer::has_argsort() const { + return layer_case() == kArgSort; +} +inline void NeuralNetworkLayer::set_has_argsort() { + _oneof_case_[0] = kArgSort; +} +inline void NeuralNetworkLayer::clear_argsort() { + if (has_argsort()) { + delete layer_.argsort_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::ArgSortLayerParams& NeuralNetworkLayer::argsort() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.argSort) + return has_argsort() + ? *layer_.argsort_ + : ::CoreML::Specification::ArgSortLayerParams::default_instance(); +} +inline ::CoreML::Specification::ArgSortLayerParams* NeuralNetworkLayer::mutable_argsort() { + if (!has_argsort()) { + clear_layer(); + set_has_argsort(); + layer_.argsort_ = new ::CoreML::Specification::ArgSortLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.argSort) + return layer_.argsort_; +} +inline ::CoreML::Specification::ArgSortLayerParams* NeuralNetworkLayer::release_argsort() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.argSort) + if (has_argsort()) { + clear_has_layer(); + ::CoreML::Specification::ArgSortLayerParams* temp = layer_.argsort_; + layer_.argsort_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_argsort(::CoreML::Specification::ArgSortLayerParams* argsort) { + clear_layer(); + if (argsort) { + set_has_argsort(); + layer_.argsort_ = argsort; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.argSort) +} + +// .CoreML.Specification.Pooling3DLayerParams pooling3d = 1465; +inline bool NeuralNetworkLayer::has_pooling3d() const { + return layer_case() == kPooling3D; +} +inline void NeuralNetworkLayer::set_has_pooling3d() { + _oneof_case_[0] = kPooling3D; +} +inline void NeuralNetworkLayer::clear_pooling3d() { + if (has_pooling3d()) { + delete layer_.pooling3d_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::Pooling3DLayerParams& NeuralNetworkLayer::pooling3d() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.pooling3d) + return has_pooling3d() + ? 
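The inline definitions above all follow the same oneof ownership contract the existing layer cases use: `mutable_*()` selects the case and lazily allocates, switching cases deletes the old member, and `release_*()`/`set_allocated_*()` move ownership across the API boundary. A minimal sketch of that contract, again assuming the `NeuralNetwork.pb.h` header name:

```cpp
#include <memory>
#include "NeuralNetwork.pb.h"  // assumed generated-header name

void OneofOwnershipDemo() {
  CoreML::Specification::NeuralNetworkLayer layer;

  // mutable_*() selects the oneof case and default-constructs the member.
  layer.mutable_cumsum()->set_axis(1);
  layer.mutable_cumsum()->set_reverse(true);  // same member, no reallocation

  // Selecting a different case first deletes the old member (clear_layer()).
  layer.mutable_argsort()->set_descending(true);

  // release_*() hands ownership to the caller and clears the case...
  std::unique_ptr<CoreML::Specification::ArgSortLayerParams> owned(
      layer.release_argsort());
  // ...and set_allocated_*() adopts a heap-allocated message, taking it back.
  layer.set_allocated_argsort(owned.release());
}
```

Because these are `MessageLite` types there is no reflection available, so the `release_*`/`set_allocated_*` pair is the only way to move a params message in or out without a copy.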
*layer_.pooling3d_ + : ::CoreML::Specification::Pooling3DLayerParams::default_instance(); +} +inline ::CoreML::Specification::Pooling3DLayerParams* NeuralNetworkLayer::mutable_pooling3d() { + if (!has_pooling3d()) { + clear_layer(); + set_has_pooling3d(); + layer_.pooling3d_ = new ::CoreML::Specification::Pooling3DLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.pooling3d) + return layer_.pooling3d_; +} +inline ::CoreML::Specification::Pooling3DLayerParams* NeuralNetworkLayer::release_pooling3d() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.pooling3d) + if (has_pooling3d()) { + clear_has_layer(); + ::CoreML::Specification::Pooling3DLayerParams* temp = layer_.pooling3d_; + layer_.pooling3d_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_pooling3d(::CoreML::Specification::Pooling3DLayerParams* pooling3d) { + clear_layer(); + if (pooling3d) { + set_has_pooling3d(); + layer_.pooling3d_ = pooling3d; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.pooling3d) +} + +// .CoreML.Specification.GlobalPooling3DLayerParams globalPooling3d = 1466; +inline bool NeuralNetworkLayer::has_globalpooling3d() const { + return layer_case() == kGlobalPooling3D; +} +inline void NeuralNetworkLayer::set_has_globalpooling3d() { + _oneof_case_[0] = kGlobalPooling3D; +} +inline void NeuralNetworkLayer::clear_globalpooling3d() { + if (has_globalpooling3d()) { + delete layer_.globalpooling3d_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::GlobalPooling3DLayerParams& NeuralNetworkLayer::globalpooling3d() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) + return has_globalpooling3d() + ? 
*layer_.globalpooling3d_ + : ::CoreML::Specification::GlobalPooling3DLayerParams::default_instance(); +} +inline ::CoreML::Specification::GlobalPooling3DLayerParams* NeuralNetworkLayer::mutable_globalpooling3d() { + if (!has_globalpooling3d()) { + clear_layer(); + set_has_globalpooling3d(); + layer_.globalpooling3d_ = new ::CoreML::Specification::GlobalPooling3DLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) + return layer_.globalpooling3d_; +} +inline ::CoreML::Specification::GlobalPooling3DLayerParams* NeuralNetworkLayer::release_globalpooling3d() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) + if (has_globalpooling3d()) { + clear_has_layer(); + ::CoreML::Specification::GlobalPooling3DLayerParams* temp = layer_.globalpooling3d_; + layer_.globalpooling3d_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_globalpooling3d(::CoreML::Specification::GlobalPooling3DLayerParams* globalpooling3d) { + clear_layer(); + if (globalpooling3d) { + set_has_globalpooling3d(); + layer_.globalpooling3d_ = globalpooling3d; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.globalPooling3d) +} + +// .CoreML.Specification.SliceBySizeLayerParams sliceBySize = 1470; +inline bool NeuralNetworkLayer::has_slicebysize() const { + return layer_case() == kSliceBySize; +} +inline void NeuralNetworkLayer::set_has_slicebysize() { + _oneof_case_[0] = kSliceBySize; +} +inline void NeuralNetworkLayer::clear_slicebysize() { + if (has_slicebysize()) { + delete layer_.slicebysize_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::SliceBySizeLayerParams& NeuralNetworkLayer::slicebysize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.sliceBySize) + return has_slicebysize() + ? 
*layer_.slicebysize_ + : ::CoreML::Specification::SliceBySizeLayerParams::default_instance(); +} +inline ::CoreML::Specification::SliceBySizeLayerParams* NeuralNetworkLayer::mutable_slicebysize() { + if (!has_slicebysize()) { + clear_layer(); + set_has_slicebysize(); + layer_.slicebysize_ = new ::CoreML::Specification::SliceBySizeLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.sliceBySize) + return layer_.slicebysize_; +} +inline ::CoreML::Specification::SliceBySizeLayerParams* NeuralNetworkLayer::release_slicebysize() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.sliceBySize) + if (has_slicebysize()) { + clear_has_layer(); + ::CoreML::Specification::SliceBySizeLayerParams* temp = layer_.slicebysize_; + layer_.slicebysize_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_slicebysize(::CoreML::Specification::SliceBySizeLayerParams* slicebysize) { + clear_layer(); + if (slicebysize) { + set_has_slicebysize(); + layer_.slicebysize_ = slicebysize; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.sliceBySize) +} + +// .CoreML.Specification.Convolution3DLayerParams convolution3d = 1471; +inline bool NeuralNetworkLayer::has_convolution3d() const { + return layer_case() == kConvolution3D; +} +inline void NeuralNetworkLayer::set_has_convolution3d() { + _oneof_case_[0] = kConvolution3D; +} +inline void NeuralNetworkLayer::clear_convolution3d() { + if (has_convolution3d()) { + delete layer_.convolution3d_; + clear_has_layer(); + } +} +inline const ::CoreML::Specification::Convolution3DLayerParams& NeuralNetworkLayer::convolution3d() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.NeuralNetworkLayer.convolution3d) + return has_convolution3d() + ? 
*layer_.convolution3d_ + : ::CoreML::Specification::Convolution3DLayerParams::default_instance(); +} +inline ::CoreML::Specification::Convolution3DLayerParams* NeuralNetworkLayer::mutable_convolution3d() { + if (!has_convolution3d()) { + clear_layer(); + set_has_convolution3d(); + layer_.convolution3d_ = new ::CoreML::Specification::Convolution3DLayerParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.NeuralNetworkLayer.convolution3d) + return layer_.convolution3d_; +} +inline ::CoreML::Specification::Convolution3DLayerParams* NeuralNetworkLayer::release_convolution3d() { + // @@protoc_insertion_point(field_release:CoreML.Specification.NeuralNetworkLayer.convolution3d) + if (has_convolution3d()) { + clear_has_layer(); + ::CoreML::Specification::Convolution3DLayerParams* temp = layer_.convolution3d_; + layer_.convolution3d_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void NeuralNetworkLayer::set_allocated_convolution3d(::CoreML::Specification::Convolution3DLayerParams* convolution3d) { + clear_layer(); + if (convolution3d) { + set_has_convolution3d(); + layer_.convolution3d_ = convolution3d; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.NeuralNetworkLayer.convolution3d) +} + inline bool NeuralNetworkLayer::has_layer() const { return layer_case() != LAYER_NOT_SET; } @@ -30578,6 +32254,59 @@ inline void WeightParams::set_allocated_rawvalue(::std::string* rawvalue) { // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.WeightParams.rawValue) } +// bytes int8RawValue = 31; +inline void WeightParams::clear_int8rawvalue() { + int8rawvalue_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +inline const ::std::string& WeightParams::int8rawvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.WeightParams.int8RawValue) + return int8rawvalue_.GetNoArena(); +} +inline void WeightParams::set_int8rawvalue(const ::std::string& value) { + + int8rawvalue_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); + // @@protoc_insertion_point(field_set:CoreML.Specification.WeightParams.int8RawValue) +} +#if LANG_CXX11 +inline void WeightParams::set_int8rawvalue(::std::string&& value) { + + int8rawvalue_.SetNoArena( + &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); + // @@protoc_insertion_point(field_set_rvalue:CoreML.Specification.WeightParams.int8RawValue) +} +#endif +inline void WeightParams::set_int8rawvalue(const char* value) { + GOOGLE_DCHECK(value != NULL); + + int8rawvalue_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + // @@protoc_insertion_point(field_set_char:CoreML.Specification.WeightParams.int8RawValue) +} +inline void WeightParams::set_int8rawvalue(const void* value, size_t size) { + + int8rawvalue_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), + ::std::string(reinterpret_cast<const char*>(value), size)); + // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.WeightParams.int8RawValue) +} +inline ::std::string* WeightParams::mutable_int8rawvalue() { + + // @@protoc_insertion_point(field_mutable:CoreML.Specification.WeightParams.int8RawValue) + return int8rawvalue_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +inline ::std::string* WeightParams::release_int8rawvalue() { + // @@protoc_insertion_point(field_release:CoreML.Specification.WeightParams.int8RawValue) + + return 
int8rawvalue_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +} +inline void WeightParams::set_allocated_int8rawvalue(::std::string* int8rawvalue) { + if (int8rawvalue != NULL) { + + } else { + + } + int8rawvalue_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), int8rawvalue); + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.WeightParams.int8RawValue) +} + // .CoreML.Specification.QuantizationParams quantization = 40; inline bool WeightParams::has_quantization() const { return this != internal_default_instance() && quantization_ != NULL; @@ -31231,6 +32960,412 @@ inline ConvolutionLayerParams::ConvolutionPaddingTypeCase ConvolutionLayerParams } // ------------------------------------------------------------------- +// Convolution3DLayerParams + +// int32 outputChannels = 1; +inline void Convolution3DLayerParams::clear_outputchannels() { + outputchannels_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::outputchannels() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.outputChannels) + return outputchannels_; +} +inline void Convolution3DLayerParams::set_outputchannels(::google::protobuf::int32 value) { + + outputchannels_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.outputChannels) +} + +// int32 inputChannels = 2; +inline void Convolution3DLayerParams::clear_inputchannels() { + inputchannels_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::inputchannels() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.inputChannels) + return inputchannels_; +} +inline void Convolution3DLayerParams::set_inputchannels(::google::protobuf::int32 value) { + + inputchannels_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.inputChannels) +} + +// int32 nGroups = 10; +inline void Convolution3DLayerParams::clear_ngroups() { + ngroups_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::ngroups() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.nGroups) + return ngroups_; +} +inline void Convolution3DLayerParams::set_ngroups(::google::protobuf::int32 value) { + + ngroups_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.nGroups) +} + +// int32 kernelDepth = 20; +inline void Convolution3DLayerParams::clear_kerneldepth() { + kerneldepth_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::kerneldepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.kernelDepth) + return kerneldepth_; +} +inline void Convolution3DLayerParams::set_kerneldepth(::google::protobuf::int32 value) { + + kerneldepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.kernelDepth) +} + +// int32 kernelHeight = 21; +inline void Convolution3DLayerParams::clear_kernelheight() { + kernelheight_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::kernelheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.kernelHeight) + return kernelheight_; +} +inline void Convolution3DLayerParams::set_kernelheight(::google::protobuf::int32 value) { + + kernelheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.kernelHeight) +} + 
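The generated code above adds the usual proto3 oneof plumbing for the new `convolution3d` layer (field 1471 of `NeuralNetworkLayer`) plus the first scalar accessors of `Convolution3DLayerParams`; the remaining field accessors (kernelWidth, strides, dilations, custom paddings) continue just below. A minimal sketch of driving this generated API, assuming the header is available as `NeuralNetwork.pb.h` from the mlmodel build tree (the include path is an assumption; adjust to your checkout):

```cpp
#include "NeuralNetwork.pb.h"  // assumed include path into the mlmodel build tree

// Populate the new convolution3d oneof member of a NeuralNetworkLayer.
void ConfigureConv3d(CoreML::Specification::NeuralNetworkLayer* layer) {
  // mutable_convolution3d() clears whichever oneof member was previously set,
  // marks the oneof case as kConvolution3D, and heap-allocates the params.
  auto* conv = layer->mutable_convolution3d();
  conv->set_outputchannels(16);
  conv->set_inputchannels(3);
  conv->set_ngroups(1);
  conv->set_kerneldepth(3);
  conv->set_kernelheight(3);
  conv->set_kernelwidth(3);  // accessor generated just below in this hunk
}
```

Because these are oneof accessors, setting `convolution3d` frees any layer parameter message that was active before, per the standard protoc contract visible in `set_allocated_convolution3d` above.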
+// int32 kernelWidth = 22; +inline void Convolution3DLayerParams::clear_kernelwidth() { + kernelwidth_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::kernelwidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.kernelWidth) + return kernelwidth_; +} +inline void Convolution3DLayerParams::set_kernelwidth(::google::protobuf::int32 value) { + + kernelwidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.kernelWidth) +} + +// int32 strideDepth = 31; +inline void Convolution3DLayerParams::clear_stridedepth() { + stridedepth_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::stridedepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.strideDepth) + return stridedepth_; +} +inline void Convolution3DLayerParams::set_stridedepth(::google::protobuf::int32 value) { + + stridedepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.strideDepth) +} + +// int32 strideHeight = 32; +inline void Convolution3DLayerParams::clear_strideheight() { + strideheight_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::strideheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.strideHeight) + return strideheight_; +} +inline void Convolution3DLayerParams::set_strideheight(::google::protobuf::int32 value) { + + strideheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.strideHeight) +} + +// int32 strideWidth = 33; +inline void Convolution3DLayerParams::clear_stridewidth() { + stridewidth_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::stridewidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.strideWidth) + return stridewidth_; +} +inline void Convolution3DLayerParams::set_stridewidth(::google::protobuf::int32 value) { + + stridewidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.strideWidth) +} + +// int32 dilationDepth = 40; +inline void Convolution3DLayerParams::clear_dilationdepth() { + dilationdepth_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::dilationdepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.dilationDepth) + return dilationdepth_; +} +inline void Convolution3DLayerParams::set_dilationdepth(::google::protobuf::int32 value) { + + dilationdepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.dilationDepth) +} + +// int32 dilationHeight = 41; +inline void Convolution3DLayerParams::clear_dilationheight() { + dilationheight_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::dilationheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.dilationHeight) + return dilationheight_; +} +inline void Convolution3DLayerParams::set_dilationheight(::google::protobuf::int32 value) { + + dilationheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.dilationHeight) +} + +// int32 dilationWidth = 42; +inline void Convolution3DLayerParams::clear_dilationwidth() { + dilationwidth_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::dilationwidth() const { + // 
@@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.dilationWidth) + return dilationwidth_; +} +inline void Convolution3DLayerParams::set_dilationwidth(::google::protobuf::int32 value) { + + dilationwidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.dilationWidth) +} + +// bool hasBias = 50; +inline void Convolution3DLayerParams::clear_hasbias() { + hasbias_ = false; +} +inline bool Convolution3DLayerParams::hasbias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.hasBias) + return hasbias_; +} +inline void Convolution3DLayerParams::set_hasbias(bool value) { + + hasbias_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.hasBias) +} + +// .CoreML.Specification.WeightParams weights = 60; +inline bool Convolution3DLayerParams::has_weights() const { + return this != internal_default_instance() && weights_ != NULL; +} +inline void Convolution3DLayerParams::clear_weights() { + if (GetArenaNoVirtual() == NULL && weights_ != NULL) delete weights_; + weights_ = NULL; +} +inline const ::CoreML::Specification::WeightParams& Convolution3DLayerParams::weights() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.weights) + return weights_ != NULL ? *weights_ + : *::CoreML::Specification::WeightParams::internal_default_instance(); +} +inline ::CoreML::Specification::WeightParams* Convolution3DLayerParams::mutable_weights() { + + if (weights_ == NULL) { + weights_ = new ::CoreML::Specification::WeightParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.Convolution3DLayerParams.weights) + return weights_; +} +inline ::CoreML::Specification::WeightParams* Convolution3DLayerParams::release_weights() { + // @@protoc_insertion_point(field_release:CoreML.Specification.Convolution3DLayerParams.weights) + + ::CoreML::Specification::WeightParams* temp = weights_; + weights_ = NULL; + return temp; +} +inline void Convolution3DLayerParams::set_allocated_weights(::CoreML::Specification::WeightParams* weights) { + delete weights_; + weights_ = weights; + if (weights) { + + } else { + + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Convolution3DLayerParams.weights) +} + +// .CoreML.Specification.WeightParams bias = 61; +inline bool Convolution3DLayerParams::has_bias() const { + return this != internal_default_instance() && bias_ != NULL; +} +inline void Convolution3DLayerParams::clear_bias() { + if (GetArenaNoVirtual() == NULL && bias_ != NULL) delete bias_; + bias_ = NULL; +} +inline const ::CoreML::Specification::WeightParams& Convolution3DLayerParams::bias() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.bias) + return bias_ != NULL ? 
*bias_ + : *::CoreML::Specification::WeightParams::internal_default_instance(); +} +inline ::CoreML::Specification::WeightParams* Convolution3DLayerParams::mutable_bias() { + + if (bias_ == NULL) { + bias_ = new ::CoreML::Specification::WeightParams; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.Convolution3DLayerParams.bias) + return bias_; +} +inline ::CoreML::Specification::WeightParams* Convolution3DLayerParams::release_bias() { + // @@protoc_insertion_point(field_release:CoreML.Specification.Convolution3DLayerParams.bias) + + ::CoreML::Specification::WeightParams* temp = bias_; + bias_ = NULL; + return temp; +} +inline void Convolution3DLayerParams::set_allocated_bias(::CoreML::Specification::WeightParams* bias) { + delete bias_; + bias_ = bias; + if (bias) { + + } else { + + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.Convolution3DLayerParams.bias) +} + +// .CoreML.Specification.Convolution3DLayerParams.PaddingType paddingType = 70; +inline void Convolution3DLayerParams::clear_paddingtype() { + paddingtype_ = 0; +} +inline ::CoreML::Specification::Convolution3DLayerParams_PaddingType Convolution3DLayerParams::paddingtype() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.paddingType) + return static_cast< ::CoreML::Specification::Convolution3DLayerParams_PaddingType >(paddingtype_); +} +inline void Convolution3DLayerParams::set_paddingtype(::CoreML::Specification::Convolution3DLayerParams_PaddingType value) { + + paddingtype_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.paddingType) +} + +// int32 customPaddingFront = 80; +inline void Convolution3DLayerParams::clear_custompaddingfront() { + custompaddingfront_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::custompaddingfront() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingFront) + return custompaddingfront_; +} +inline void Convolution3DLayerParams::set_custompaddingfront(::google::protobuf::int32 value) { + + custompaddingfront_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingFront) +} + +// int32 customPaddingBack = 81; +inline void Convolution3DLayerParams::clear_custompaddingback() { + custompaddingback_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::custompaddingback() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingBack) + return custompaddingback_; +} +inline void Convolution3DLayerParams::set_custompaddingback(::google::protobuf::int32 value) { + + custompaddingback_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingBack) +} + +// int32 customPaddingTop = 82; +inline void Convolution3DLayerParams::clear_custompaddingtop() { + custompaddingtop_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::custompaddingtop() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingTop) + return custompaddingtop_; +} +inline void Convolution3DLayerParams::set_custompaddingtop(::google::protobuf::int32 value) { + + custompaddingtop_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingTop) +} + +// int32 customPaddingBottom = 83; +inline void Convolution3DLayerParams::clear_custompaddingbottom() { + 
custompaddingbottom_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::custompaddingbottom() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingBottom) + return custompaddingbottom_; +} +inline void Convolution3DLayerParams::set_custompaddingbottom(::google::protobuf::int32 value) { + + custompaddingbottom_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingBottom) +} + +// int32 customPaddingLeft = 84; +inline void Convolution3DLayerParams::clear_custompaddingleft() { + custompaddingleft_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::custompaddingleft() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingLeft) + return custompaddingleft_; +} +inline void Convolution3DLayerParams::set_custompaddingleft(::google::protobuf::int32 value) { + + custompaddingleft_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingLeft) +} + +// int32 customPaddingRight = 85; +inline void Convolution3DLayerParams::clear_custompaddingright() { + custompaddingright_ = 0; +} +inline ::google::protobuf::int32 Convolution3DLayerParams::custompaddingright() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.customPaddingRight) + return custompaddingright_; +} +inline void Convolution3DLayerParams::set_custompaddingright(::google::protobuf::int32 value) { + + custompaddingright_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.customPaddingRight) +} + +// bool isDeconvolution = 86; +inline void Convolution3DLayerParams::clear_isdeconvolution() { + isdeconvolution_ = false; +} +inline bool Convolution3DLayerParams::isdeconvolution() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.isDeconvolution) + return isdeconvolution_; +} +inline void Convolution3DLayerParams::set_isdeconvolution(bool value) { + + isdeconvolution_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.isDeconvolution) +} + +// repeated uint64 outputShape = 87; +inline int Convolution3DLayerParams::outputshape_size() const { + return outputshape_.size(); +} +inline void Convolution3DLayerParams::clear_outputshape() { + outputshape_.Clear(); +} +inline ::google::protobuf::uint64 Convolution3DLayerParams::outputshape(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Convolution3DLayerParams.outputShape) + return outputshape_.Get(index); +} +inline void Convolution3DLayerParams::set_outputshape(int index, ::google::protobuf::uint64 value) { + outputshape_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.Convolution3DLayerParams.outputShape) +} +inline void Convolution3DLayerParams::add_outputshape(::google::protobuf::uint64 value) { + outputshape_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.Convolution3DLayerParams.outputShape) +} +inline const ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >& +Convolution3DLayerParams::outputshape() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.Convolution3DLayerParams.outputShape) + return outputshape_; +} +inline ::google::protobuf::RepeatedField< ::google::protobuf::uint64 >* +Convolution3DLayerParams::mutable_outputshape() { + // 
@@protoc_insertion_point(field_mutable_list:CoreML.Specification.Convolution3DLayerParams.outputShape) + return &outputshape_; +} + +// ------------------------------------------------------------------- + // InnerProductLayerParams // uint64 inputChannels = 1; @@ -31353,6 +33488,20 @@ inline void InnerProductLayerParams::set_allocated_bias(::CoreML::Specification: // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.InnerProductLayerParams.bias) } +// bool int8DynamicQuantize = 22; +inline void InnerProductLayerParams::clear_int8dynamicquantize() { + int8dynamicquantize_ = false; +} +inline bool InnerProductLayerParams::int8dynamicquantize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.InnerProductLayerParams.int8DynamicQuantize) + return int8dynamicquantize_; +} +inline void InnerProductLayerParams::set_int8dynamicquantize(bool value) { + + int8dynamicquantize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.InnerProductLayerParams.int8DynamicQuantize) +} + // ------------------------------------------------------------------- // EmbeddingLayerParams @@ -32112,6 +34261,238 @@ inline PoolingLayerParams::PoolingPaddingTypeCase PoolingLayerParams::PoolingPad } // ------------------------------------------------------------------- +// Pooling3DLayerParams + +// .CoreML.Specification.Pooling3DLayerParams.PoolingType3D type = 1; +inline void Pooling3DLayerParams::clear_type() { + type_ = 0; +} +inline ::CoreML::Specification::Pooling3DLayerParams_PoolingType3D Pooling3DLayerParams::type() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.type) + return static_cast< ::CoreML::Specification::Pooling3DLayerParams_PoolingType3D >(type_); +} +inline void Pooling3DLayerParams::set_type(::CoreML::Specification::Pooling3DLayerParams_PoolingType3D value) { + + type_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.type) +} + +// int32 kernelDepth = 2; +inline void Pooling3DLayerParams::clear_kerneldepth() { + kerneldepth_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::kerneldepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.kernelDepth) + return kerneldepth_; +} +inline void Pooling3DLayerParams::set_kerneldepth(::google::protobuf::int32 value) { + + kerneldepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.kernelDepth) +} + +// int32 kernelHeight = 3; +inline void Pooling3DLayerParams::clear_kernelheight() { + kernelheight_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::kernelheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.kernelHeight) + return kernelheight_; +} +inline void Pooling3DLayerParams::set_kernelheight(::google::protobuf::int32 value) { + + kernelheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.kernelHeight) +} + +// int32 kernelWidth = 4; +inline void Pooling3DLayerParams::clear_kernelwidth() { + kernelwidth_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::kernelwidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.kernelWidth) + return kernelwidth_; +} +inline void Pooling3DLayerParams::set_kernelwidth(::google::protobuf::int32 value) { + + kernelwidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.kernelWidth) +} + +// 
int32 strideDepth = 5; +inline void Pooling3DLayerParams::clear_stridedepth() { + stridedepth_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::stridedepth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.strideDepth) + return stridedepth_; +} +inline void Pooling3DLayerParams::set_stridedepth(::google::protobuf::int32 value) { + + stridedepth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.strideDepth) +} + +// int32 strideHeight = 6; +inline void Pooling3DLayerParams::clear_strideheight() { + strideheight_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::strideheight() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.strideHeight) + return strideheight_; +} +inline void Pooling3DLayerParams::set_strideheight(::google::protobuf::int32 value) { + + strideheight_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.strideHeight) +} + +// int32 strideWidth = 7; +inline void Pooling3DLayerParams::clear_stridewidth() { + stridewidth_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::stridewidth() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.strideWidth) + return stridewidth_; +} +inline void Pooling3DLayerParams::set_stridewidth(::google::protobuf::int32 value) { + + stridewidth_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.strideWidth) +} + +// .CoreML.Specification.Pooling3DLayerParams.Pooling3DPaddingType paddingType = 15; +inline void Pooling3DLayerParams::clear_paddingtype() { + paddingtype_ = 0; +} +inline ::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType Pooling3DLayerParams::paddingtype() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.paddingType) + return static_cast< ::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType >(paddingtype_); +} +inline void Pooling3DLayerParams::set_paddingtype(::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType value) { + + paddingtype_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.paddingType) +} + +// int32 customPaddingFront = 8; +inline void Pooling3DLayerParams::clear_custompaddingfront() { + custompaddingfront_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::custompaddingfront() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingFront) + return custompaddingfront_; +} +inline void Pooling3DLayerParams::set_custompaddingfront(::google::protobuf::int32 value) { + + custompaddingfront_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingFront) +} + +// int32 customPaddingBack = 9; +inline void Pooling3DLayerParams::clear_custompaddingback() { + custompaddingback_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::custompaddingback() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingBack) + return custompaddingback_; +} +inline void Pooling3DLayerParams::set_custompaddingback(::google::protobuf::int32 value) { + + custompaddingback_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingBack) +} + +// int32 customPaddingTop = 10; +inline void Pooling3DLayerParams::clear_custompaddingtop() { + 
custompaddingtop_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::custompaddingtop() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingTop) + return custompaddingtop_; +} +inline void Pooling3DLayerParams::set_custompaddingtop(::google::protobuf::int32 value) { + + custompaddingtop_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingTop) +} + +// int32 customPaddingBottom = 11; +inline void Pooling3DLayerParams::clear_custompaddingbottom() { + custompaddingbottom_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::custompaddingbottom() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingBottom) + return custompaddingbottom_; +} +inline void Pooling3DLayerParams::set_custompaddingbottom(::google::protobuf::int32 value) { + + custompaddingbottom_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingBottom) +} + +// int32 customPaddingLeft = 12; +inline void Pooling3DLayerParams::clear_custompaddingleft() { + custompaddingleft_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::custompaddingleft() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingLeft) + return custompaddingleft_; +} +inline void Pooling3DLayerParams::set_custompaddingleft(::google::protobuf::int32 value) { + + custompaddingleft_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingLeft) +} + +// int32 customPaddingRight = 13; +inline void Pooling3DLayerParams::clear_custompaddingright() { + custompaddingright_ = 0; +} +inline ::google::protobuf::int32 Pooling3DLayerParams::custompaddingright() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.customPaddingRight) + return custompaddingright_; +} +inline void Pooling3DLayerParams::set_custompaddingright(::google::protobuf::int32 value) { + + custompaddingright_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.customPaddingRight) +} + +// bool countExcludePadding = 14; +inline void Pooling3DLayerParams::clear_countexcludepadding() { + countexcludepadding_ = false; +} +inline bool Pooling3DLayerParams::countexcludepadding() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.Pooling3DLayerParams.countExcludePadding) + return countexcludepadding_; +} +inline void Pooling3DLayerParams::set_countexcludepadding(bool value) { + + countexcludepadding_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.Pooling3DLayerParams.countExcludePadding) +} + +// ------------------------------------------------------------------- + +// GlobalPooling3DLayerParams + +// .CoreML.Specification.GlobalPooling3DLayerParams.GlobalPoolingType3D type = 1; +inline void GlobalPooling3DLayerParams::clear_type() { + type_ = 0; +} +inline ::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D GlobalPooling3DLayerParams::type() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.GlobalPooling3DLayerParams.type) + return static_cast< ::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D >(type_); +} +inline void GlobalPooling3DLayerParams::set_type(::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D value) { + + type_ = value; + // 
@@protoc_insertion_point(field_set:CoreML.Specification.GlobalPooling3DLayerParams.type) +} + +// ------------------------------------------------------------------- + // PaddingLayerParams_PaddingConstant // float value = 1; @@ -32576,6 +34957,36 @@ UpsampleLayerParams::mutable_scalingfactor() { return &scalingfactor_; } +// repeated float fractionalScalingFactor = 7; +inline int UpsampleLayerParams::fractionalscalingfactor_size() const { + return fractionalscalingfactor_.size(); +} +inline void UpsampleLayerParams::clear_fractionalscalingfactor() { + fractionalscalingfactor_.Clear(); +} +inline float UpsampleLayerParams::fractionalscalingfactor(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) + return fractionalscalingfactor_.Get(index); +} +inline void UpsampleLayerParams::set_fractionalscalingfactor(int index, float value) { + fractionalscalingfactor_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) +} +inline void UpsampleLayerParams::add_fractionalscalingfactor(float value) { + fractionalscalingfactor_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) +} +inline const ::google::protobuf::RepeatedField< float >& +UpsampleLayerParams::fractionalscalingfactor() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) + return fractionalscalingfactor_; +} +inline ::google::protobuf::RepeatedField< float >* +UpsampleLayerParams::mutable_fractionalscalingfactor() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.UpsampleLayerParams.fractionalScalingFactor) + return &fractionalscalingfactor_; +} + // .CoreML.Specification.UpsampleLayerParams.InterpolationMode mode = 5; inline void UpsampleLayerParams::clear_mode() { mode_ = 0; @@ -32590,6 +35001,20 @@ inline void UpsampleLayerParams::set_mode(::CoreML::Specification::UpsampleLayer // @@protoc_insertion_point(field_set:CoreML.Specification.UpsampleLayerParams.mode) } +// .CoreML.Specification.UpsampleLayerParams.LinearUpsampleMode linearUpsampleMode = 6; +inline void UpsampleLayerParams::clear_linearupsamplemode() { + linearupsamplemode_ = 0; +} +inline ::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode UpsampleLayerParams::linearupsamplemode() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.UpsampleLayerParams.linearUpsampleMode) + return static_cast< ::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode >(linearupsamplemode_); +} +inline void UpsampleLayerParams::set_linearupsamplemode(::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode value) { + + linearupsamplemode_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.UpsampleLayerParams.linearUpsampleMode) +} + // ------------------------------------------------------------------- // ResizeBilinearLayerParams @@ -35775,6 +38200,20 @@ inline void BatchedMatMulLayerParams::set_allocated_bias(::CoreML::Specification // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.BatchedMatMulLayerParams.bias) } +// bool int8DynamicQuantize = 10; +inline void BatchedMatMulLayerParams::clear_int8dynamicquantize() { + int8dynamicquantize_ = false; +} +inline bool BatchedMatMulLayerParams::int8dynamicquantize() const { + // 
@@protoc_insertion_point(field_get:CoreML.Specification.BatchedMatMulLayerParams.int8DynamicQuantize) + return int8dynamicquantize_; +} +inline void BatchedMatMulLayerParams::set_int8dynamicquantize(bool value) { + + int8dynamicquantize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.BatchedMatMulLayerParams.int8DynamicQuantize) +} + // ------------------------------------------------------------------- // ConcatNDLayerParams @@ -38148,6 +40587,36 @@ SliceStaticLayerParams::mutable_strides() { return &strides_; } +// repeated bool squeezeMasks = 6; +inline int SliceStaticLayerParams::squeezemasks_size() const { + return squeezemasks_.size(); +} +inline void SliceStaticLayerParams::clear_squeezemasks() { + squeezemasks_.Clear(); +} +inline bool SliceStaticLayerParams::squeezemasks(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) + return squeezemasks_.Get(index); +} +inline void SliceStaticLayerParams::set_squeezemasks(int index, bool value) { + squeezemasks_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) +} +inline void SliceStaticLayerParams::add_squeezemasks(bool value) { + squeezemasks_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) +} +inline const ::google::protobuf::RepeatedField< bool >& +SliceStaticLayerParams::squeezemasks() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) + return squeezemasks_; +} +inline ::google::protobuf::RepeatedField< bool >* +SliceStaticLayerParams::mutable_squeezemasks() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.SliceStaticLayerParams.squeezeMasks) + return &squeezemasks_; +} + // ------------------------------------------------------------------- // SliceDynamicLayerParams @@ -38272,6 +40741,36 @@ SliceDynamicLayerParams::mutable_strides() { return &strides_; } +// repeated bool squeezeMasks = 6; +inline int SliceDynamicLayerParams::squeezemasks_size() const { + return squeezemasks_.size(); +} +inline void SliceDynamicLayerParams::clear_squeezemasks() { + squeezemasks_.Clear(); +} +inline bool SliceDynamicLayerParams::squeezemasks(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) + return squeezemasks_.Get(index); +} +inline void SliceDynamicLayerParams::set_squeezemasks(int index, bool value) { + squeezemasks_.Set(index, value); + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) +} +inline void SliceDynamicLayerParams::add_squeezemasks(bool value) { + squeezemasks_.Add(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) +} +inline const ::google::protobuf::RepeatedField< bool >& +SliceDynamicLayerParams::squeezemasks() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) + return squeezemasks_; +} +inline ::google::protobuf::RepeatedField< bool >* +SliceDynamicLayerParams::mutable_squeezemasks() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.SliceDynamicLayerParams.squeezeMasks) + return &squeezemasks_; +} + // ------------------------------------------------------------------- // TileLayerParams @@ -38644,6 +41143,102 @@ inline void NonMaximumSuppressionLayerParams::set_perclasssuppression(bool value 
// ------------------------------------------------------------------- +// ClampedReLULayerParams + +// float alpha = 1; +inline void ClampedReLULayerParams::clear_alpha() { + alpha_ = 0; +} +inline float ClampedReLULayerParams::alpha() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ClampedReLULayerParams.alpha) + return alpha_; +} +inline void ClampedReLULayerParams::set_alpha(float value) { + + alpha_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ClampedReLULayerParams.alpha) +} + +// float beta = 2; +inline void ClampedReLULayerParams::clear_beta() { + beta_ = 0; +} +inline float ClampedReLULayerParams::beta() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ClampedReLULayerParams.beta) + return beta_; +} +inline void ClampedReLULayerParams::set_beta(float value) { + + beta_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ClampedReLULayerParams.beta) +} + +// ------------------------------------------------------------------- + +// ArgSortLayerParams + +// int64 axis = 1; +inline void ArgSortLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); +} +inline ::google::protobuf::int64 ArgSortLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArgSortLayerParams.axis) + return axis_; +} +inline void ArgSortLayerParams::set_axis(::google::protobuf::int64 value) { + + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArgSortLayerParams.axis) +} + +// bool descending = 2; +inline void ArgSortLayerParams::clear_descending() { + descending_ = false; +} +inline bool ArgSortLayerParams::descending() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.ArgSortLayerParams.descending) + return descending_; +} +inline void ArgSortLayerParams::set_descending(bool value) { + + descending_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.ArgSortLayerParams.descending) +} + +// ------------------------------------------------------------------- + +// SliceBySizeLayerParams + +// int64 size = 2; +inline void SliceBySizeLayerParams::clear_size() { + size_ = GOOGLE_LONGLONG(0); +} +inline ::google::protobuf::int64 SliceBySizeLayerParams::size() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceBySizeLayerParams.size) + return size_; +} +inline void SliceBySizeLayerParams::set_size(::google::protobuf::int64 value) { + + size_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceBySizeLayerParams.size) +} + +// int64 axis = 3; +inline void SliceBySizeLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); +} +inline ::google::protobuf::int64 SliceBySizeLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.SliceBySizeLayerParams.axis) + return axis_; +} +inline void SliceBySizeLayerParams::set_axis(::google::protobuf::int64 value) { + + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.SliceBySizeLayerParams.axis) +} + +// ------------------------------------------------------------------- + // NeuralNetworkClassifier // repeated .CoreML.Specification.NeuralNetworkLayer layers = 1; @@ -38933,6 +41528,112 @@ inline NeuralNetworkClassifier::ClassLabelsCase NeuralNetworkClassifier::ClassLa } // ------------------------------------------------------------------- +// OneHotLayerParams + +// uint64 oneHotVectorSize = 1; +inline void OneHotLayerParams::clear_onehotvectorsize() { + onehotvectorsize_ = GOOGLE_ULONGLONG(0); +} 
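These new leaf messages (ClampedReLU, ArgSort, SliceBySize, and OneHot below) carry only scalar fields, so they pair directly with the `NeuralNetworkLayer` oneof accessors generated earlier in this header. A minimal usage sketch for two of them; the alpha/beta comment reflects the Core ML specification's definition of clampedReLU rather than anything in this generated file:

```cpp
#include "NeuralNetwork.pb.h"  // assumed include path into the mlmodel build tree

void ConfigureNewLayers(CoreML::Specification::NeuralNetworkLayer* relu_layer,
                        CoreML::Specification::NeuralNetworkLayer* sort_layer) {
  // Per the spec: clampedReLU(x) = min(beta, x >= 0 ? x : alpha * x).
  auto* relu = relu_layer->mutable_clampedrelu();
  relu->set_alpha(0.1f);
  relu->set_beta(6.0f);

  // argSort emits the index permutation that sorts along `axis`.
  auto* sort = sort_layer->mutable_argsort();
  sort->set_axis(1);
  sort->set_descending(true);
}
```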
+inline ::google::protobuf::uint64 OneHotLayerParams::onehotvectorsize() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.oneHotVectorSize) + return onehotvectorsize_; +} +inline void OneHotLayerParams::set_onehotvectorsize(::google::protobuf::uint64 value) { + + onehotvectorsize_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.oneHotVectorSize) +} + +// int64 axis = 2; +inline void OneHotLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); +} +inline ::google::protobuf::int64 OneHotLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.axis) + return axis_; +} +inline void OneHotLayerParams::set_axis(::google::protobuf::int64 value) { + + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.axis) +} + +// float onValue = 3; +inline void OneHotLayerParams::clear_onvalue() { + onvalue_ = 0; +} +inline float OneHotLayerParams::onvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.onValue) + return onvalue_; +} +inline void OneHotLayerParams::set_onvalue(float value) { + + onvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.onValue) +} + +// float offValue = 4; +inline void OneHotLayerParams::clear_offvalue() { + offvalue_ = 0; +} +inline float OneHotLayerParams::offvalue() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.OneHotLayerParams.offValue) + return offvalue_; +} +inline void OneHotLayerParams::set_offvalue(float value) { + + offvalue_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.OneHotLayerParams.offValue) +} + +// ------------------------------------------------------------------- + +// CumSumLayerParams + +// int64 axis = 1; +inline void CumSumLayerParams::clear_axis() { + axis_ = GOOGLE_LONGLONG(0); +} +inline ::google::protobuf::int64 CumSumLayerParams::axis() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CumSumLayerParams.axis) + return axis_; +} +inline void CumSumLayerParams::set_axis(::google::protobuf::int64 value) { + + axis_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CumSumLayerParams.axis) +} + +// bool excludeFinalSum = 2; +inline void CumSumLayerParams::clear_excludefinalsum() { + excludefinalsum_ = false; +} +inline bool CumSumLayerParams::excludefinalsum() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CumSumLayerParams.excludeFinalSum) + return excludefinalsum_; +} +inline void CumSumLayerParams::set_excludefinalsum(bool value) { + + excludefinalsum_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CumSumLayerParams.excludeFinalSum) +} + +// bool reverse = 3; +inline void CumSumLayerParams::clear_reverse() { + reverse_ = false; +} +inline bool CumSumLayerParams::reverse() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CumSumLayerParams.reverse) + return reverse_; +} +inline void CumSumLayerParams::set_reverse(bool value) { + + reverse_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CumSumLayerParams.reverse) +} + +// ------------------------------------------------------------------- + // NeuralNetworkRegressor // repeated .CoreML.Specification.NeuralNetworkLayer layers = 1; @@ -40454,6 +43155,22 @@ inline void AdamOptimizer::set_allocated_eps(::CoreML::Specification::DoublePara // 
------------------------------------------------------------------- +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + +// ------------------------------------------------------------------- + // @@protoc_insertion_point(namespace_scope) @@ -40468,9 +43185,14 @@ namespace protobuf { template <> struct is_proto_enum< ::CoreML::Specification::SamePadding_SamePaddingMode> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::SamplingMode_Method> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::BoxCoordinatesMode_Coordinates> : ::google::protobuf::internal::true_type {}; +template <> struct is_proto_enum< ::CoreML::Specification::Convolution3DLayerParams_PaddingType> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::PoolingLayerParams_PoolingType> : ::google::protobuf::internal::true_type {}; +template <> struct is_proto_enum< ::CoreML::Specification::Pooling3DLayerParams_PoolingType3D> : ::google::protobuf::internal::true_type {}; +template <> struct is_proto_enum< ::CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType> : ::google::protobuf::internal::true_type {}; +template <> struct is_proto_enum< ::CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::UnaryFunctionLayerParams_Operation> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::UpsampleLayerParams_InterpolationMode> : ::google::protobuf::internal::true_type {}; +template <> struct is_proto_enum< ::CoreML::Specification::UpsampleLayerParams_LinearUpsampleMode> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::FlattenLayerParams_FlattenOrder> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::ReshapeLayerParams_ReshapeOrder> : ::google::protobuf::internal::true_type {}; template <> struct is_proto_enum< ::CoreML::Specification::ReorganizeDataLayerParams_ReorganizationType> : ::google::protobuf::internal::true_type {}; diff --git a/mlmodel/build/format/NeuralNetwork_enums.h b/mlmodel/build/format/NeuralNetwork_enums.h index 9677764c6..3e417279d 100644 --- a/mlmodel/build/format/NeuralNetwork_enums.h +++ b/mlmodel/build/format/NeuralNetwork_enums.h @@ -242,6 +242,14 @@ enum MLNeuralNetworkLayerlayer: int { MLNeuralNetworkLayerlayer_whereBroadcastable = 1330, MLNeuralNetworkLayerlayer_layerNormalization = 1350, MLNeuralNetworkLayerlayer_NonMaximumSuppression = 1400, + MLNeuralNetworkLayerlayer_oneHot = 1450, + MLNeuralNetworkLayerlayer_cumSum = 1455, + MLNeuralNetworkLayerlayer_clampedReLU = 1460, + MLNeuralNetworkLayerlayer_argSort = 1461, + MLNeuralNetworkLayerlayer_pooling3d = 1465, + MLNeuralNetworkLayerlayer_globalPooling3d = 1466, + MLNeuralNetworkLayerlayer_sliceBySize = 1470, + MLNeuralNetworkLayerlayer_convolution3d = 
1471, MLNeuralNetworkLayerlayer_NOT_SET = 0, }; @@ -548,6 +556,22 @@ static const char * MLNeuralNetworkLayerlayer_Name(MLNeuralNetworkLayerlayer x) return "MLNeuralNetworkLayerlayer_layerNormalization"; case MLNeuralNetworkLayerlayer_NonMaximumSuppression: return "MLNeuralNetworkLayerlayer_NonMaximumSuppression"; + case MLNeuralNetworkLayerlayer_oneHot: + return "MLNeuralNetworkLayerlayer_oneHot"; + case MLNeuralNetworkLayerlayer_cumSum: + return "MLNeuralNetworkLayerlayer_cumSum"; + case MLNeuralNetworkLayerlayer_clampedReLU: + return "MLNeuralNetworkLayerlayer_clampedReLU"; + case MLNeuralNetworkLayerlayer_argSort: + return "MLNeuralNetworkLayerlayer_argSort"; + case MLNeuralNetworkLayerlayer_pooling3d: + return "MLNeuralNetworkLayerlayer_pooling3d"; + case MLNeuralNetworkLayerlayer_globalPooling3d: + return "MLNeuralNetworkLayerlayer_globalPooling3d"; + case MLNeuralNetworkLayerlayer_sliceBySize: + return "MLNeuralNetworkLayerlayer_sliceBySize"; + case MLNeuralNetworkLayerlayer_convolution3d: + return "MLNeuralNetworkLayerlayer_convolution3d"; case MLNeuralNetworkLayerlayer_NOT_SET: return "INVALID"; } @@ -611,6 +635,12 @@ static const char * MLConvolutionLayerParamsConvolutionPaddingType_Name(MLConvol return "INVALID"; } +enum MLPaddingType: int { + MLPaddingTypeCUSTOM = 0, + MLPaddingTypeVALID = 1, + MLPaddingTypeSAME = 2, +}; + enum MLPoolingType: int { MLPoolingTypeMAX = 0, MLPoolingTypeAVERAGE = 1, @@ -639,6 +669,22 @@ static const char * MLPoolingLayerParamsPoolingPaddingType_Name(MLPoolingLayerPa return "INVALID"; } +enum MLPoolingType3D: int { + MLPoolingType3DMAX = 0, + MLPoolingType3DAVERAGE = 1, +}; + +enum MLPooling3DPaddingType: int { + MLPooling3DPaddingTypeCUSTOM = 0, + MLPooling3DPaddingTypeVALID = 1, + MLPooling3DPaddingTypeSAME = 2, +}; + +enum MLGlobalPoolingType3D: int { + MLGlobalPoolingType3DMAX = 0, + MLGlobalPoolingType3DAVERAGE = 1, +}; + enum MLPaddingLayerParamsPaddingType: int { MLPaddingLayerParamsPaddingType_constant = 1, MLPaddingLayerParamsPaddingType_reflection = 2, @@ -677,6 +723,12 @@ enum MLInterpolationMode: int { MLInterpolationModeBILINEAR = 1, }; +enum MLLinearUpsampleMode: int { + MLLinearUpsampleModeDEFAULT = 0, + MLLinearUpsampleModeALIGN_CORNERS_TRUE = 1, + MLLinearUpsampleModeALIGN_CORNERS_FALSE = 2, +}; + enum MLFlattenOrder: int { MLFlattenOrderCHANNEL_FIRST = 0, MLFlattenOrderCHANNEL_LAST = 1, @@ -690,6 +742,7 @@ enum MLReshapeOrder: int { enum MLReorganizationType: int { MLReorganizationTypeSPACE_TO_DEPTH = 0, MLReorganizationTypeDEPTH_TO_SPACE = 1, + MLReorganizationTypePIXEL_SHUFFLE = 2, }; enum MLSliceAxis: int { diff --git a/mlmodel/build/format/VisionFeaturePrint.pb.cc b/mlmodel/build/format/VisionFeaturePrint.pb.cc index f9701ac1e..7df6b83de 100644 --- a/mlmodel/build/format/VisionFeaturePrint.pb.cc +++ b/mlmodel/build/format/VisionFeaturePrint.pb.cc @@ -19,9 +19,12 @@ namespace Specification { namespace CoreMLModels { class VisionFeaturePrint_SceneDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<VisionFeaturePrint_Scene> { } _VisionFeaturePrint_Scene_default_instance_; +class VisionFeaturePrint_ObjectDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<VisionFeaturePrint_Object> { +} _VisionFeaturePrint_Object_default_instance_; class VisionFeaturePrintDefaultTypeInternal : public ::google::protobuf::internal::ExplicitlyConstructed<VisionFeaturePrint> { public: const ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene* scene_; + const ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* object_; } 
_VisionFeaturePrint_default_instance_; namespace protobuf_VisionFeaturePrint_2eproto { @@ -39,11 +42,13 @@ PROTOBUF_CONSTEXPR_VAR ::google::protobuf::internal::ParseTable const TableStruct::schema[] = { { NULL, NULL, 0, -1, -1, false }, { NULL, NULL, 0, -1, -1, false }, + { NULL, NULL, 0, -1, -1, false }, }; void TableStruct::Shutdown() { _VisionFeaturePrint_Scene_default_instance_.Shutdown(); + _VisionFeaturePrint_Object_default_instance_.Shutdown(); _VisionFeaturePrint_default_instance_.Shutdown(); } @@ -52,6 +57,7 @@ void TableStruct::InitDefaultsImpl() { ::google::protobuf::internal::InitProtobufDefaults(); _VisionFeaturePrint_Scene_default_instance_.DefaultConstruct(); + _VisionFeaturePrint_Object_default_instance_.DefaultConstruct(); _VisionFeaturePrint_default_instance_.DefaultConstruct(); } @@ -96,6 +102,23 @@ const VisionFeaturePrint_Scene_SceneVersion VisionFeaturePrint_Scene::SceneVersi const VisionFeaturePrint_Scene_SceneVersion VisionFeaturePrint_Scene::SceneVersion_MAX; const int VisionFeaturePrint_Scene::SceneVersion_ARRAYSIZE; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +bool VisionFeaturePrint_Object_ObjectVersion_IsValid(int value) { + switch (value) { + case 0: + case 1: + return true; + default: + return false; + } +} + +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object::OBJECT_VERSION_INVALID; +const VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object::OBJECT_VERSION_1; +const VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object::ObjectVersion_MIN; +const VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object::ObjectVersion_MAX; +const int VisionFeaturePrint_Object::ObjectVersion_ARRAYSIZE; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 // =================================================================== @@ -296,8 +319,315 @@ void VisionFeaturePrint_Scene::set_version(::CoreML::Specification::CoreMLModels // =================================================================== +#if !defined(_MSC_VER) || _MSC_VER >= 1900 +const int VisionFeaturePrint_Object::kVersionFieldNumber; +const int VisionFeaturePrint_Object::kOutputFieldNumber; +#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 + +VisionFeaturePrint_Object::VisionFeaturePrint_Object() + : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { + if (GOOGLE_PREDICT_TRUE(this != internal_default_instance())) { + protobuf_VisionFeaturePrint_2eproto::InitDefaults(); + } + SharedCtor(); + // @@protoc_insertion_point(constructor:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) +} +VisionFeaturePrint_Object::VisionFeaturePrint_Object(const VisionFeaturePrint_Object& from) + : ::google::protobuf::MessageLite(), + _internal_metadata_(NULL), + output_(from.output_), + _cached_size_(0) { + _internal_metadata_.MergeFrom(from._internal_metadata_); + version_ = from.version_; + // @@protoc_insertion_point(copy_constructor:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) +} + +void VisionFeaturePrint_Object::SharedCtor() { + version_ = 0; + _cached_size_ = 0; +} + +VisionFeaturePrint_Object::~VisionFeaturePrint_Object() { + // @@protoc_insertion_point(destructor:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + SharedDtor(); +} + +void VisionFeaturePrint_Object::SharedDtor() { +} + +void VisionFeaturePrint_Object::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const 
VisionFeaturePrint_Object& VisionFeaturePrint_Object::default_instance() { + protobuf_VisionFeaturePrint_2eproto::InitDefaults(); + return *internal_default_instance(); +} + +VisionFeaturePrint_Object* VisionFeaturePrint_Object::New(::google::protobuf::Arena* arena) const { + VisionFeaturePrint_Object* n = new VisionFeaturePrint_Object; + if (arena != NULL) { + arena->Own(n); + } + return n; +} + +void VisionFeaturePrint_Object::Clear() { +// @@protoc_insertion_point(message_clear_start:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + output_.Clear(); + version_ = 0; +} + +bool VisionFeaturePrint_Object::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure + ::google::protobuf::uint32 tag; + // @@protoc_insertion_point(parse_start:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + for (;;) { + ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); + tag = p.first; + if (!p.second) goto handle_unusual; + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion version = 1; + case 1: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(8u)) { + int value; + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( + input, &value))); + set_version(static_cast< ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion >(value)); + } else { + goto handle_unusual; + } + break; + } + + // repeated string output = 100; + case 100: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(802u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadString( + input, this->add_output())); + DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( + this->output(this->output_size() - 1).data(), + this->output(this->output_size() - 1).length(), + ::google::protobuf::internal::WireFormatLite::PARSE, + "CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output")); + } else { + goto handle_unusual; + } + break; + } + + default: { + handle_unusual: + if (tag == 0 || + ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + goto success; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag)); + break; + } + } + } +success: + // @@protoc_insertion_point(parse_success:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + return true; +failure: + // @@protoc_insertion_point(parse_failure:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + return false; +#undef DO_ +} + +void VisionFeaturePrint_Object::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // @@protoc_insertion_point(serialize_start:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion version = 1; + if (this->version() != 0) { + ::google::protobuf::internal::WireFormatLite::WriteEnum( + 1, this->version(), output); + } + + // repeated string output = 100; + for (int i = 0, n = this->output_size(); i < n; i++) { + 
::google::protobuf::internal::WireFormatLite::VerifyUtf8String( + this->output(i).data(), this->output(i).length(), + ::google::protobuf::internal::WireFormatLite::SERIALIZE, + "CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output"); + ::google::protobuf::internal::WireFormatLite::WriteString( + 100, this->output(i), output); + } + + // @@protoc_insertion_point(serialize_end:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) +} + +size_t VisionFeaturePrint_Object::ByteSizeLong() const { +// @@protoc_insertion_point(message_byte_size_start:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + size_t total_size = 0; + + // repeated string output = 100; + total_size += 2 * + ::google::protobuf::internal::FromIntSize(this->output_size()); + for (int i = 0, n = this->output_size(); i < n; i++) { + total_size += ::google::protobuf::internal::WireFormatLite::StringSize( + this->output(i)); + } + + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion version = 1; + if (this->version() != 0) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::EnumSize(this->version()); + } + + int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = cached_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void VisionFeaturePrint_Object::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast(&from)); +} + +void VisionFeaturePrint_Object::MergeFrom(const VisionFeaturePrint_Object& from) { +// @@protoc_insertion_point(class_specific_merge_from_start:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + GOOGLE_DCHECK_NE(&from, this); + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::google::protobuf::uint32 cached_has_bits = 0; + (void) cached_has_bits; + + output_.MergeFrom(from.output_); + if (from.version() != 0) { + set_version(from.version()); + } +} + +void VisionFeaturePrint_Object::CopyFrom(const VisionFeaturePrint_Object& from) { +// @@protoc_insertion_point(class_specific_copy_from_start:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool VisionFeaturePrint_Object::IsInitialized() const { + return true; +} + +void VisionFeaturePrint_Object::Swap(VisionFeaturePrint_Object* other) { + if (other == this) return; + InternalSwap(other); +} +void VisionFeaturePrint_Object::InternalSwap(VisionFeaturePrint_Object* other) { + output_.InternalSwap(&other->output_); + std::swap(version_, other->version_); + std::swap(_cached_size_, other->_cached_size_); +} + +::std::string VisionFeaturePrint_Object::GetTypeName() const { + return "CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object"; +} + +#if PROTOBUF_INLINE_NOT_IN_HEADERS +// VisionFeaturePrint_Object + +// .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion version = 1; +void VisionFeaturePrint_Object::clear_version() { + version_ = 0; +} +::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object::version() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.version) + return static_cast< ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion >(version_); +} +void VisionFeaturePrint_Object::set_version(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion 
value) { + + version_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.version) +} + +// repeated string output = 100; +int VisionFeaturePrint_Object::output_size() const { + return output_.size(); +} +void VisionFeaturePrint_Object::clear_output() { + output_.Clear(); +} +const ::std::string& VisionFeaturePrint_Object::output(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_.Get(index); +} +::std::string* VisionFeaturePrint_Object::mutable_output(int index) { + // @@protoc_insertion_point(field_mutable:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_.Mutable(index); +} +void VisionFeaturePrint_Object::set_output(int index, const ::std::string& value) { + // @@protoc_insertion_point(field_set:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + output_.Mutable(index)->assign(value); +} +#if LANG_CXX11 +void VisionFeaturePrint_Object::set_output(int index, ::std::string&& value) { + // @@protoc_insertion_point(field_set:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + output_.Mutable(index)->assign(std::move(value)); +} +#endif +void VisionFeaturePrint_Object::set_output(int index, const char* value) { + GOOGLE_DCHECK(value != NULL); + output_.Mutable(index)->assign(value); + // @@protoc_insertion_point(field_set_char:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +void VisionFeaturePrint_Object::set_output(int index, const char* value, size_t size) { + output_.Mutable(index)->assign( + reinterpret_cast(value), size); + // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +::std::string* VisionFeaturePrint_Object::add_output() { + // @@protoc_insertion_point(field_add_mutable:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_.Add(); +} +void VisionFeaturePrint_Object::add_output(const ::std::string& value) { + output_.Add()->assign(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +#if LANG_CXX11 +void VisionFeaturePrint_Object::add_output(::std::string&& value) { + output_.Add(std::move(value)); + // @@protoc_insertion_point(field_add:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +#endif +void VisionFeaturePrint_Object::add_output(const char* value) { + GOOGLE_DCHECK(value != NULL); + output_.Add()->assign(value); + // @@protoc_insertion_point(field_add_char:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +void VisionFeaturePrint_Object::add_output(const char* value, size_t size) { + output_.Add()->assign(reinterpret_cast(value), size); + // @@protoc_insertion_point(field_add_pointer:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +const ::google::protobuf::RepeatedPtrField< ::std::string>& +VisionFeaturePrint_Object::output() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_; +} +::google::protobuf::RepeatedPtrField< ::std::string>* +VisionFeaturePrint_Object::mutable_output() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return &output_; +} + +#endif // PROTOBUF_INLINE_NOT_IN_HEADERS + +// 
=================================================================== + #if !defined(_MSC_VER) || _MSC_VER >= 1900 const int VisionFeaturePrint::kSceneFieldNumber; +const int VisionFeaturePrint::kObjectFieldNumber; #endif // !defined(_MSC_VER) || _MSC_VER >= 1900 VisionFeaturePrint::VisionFeaturePrint() @@ -319,6 +649,10 @@ VisionFeaturePrint::VisionFeaturePrint(const VisionFeaturePrint& from) mutable_scene()->::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene::MergeFrom(from.scene()); break; } + case kObject: { + mutable_object()->::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object::MergeFrom(from.object()); + break; + } case VISIONFEATUREPRINTTYPE_NOT_SET: { break; } @@ -367,6 +701,10 @@ void VisionFeaturePrint::clear_VisionFeaturePrintType() { delete VisionFeaturePrintType_.scene_; break; } + case kObject: { + delete VisionFeaturePrintType_.object_; + break; + } case VISIONFEATUREPRINTTYPE_NOT_SET: { break; } @@ -402,6 +740,18 @@ bool VisionFeaturePrint::MergePartialFromCodedStream( break; } + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object object = 21; + case 21: { + if (static_cast< ::google::protobuf::uint8>(tag) == + static_cast< ::google::protobuf::uint8>(170u)) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_object())); + } else { + goto handle_unusual; + } + break; + } + default: { handle_unusual: if (tag == 0 || @@ -435,6 +785,12 @@ void VisionFeaturePrint::SerializeWithCachedSizes( 20, *VisionFeaturePrintType_.scene_, output); } + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object object = 21; + if (has_object()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 21, *VisionFeaturePrintType_.object_, output); + } + // @@protoc_insertion_point(serialize_end:CoreML.Specification.CoreMLModels.VisionFeaturePrint) } @@ -450,6 +806,13 @@ size_t VisionFeaturePrint::ByteSizeLong() const { *VisionFeaturePrintType_.scene_); break; } + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object object = 21; + case kObject: { + total_size += 2 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + *VisionFeaturePrintType_.object_); + break; + } case VISIONFEATUREPRINTTYPE_NOT_SET: { break; } @@ -478,6 +841,10 @@ void VisionFeaturePrint::MergeFrom(const VisionFeaturePrint& from) { mutable_scene()->::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene::MergeFrom(from.scene()); break; } + case kObject: { + mutable_object()->::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object::MergeFrom(from.object()); + break; + } case VISIONFEATUREPRINTTYPE_NOT_SET: { break; } @@ -560,6 +927,54 @@ void VisionFeaturePrint::set_allocated_scene(::CoreML::Specification::CoreMLMode // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.CoreMLModels.VisionFeaturePrint.scene) } +// .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object object = 21; +bool VisionFeaturePrint::has_object() const { + return VisionFeaturePrintType_case() == kObject; +} +void VisionFeaturePrint::set_has_object() { + _oneof_case_[0] = kObject; +} +void VisionFeaturePrint::clear_object() { + if (has_object()) { + delete VisionFeaturePrintType_.object_; + clear_has_VisionFeaturePrintType(); + } +} + const ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object& VisionFeaturePrint::object() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) + return has_object() + ? 
*VisionFeaturePrintType_.object_ + : ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object::default_instance(); +} +::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* VisionFeaturePrint::mutable_object() { + if (!has_object()) { + clear_VisionFeaturePrintType(); + set_has_object(); + VisionFeaturePrintType_.object_ = new ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) + return VisionFeaturePrintType_.object_; +} +::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* VisionFeaturePrint::release_object() { + // @@protoc_insertion_point(field_release:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) + if (has_object()) { + clear_has_VisionFeaturePrintType(); + ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* temp = VisionFeaturePrintType_.object_; + VisionFeaturePrintType_.object_ = NULL; + return temp; + } else { + return NULL; + } +} +void VisionFeaturePrint::set_allocated_object(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* object) { + clear_VisionFeaturePrintType(); + if (object) { + set_has_object(); + VisionFeaturePrintType_.object_ = object; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) +} + bool VisionFeaturePrint::has_VisionFeaturePrintType() const { return VisionFeaturePrintType_case() != VISIONFEATUREPRINTTYPE_NOT_SET; } diff --git a/mlmodel/build/format/VisionFeaturePrint.pb.h b/mlmodel/build/format/VisionFeaturePrint.pb.h index eda7bda90..6ea8996b4 100644 --- a/mlmodel/build/format/VisionFeaturePrint.pb.h +++ b/mlmodel/build/format/VisionFeaturePrint.pb.h @@ -36,6 +36,9 @@ namespace CoreMLModels { class VisionFeaturePrint; class VisionFeaturePrintDefaultTypeInternal; extern VisionFeaturePrintDefaultTypeInternal _VisionFeaturePrint_default_instance_; +class VisionFeaturePrint_Object; +class VisionFeaturePrint_ObjectDefaultTypeInternal; +extern VisionFeaturePrint_ObjectDefaultTypeInternal _VisionFeaturePrint_Object_default_instance_; class VisionFeaturePrint_Scene; class VisionFeaturePrint_SceneDefaultTypeInternal; extern VisionFeaturePrint_SceneDefaultTypeInternal _VisionFeaturePrint_Scene_default_instance_; @@ -72,6 +75,17 @@ const VisionFeaturePrint_Scene_SceneVersion VisionFeaturePrint_Scene_SceneVersio const VisionFeaturePrint_Scene_SceneVersion VisionFeaturePrint_Scene_SceneVersion_SceneVersion_MAX = VisionFeaturePrint_Scene_SceneVersion_SCENE_VERSION_1; const int VisionFeaturePrint_Scene_SceneVersion_SceneVersion_ARRAYSIZE = VisionFeaturePrint_Scene_SceneVersion_SceneVersion_MAX + 1; +enum VisionFeaturePrint_Object_ObjectVersion { + VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_INVALID = 0, + VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_1 = 1, + VisionFeaturePrint_Object_ObjectVersion_VisionFeaturePrint_Object_ObjectVersion_INT_MIN_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32min, + VisionFeaturePrint_Object_ObjectVersion_VisionFeaturePrint_Object_ObjectVersion_INT_MAX_SENTINEL_DO_NOT_USE_ = ::google::protobuf::kint32max +}; +bool VisionFeaturePrint_Object_ObjectVersion_IsValid(int value); +const VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_MIN = VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_INVALID; +const VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_MAX = 
VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_1; +const int VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_ARRAYSIZE = VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_MAX + 1; + // =================================================================== class VisionFeaturePrint_Scene : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Scene) */ { @@ -167,6 +181,122 @@ class VisionFeaturePrint_Scene : public ::google::protobuf::MessageLite /* @@pro }; // ------------------------------------------------------------------- +class VisionFeaturePrint_Object : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) */ { + public: + VisionFeaturePrint_Object(); + virtual ~VisionFeaturePrint_Object(); + + VisionFeaturePrint_Object(const VisionFeaturePrint_Object& from); + + inline VisionFeaturePrint_Object& operator=(const VisionFeaturePrint_Object& from) { + CopyFrom(from); + return *this; + } + + static const VisionFeaturePrint_Object& default_instance(); + + static inline const VisionFeaturePrint_Object* internal_default_instance() { + return reinterpret_cast( + &_VisionFeaturePrint_Object_default_instance_); + } + static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = + 1; + + void Swap(VisionFeaturePrint_Object* other); + + // implements Message ---------------------------------------------- + + inline VisionFeaturePrint_Object* New() const PROTOBUF_FINAL { return New(NULL); } + + VisionFeaturePrint_Object* New(::google::protobuf::Arena* arena) const PROTOBUF_FINAL; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + PROTOBUF_FINAL; + void CopyFrom(const VisionFeaturePrint_Object& from); + void MergeFrom(const VisionFeaturePrint_Object& from); + void Clear() PROTOBUF_FINAL; + bool IsInitialized() const PROTOBUF_FINAL; + + size_t ByteSizeLong() const PROTOBUF_FINAL; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) PROTOBUF_FINAL; + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const PROTOBUF_FINAL; + void DiscardUnknownFields(); + int GetCachedSize() const PROTOBUF_FINAL { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + void InternalSwap(VisionFeaturePrint_Object* other); + private: + inline ::google::protobuf::Arena* GetArenaNoVirtual() const { + return NULL; + } + inline void* MaybeArenaPtr() const { + return NULL; + } + public: + + ::std::string GetTypeName() const PROTOBUF_FINAL; + + // nested types ---------------------------------------------------- + + typedef VisionFeaturePrint_Object_ObjectVersion ObjectVersion; + static const ObjectVersion OBJECT_VERSION_INVALID = + VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_INVALID; + static const ObjectVersion OBJECT_VERSION_1 = + VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_1; + static inline bool ObjectVersion_IsValid(int value) { + return VisionFeaturePrint_Object_ObjectVersion_IsValid(value); + } + static const ObjectVersion ObjectVersion_MIN = + VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_MIN; + static const ObjectVersion ObjectVersion_MAX = + VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_MAX; + static const int ObjectVersion_ARRAYSIZE = + VisionFeaturePrint_Object_ObjectVersion_ObjectVersion_ARRAYSIZE; + + // accessors 
------------------------------------------------------- + + // repeated string output = 100; + int output_size() const; + void clear_output(); + static const int kOutputFieldNumber = 100; + const ::std::string& output(int index) const; + ::std::string* mutable_output(int index); + void set_output(int index, const ::std::string& value); + #if LANG_CXX11 + void set_output(int index, ::std::string&& value); + #endif + void set_output(int index, const char* value); + void set_output(int index, const char* value, size_t size); + ::std::string* add_output(); + void add_output(const ::std::string& value); + #if LANG_CXX11 + void add_output(::std::string&& value); + #endif + void add_output(const char* value); + void add_output(const char* value, size_t size); + const ::google::protobuf::RepeatedPtrField< ::std::string>& output() const; + ::google::protobuf::RepeatedPtrField< ::std::string>* mutable_output(); + + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion version = 1; + void clear_version(); + static const int kVersionFieldNumber = 1; + ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion version() const; + void set_version(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion value); + + // @@protoc_insertion_point(class_scope:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object) + private: + + ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; + ::google::protobuf::RepeatedPtrField< ::std::string> output_; + int version_; + mutable int _cached_size_; + friend struct protobuf_VisionFeaturePrint_2eproto::TableStruct; +}; +// ------------------------------------------------------------------- + class VisionFeaturePrint : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:CoreML.Specification.CoreMLModels.VisionFeaturePrint) */ { public: VisionFeaturePrint(); @@ -183,6 +313,7 @@ class VisionFeaturePrint : public ::google::protobuf::MessageLite /* @@protoc_in enum VisionFeaturePrintTypeCase { kScene = 20, + kObject = 21, VISIONFEATUREPRINTTYPE_NOT_SET = 0, }; @@ -191,7 +322,7 @@ class VisionFeaturePrint : public ::google::protobuf::MessageLite /* @@protoc_in &_VisionFeaturePrint_default_instance_); } static PROTOBUF_CONSTEXPR int const kIndexInFileMessages = - 1; + 2; void Swap(VisionFeaturePrint* other); @@ -233,6 +364,7 @@ class VisionFeaturePrint : public ::google::protobuf::MessageLite /* @@protoc_in // nested types ---------------------------------------------------- typedef VisionFeaturePrint_Scene Scene; + typedef VisionFeaturePrint_Object Object; // accessors ------------------------------------------------------- @@ -245,10 +377,20 @@ class VisionFeaturePrint : public ::google::protobuf::MessageLite /* @@protoc_in ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene* release_scene(); void set_allocated_scene(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene* scene); + // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object object = 21; + bool has_object() const; + void clear_object(); + static const int kObjectFieldNumber = 21; + const ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object& object() const; + ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* mutable_object(); + ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* release_object(); + void set_allocated_object(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* object); + 
VisionFeaturePrintTypeCase VisionFeaturePrintType_case() const; // @@protoc_insertion_point(class_scope:CoreML.Specification.CoreMLModels.VisionFeaturePrint) private: void set_has_scene(); + void set_has_object(); inline bool has_VisionFeaturePrintType() const; void clear_VisionFeaturePrintType(); @@ -258,6 +400,7 @@ class VisionFeaturePrint : public ::google::protobuf::MessageLite /* @@protoc_in union VisionFeaturePrintTypeUnion { VisionFeaturePrintTypeUnion() {} ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene* scene_; + ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* object_; } VisionFeaturePrintType_; mutable int _cached_size_; ::google::protobuf::uint32 _oneof_case_[1]; @@ -288,6 +431,93 @@ inline void VisionFeaturePrint_Scene::set_version(::CoreML::Specification::CoreM // ------------------------------------------------------------------- +// VisionFeaturePrint_Object + +// .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.ObjectVersion version = 1; +inline void VisionFeaturePrint_Object::clear_version() { + version_ = 0; +} +inline ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion VisionFeaturePrint_Object::version() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.version) + return static_cast< ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion >(version_); +} +inline void VisionFeaturePrint_Object::set_version(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion value) { + + version_ = value; + // @@protoc_insertion_point(field_set:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.version) +} + +// repeated string output = 100; +inline int VisionFeaturePrint_Object::output_size() const { + return output_.size(); +} +inline void VisionFeaturePrint_Object::clear_output() { + output_.Clear(); +} +inline const ::std::string& VisionFeaturePrint_Object::output(int index) const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_.Get(index); +} +inline ::std::string* VisionFeaturePrint_Object::mutable_output(int index) { + // @@protoc_insertion_point(field_mutable:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_.Mutable(index); +} +inline void VisionFeaturePrint_Object::set_output(int index, const ::std::string& value) { + // @@protoc_insertion_point(field_set:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + output_.Mutable(index)->assign(value); +} +#if LANG_CXX11 +inline void VisionFeaturePrint_Object::set_output(int index, ::std::string&& value) { + // @@protoc_insertion_point(field_set:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + output_.Mutable(index)->assign(std::move(value)); +} +#endif +inline void VisionFeaturePrint_Object::set_output(int index, const char* value) { + GOOGLE_DCHECK(value != NULL); + output_.Mutable(index)->assign(value); + // @@protoc_insertion_point(field_set_char:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +inline void VisionFeaturePrint_Object::set_output(int index, const char* value, size_t size) { + output_.Mutable(index)->assign( + reinterpret_cast(value), size); + // @@protoc_insertion_point(field_set_pointer:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +inline ::std::string* VisionFeaturePrint_Object::add_output() { + // 
@@protoc_insertion_point(field_add_mutable:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_.Add(); +} +inline void VisionFeaturePrint_Object::add_output(const ::std::string& value) { + output_.Add()->assign(value); + // @@protoc_insertion_point(field_add:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +#if LANG_CXX11 +inline void VisionFeaturePrint_Object::add_output(::std::string&& value) { + output_.Add(std::move(value)); + // @@protoc_insertion_point(field_add:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +#endif +inline void VisionFeaturePrint_Object::add_output(const char* value) { + GOOGLE_DCHECK(value != NULL); + output_.Add()->assign(value); + // @@protoc_insertion_point(field_add_char:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +inline void VisionFeaturePrint_Object::add_output(const char* value, size_t size) { + output_.Add()->assign(reinterpret_cast(value), size); + // @@protoc_insertion_point(field_add_pointer:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) +} +inline const ::google::protobuf::RepeatedPtrField< ::std::string>& +VisionFeaturePrint_Object::output() const { + // @@protoc_insertion_point(field_list:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return output_; +} +inline ::google::protobuf::RepeatedPtrField< ::std::string>* +VisionFeaturePrint_Object::mutable_output() { + // @@protoc_insertion_point(field_mutable_list:CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object.output) + return &output_; +} + +// ------------------------------------------------------------------- + // VisionFeaturePrint // .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Scene scene = 20; @@ -338,6 +568,54 @@ inline void VisionFeaturePrint::set_allocated_scene(::CoreML::Specification::Cor // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.CoreMLModels.VisionFeaturePrint.scene) } +// .CoreML.Specification.CoreMLModels.VisionFeaturePrint.Object object = 21; +inline bool VisionFeaturePrint::has_object() const { + return VisionFeaturePrintType_case() == kObject; +} +inline void VisionFeaturePrint::set_has_object() { + _oneof_case_[0] = kObject; +} +inline void VisionFeaturePrint::clear_object() { + if (has_object()) { + delete VisionFeaturePrintType_.object_; + clear_has_VisionFeaturePrintType(); + } +} +inline const ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object& VisionFeaturePrint::object() const { + // @@protoc_insertion_point(field_get:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) + return has_object() + ? 
*VisionFeaturePrintType_.object_ + : ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object::default_instance(); +} +inline ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* VisionFeaturePrint::mutable_object() { + if (!has_object()) { + clear_VisionFeaturePrintType(); + set_has_object(); + VisionFeaturePrintType_.object_ = new ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object; + } + // @@protoc_insertion_point(field_mutable:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) + return VisionFeaturePrintType_.object_; +} +inline ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* VisionFeaturePrint::release_object() { + // @@protoc_insertion_point(field_release:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) + if (has_object()) { + clear_has_VisionFeaturePrintType(); + ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* temp = VisionFeaturePrintType_.object_; + VisionFeaturePrintType_.object_ = NULL; + return temp; + } else { + return NULL; + } +} +inline void VisionFeaturePrint::set_allocated_object(::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object* object) { + clear_VisionFeaturePrintType(); + if (object) { + set_has_object(); + VisionFeaturePrintType_.object_ = object; + } + // @@protoc_insertion_point(field_set_allocated:CoreML.Specification.CoreMLModels.VisionFeaturePrint.object) +} + inline bool VisionFeaturePrint::has_VisionFeaturePrintType() const { return VisionFeaturePrintType_case() != VISIONFEATUREPRINTTYPE_NOT_SET; } @@ -350,6 +628,8 @@ inline VisionFeaturePrint::VisionFeaturePrintTypeCase VisionFeaturePrint::Vision #endif // !PROTOBUF_INLINE_NOT_IN_HEADERS // ------------------------------------------------------------------- +// ------------------------------------------------------------------- + // @@protoc_insertion_point(namespace_scope) @@ -363,6 +643,7 @@ namespace google { namespace protobuf { template <> struct is_proto_enum< ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene_SceneVersion> : ::google::protobuf::internal::true_type {}; +template <> struct is_proto_enum< ::CoreML::Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion> : ::google::protobuf::internal::true_type {}; } // namespace protobuf } // namespace google diff --git a/mlmodel/build/format/VisionFeaturePrint_enums.h b/mlmodel/build/format/VisionFeaturePrint_enums.h index e5ec1eb0b..b948dbfbb 100644 --- a/mlmodel/build/format/VisionFeaturePrint_enums.h +++ b/mlmodel/build/format/VisionFeaturePrint_enums.h @@ -2,6 +2,7 @@ #define __VISIONFEATUREPRINT_ENUMS_H enum MLVisionFeaturePrintVisionFeaturePrintType: int { MLVisionFeaturePrintVisionFeaturePrintType_scene = 20, + MLVisionFeaturePrintVisionFeaturePrintType_object = 21, MLVisionFeaturePrintVisionFeaturePrintType_NOT_SET = 0, }; @@ -10,6 +11,8 @@ static const char * MLVisionFeaturePrintVisionFeaturePrintType_Name(MLVisionFeat switch (x) { case MLVisionFeaturePrintVisionFeaturePrintType_scene: return "MLVisionFeaturePrintVisionFeaturePrintType_scene"; + case MLVisionFeaturePrintVisionFeaturePrintType_object: + return "MLVisionFeaturePrintVisionFeaturePrintType_object"; case MLVisionFeaturePrintVisionFeaturePrintType_NOT_SET: return "INVALID"; } @@ -21,4 +24,9 @@ enum MLSceneVersion: int { MLSceneVersionSCENE_VERSION_1 = 1, }; +enum MLObjectVersion: int { + MLObjectVersionOBJECT_VERSION_INVALID = 0, + MLObjectVersionOBJECT_VERSION_1 = 1, +}; + #endif diff --git a/mlmodel/docs/.gitignore b/mlmodel/docs/.gitignore new file 
mode 100644 index 000000000..cb407ba34 --- /dev/null +++ b/mlmodel/docs/.gitignore @@ -0,0 +1,2 @@ +*/Generated +_build diff --git a/mlmodel/docs/Format/ArrayFeatureExtractor.rst b/mlmodel/docs/Format/ArrayFeatureExtractor.rst new file mode 100644 index 000000000..68a7f4c20 --- /dev/null +++ b/mlmodel/docs/Format/ArrayFeatureExtractor.rst @@ -0,0 +1,14 @@ +ArrayFeatureExtractor +________________________________________________________________________________ + +An array feature extractor. + +Given an index, extracts the value at that index from its array input. +Indexes are zero-based. + + +.. code-block:: proto + + message ArrayFeatureExtractor { + repeated uint64 extractIndex = 1; + } \ No newline at end of file diff --git a/mlmodel/docs/Format/BayesianProbitRegressor.rst b/mlmodel/docs/Format/BayesianProbitRegressor.rst new file mode 100644 index 000000000..010aae908 --- /dev/null +++ b/mlmodel/docs/Format/BayesianProbitRegressor.rst @@ -0,0 +1,101 @@ +BayesianProbitRegressor +________________________________________________________________________________ + + + + +.. code-block:: proto + + message BayesianProbitRegressor { + + message Gaussian { + double mean = 1; + double precision = 2; // inverse of the variance + } + + message FeatureValueWeight { + uint32 featureValue = 1; + Gaussian featureWeight = 2; + } + + message FeatureWeight { + uint32 featureId = 1; + repeated FeatureValueWeight weights = 2; + } + + uint32 numberOfFeatures = 1; + + Gaussian bias = 2; // bias term + + repeated FeatureWeight features = 3; // feature weights + + string regressionInputFeatureName = 10; + + string optimismInputFeatureName = 11; + + string samplingScaleInputFeatureName = 12; + + string samplingTruncationInputFeatureName = 13; + + string meanOutputFeatureName = 20; + + string varianceOutputFeatureName = 21; + + string pessimisticProbabilityOutputFeatureName = 22; + + string sampledProbabilityOutputFeatureName = 23; + } + + + + + + +BayesianProbitRegressor.Gaussian +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message Gaussian { + double mean = 1; + double precision = 2; // inverse of the variance + } + + + + + + +BayesianProbitRegressor.FeatureValueWeight +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message FeatureValueWeight { + uint32 featureValue = 1; + Gaussian featureWeight = 2; + } + + + + + + +BayesianProbitRegressor.FeatureWeight +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message FeatureWeight { + uint32 featureId = 1; + repeated FeatureValueWeight weights = 2; + } \ No newline at end of file diff --git a/mlmodel/docs/Format/CategoricalMapping.rst b/mlmodel/docs/Format/CategoricalMapping.rst new file mode 100644 index 000000000..cb9106d9d --- /dev/null +++ b/mlmodel/docs/Format/CategoricalMapping.rst @@ -0,0 +1,27 @@ +CategoricalMapping +________________________________________________________________________________ + +A categorical mapping. + +This allows conversion from integers to strings, or from strings to integers. + + +.. code-block:: proto + + message CategoricalMapping { + oneof MappingType { + // Conversion from strings to integers + StringToInt64Map stringToInt64Map = 1; + + // Conversion from integer to string + Int64ToStringMap int64ToStringMap = 2; + } + + oneof ValueOnUnknown { + // Default output when converting from an integer to a string. 
+      string strValue = 101;
+
+      // Default output when converting from a string to an integer.
+      int64 int64Value = 102;
+    }
+  }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/CustomModel.rst b/mlmodel/docs/Format/CustomModel.rst
new file mode 100644
index 000000000..cca7b571e
--- /dev/null
+++ b/mlmodel/docs/Format/CustomModel.rst
@@ -0,0 +1,73 @@
+CustomModel
+________________________________________________________________________________
+
+A parameterized model whose function is defined in code.
+
+
+.. code-block:: proto
+
+    message CustomModel {
+
+        message CustomModelParamValue {
+            oneof value {
+                double doubleValue = 10;
+                string stringValue = 20;
+                int32 intValue = 30;
+                int64 longValue = 40;
+                bool boolValue = 50;
+                bytes bytesValue = 60;
+            }
+        }
+
+        string className = 10; // The name of the class (conforming to MLCustomModel) corresponding to this model
+        map<string, CustomModelParamValue> parameters = 30;
+        string description = 40; // An (optional) description provided by the model creator. This information is displayed when viewing the model, but does not affect the model's execution on device.
+    }
+
+
+CustomModel.CustomModelParamValue
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message CustomModelParamValue {
+        oneof value {
+            double doubleValue = 10;
+            string stringValue = 20;
+            int32 intValue = 30;
+            int64 longValue = 40;
+            bool boolValue = 50;
+            bytes bytesValue = 60;
+        }
+    }
+
+
+CustomModel.ParametersEntry
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message CustomModelParamValue {
+        oneof value {
+            double doubleValue = 10;
+            string stringValue = 20;
+            int32 intValue = 30;
+            int64 longValue = 40;
+            bool boolValue = 50;
+            bytes bytesValue = 60;
+        }
+    }
\ No newline at end of file
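As a quick illustration of the message above, here is a minimal sketch of populating a ``CustomModel`` through the Python protobuf bindings that ship with the package (``coremltools.proto``). The class name and parameter names below are hypothetical.

.. code-block:: python

    # Minimal sketch: fill in a CustomModel spec via the generated bindings.
    # "MyCustomModel" and the parameter names are hypothetical placeholders.
    from coremltools.proto import Model_pb2

    spec = Model_pb2.Model()
    spec.specificationVersion = 3

    custom = spec.customModel                  # selects the customModel oneof
    custom.className = "MyCustomModel"         # must conform to MLCustomModel
    custom.description = "A custom model stub"

    # Message-valued map entries are created on first access.
    custom.parameters["alpha"].doubleValue = 0.5
    custom.parameters["mode"].stringValue = "fast"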
diff --git a/mlmodel/docs/Format/DataStructures.rst b/mlmodel/docs/Format/DataStructures.rst
new file mode 100644
index 000000000..7f2dc07de
--- /dev/null
+++ b/mlmodel/docs/Format/DataStructures.rst
@@ -0,0 +1,195 @@
+StringToInt64Map
+________________________________________________________________________________
+
+A mapping from a string to a 64-bit integer.
+
+
+.. code-block:: proto
+
+    message StringToInt64Map {
+        map<string, int64> map = 1;
+    }
+
+
+Int64ToStringMap
+________________________________________________________________________________
+
+A mapping from a 64-bit integer to a string.
+
+
+.. code-block:: proto
+
+    message Int64ToStringMap {
+        map<int64, string> map = 1;
+    }
+
+
+StringToDoubleMap
+________________________________________________________________________________
+
+A mapping from a string to a double-precision floating point number.
+
+
+.. code-block:: proto
+
+    message StringToDoubleMap {
+        map<string, double> map = 1;
+    }
+
+
+Int64ToDoubleMap
+________________________________________________________________________________
+
+A mapping from a 64-bit integer to a double-precision floating point number.
+
+
+.. code-block:: proto
+
+    message Int64ToDoubleMap {
+        map<int64, double> map = 1;
+    }
+
+
+StringVector
+________________________________________________________________________________
+
+A vector of strings.
+
+
+.. code-block:: proto
+
+    message StringVector {
+        repeated string vector = 1;
+    }
+
+
+Int64Vector
+________________________________________________________________________________
+
+A vector of 64-bit integers.
+
+
+.. code-block:: proto
+
+    message Int64Vector {
+        repeated int64 vector = 1;
+    }
+
+
+FloatVector
+________________________________________________________________________________
+
+A vector of floating point numbers.
+
+
+.. code-block:: proto
+
+    message FloatVector {
+        repeated float vector = 1;
+    }
+
+
+DoubleVector
+________________________________________________________________________________
+
+A vector of double-precision floating point numbers.
+
+
+.. code-block:: proto
+
+    message DoubleVector {
+        repeated double vector = 1;
+    }
+
+
+Int64Range
+________________________________________________________________________________
+
+A range of int64 values.
+
+
+.. code-block:: proto
+
+    message Int64Range {
+        int64 minValue = 1;
+        int64 maxValue = 2;
+    }
+
+
+Int64Set
+________________________________________________________________________________
+
+A set of int64 values.
+
+
+.. code-block:: proto
+
+    message Int64Set {
+        repeated int64 values = 1;
+    }
+
+
+DoubleRange
+________________________________________________________________________________
+
+A range of double values.
+
+
+.. code-block:: proto
+
+    message DoubleRange {
+        double minValue = 1;
+        double maxValue = 2;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/DictVectorizer.rst b/mlmodel/docs/Format/DictVectorizer.rst
new file mode 100644
index 000000000..539b17a2e
--- /dev/null
+++ b/mlmodel/docs/Format/DictVectorizer.rst
@@ -0,0 +1,26 @@
+DictVectorizer
+________________________________________________________________________________
+
+Uses an index mapping to convert a dictionary to an array.
+
+The output array will be equal in length to the index mapping vector parameter.
+All keys in the input dictionary must be present in the index mapping vector.
+
+For each item in the input dictionary, insert its value in the output array.
+The position of the insertion is determined by the position of the item's key
+in the index mapping. Any keys not present in the input dictionary will be
+zero in the output array.
+
+For example: if the ``stringToIndex`` parameter is set to ``["a", "c", "b", "z"]``,
+then an input of ``{"a": 4, "c": 8}`` will produce an output of ``[4, 8, 0, 0]``.
+
+
+.. code-block:: proto
+
+    message DictVectorizer {
+        oneof Map {
+            StringVector stringToIndex = 1;
+
+            Int64Vector int64ToIndex = 2;
+        }
+    }
\ No newline at end of file
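The mapping semantics described above are simple enough to restate in a few lines of Python. This is only an illustrative sketch of the documented behavior, not the Core ML runtime's implementation.

.. code-block:: python

    # Illustrative sketch of DictVectorizer semantics (not the runtime code).
    def dict_vectorize(index_mapping, features):
        # Every key of `features` must appear in `index_mapping`;
        # positions without a matching key stay zero.
        output = [0.0] * len(index_mapping)
        for key, value in features.items():
            output[index_mapping.index(key)] = value
        return output

    # The documented example:
    assert dict_vectorize(["a", "c", "b", "z"], {"a": 4, "c": 8}) == [4, 8, 0, 0]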
diff --git a/mlmodel/docs/Format/FeatureTypes.rst b/mlmodel/docs/Format/FeatureTypes.rst
new file mode 100644
index 000000000..5ca1c3601
--- /dev/null
+++ b/mlmodel/docs/Format/FeatureTypes.rst
@@ -0,0 +1,436 @@
+Int64FeatureType
+________________________________________________________________________________
+
+The 64-bit integer feature type.
+
+
+.. code-block:: proto
+
+    message Int64FeatureType {}
+
+
+DoubleFeatureType
+________________________________________________________________________________
+
+The double-precision floating point number feature type.
+
+
+.. code-block:: proto
+
+    message DoubleFeatureType {}
+
+
+StringFeatureType
+________________________________________________________________________________
+
+The string feature type.
+
+
+.. code-block:: proto
+
+    message StringFeatureType {}
+
+
+SizeRange
+________________________________________________________________________________
+
+
+.. code-block:: proto
+
+    message SizeRange {
+        uint64 lowerBound = 1;
+        // A negative value means unbounded; otherwise, upperBound is included in the range.
+        int64 upperBound = 2;
+    }
+
+
+ImageFeatureType
+________________________________________________________________________________
+
+The image feature type.
+
+
+.. code-block:: proto
+
+    message ImageFeatureType {
+        // Assumes raw (decompressed) format
+        enum ColorSpace {
+            INVALID_COLOR_SPACE = 0;
+            GRAYSCALE = 10; // 8 bits per pixel
+            RGB = 20;       // 32 bits per pixel: RGBA with A channel ignored
+            BGR = 30;       // 32 bits per pixel: BGRA with A channel ignored
+        }
+
+        message ImageSize {
+            uint64 width = 1;
+            uint64 height = 2;
+        }
+
+        message EnumeratedImageSizes {
+            repeated ImageSize sizes = 1;
+        }
+
+        message ImageSizeRange {
+            SizeRange widthRange = 1;
+            SizeRange heightRange = 2;
+        }
+
+        // The required or default image size is width x height
+        //
+        // If specificationVersion <= 2 or SizeFlexibility is empty,
+        // width x height is the required fixed image size
+        //
+        // If SizeFlexibility is present, width x height indicate a "default"
+        // image size which must be consistent with the flexibility specified
+
+        int64 width = 1;
+        int64 height = 2;
+
+        // For specification version >= 3 you can specify image size flexibility.
+
+        oneof SizeFlexibility {
+
+            // Use enumeratedSizes for a set of distinct fixed sizes
+            // e.g. portrait or landscape: [80 x 100, 100 x 80]
+            //
+            // If the width x height fields above are specified then they must be
+            // one of the sizes listed.
+            //
+            // If width and height are not specified above then the default width
+            // and height will be enumeratedSizes[0]
+            //
+            // Must be non-empty
+
+            EnumeratedImageSizes enumeratedSizes = 21;
+
+            // Use imageSizeRange to allow for ranges of values
+            // e.g. any image greater than 10 x 20: [10..<max] x [20..<max]
+            //
+            // If width and height are specified above, they must fall within the
+            // ranges in imageSizeRange; they will be treated as the default size.
+            //
+            // If width and height are not specified above, then the default width
+            // and height will be imageSizeRange.widthRange.lowerBound x
+            // imageSizeRange.heightRange.lowerBound
+
+            ImageSizeRange imageSizeRange = 31;
+        }
+
+        ColorSpace colorSpace = 3; // must be set
+    }
+
+
+ImageFeatureType.ImageSize
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message ImageSize {
+        uint64 width = 1;
+        uint64 height = 2;
+    }
+
+
+ImageFeatureType.EnumeratedImageSizes
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message EnumeratedImageSizes {
+        repeated ImageSize sizes = 1;
+    }
+
+
+ImageFeatureType.ImageSizeRange
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message ImageSizeRange {
+        SizeRange widthRange = 1;
+        SizeRange heightRange = 2;
+    }
+
+
+ArrayFeatureType
+________________________________________________________________________________
+
+The multidimensional array feature type.
+
+
+.. code-block:: proto
+
+    message ArrayFeatureType {
+
+        enum ArrayDataType {
+            INVALID_ARRAY_DATA_TYPE = 0;
+            FLOAT32 = 65568; // 0x10000 | 32
+            DOUBLE = 65600;  // 0x10000 | 64
+            INT32 = 131104;  // 0x20000 | 32
+        }
+
+        // The required or default shape
+        //
+        // If specificationVersion <= 2 or ShapeFlexibility is empty,
+        // shape is the required fixed shape
+        //
+        // If ShapeFlexibility is present, shape indicates a "default"
+        // shape which must be consistent with the flexibility specified
+
+        repeated int64 shape = 1;
+
+        ArrayDataType dataType = 2;
+
+        message Shape {
+            repeated int64 shape = 1;
+        }
+
+        message EnumeratedShapes {
+            repeated Shape shapes = 1;
+        }
+
+        message ShapeRange {
+            // sizeRanges.size() must be length 1 or 3
+            // sizeRanges[d] specifies the allowed range for dimension d
+            repeated SizeRange sizeRanges = 1;
+        }
+
+        // For specification version >= 3 you can specify shape flexibility.
+
+        oneof ShapeFlexibility {
+
+            // Use enumeratedShapes for a set of distinct fixed shapes
+            //
+            // If the shape field is specified then it must be
+            // one of the enumerated shapes.
+            // If shape is not specified, the "default" shape will be considered
+            // enumeratedShapes[0]
+            //
+            // Must be non-empty
+
+            EnumeratedShapes enumeratedShapes = 21;
+
+            // Use shapeRange to allow the size of each dimension to vary within
+            // independently specified ranges
+            //
+            // If you specify shape above it must fall in the range
+            // specified in shapeRanges. It will be treated as the default shape.
+            //
+            // If you don't specify shape above then the default shape will
+            // have shape[d] = shapeRange.sizeRanges[d].lowerBound
+
+            ShapeRange shapeRange = 31;
+        }
+
+        oneof defaultOptionalValue {
+            int32 intDefaultValue = 41;
+            float floatDefaultValue = 51;
+            double doubleDefaultValue = 61;
+        }
+    }
+
+
+ArrayFeatureType.Shape
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message Shape {
+        repeated int64 shape = 1;
+    }
+
+
+ArrayFeatureType.EnumeratedShapes
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message EnumeratedShapes {
+        repeated Shape shapes = 1;
+    }
+
+
+ArrayFeatureType.ShapeRange
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message ShapeRange {
+        // sizeRanges.size() must be length 1 or 3
+        // sizeRanges[d] specifies the allowed range for dimension d
+        repeated SizeRange sizeRanges = 1;
+    }
+
+
+DictionaryFeatureType
+________________________________________________________________________________
+
+The dictionary feature type.
+
+
+.. code-block:: proto
+
+    message DictionaryFeatureType {
+        oneof KeyType {
+            Int64FeatureType int64KeyType = 1;
+            StringFeatureType stringKeyType = 2;
+        }
+    }
+
+
+SequenceFeatureType
+________________________________________________________________________________
+
+The sequence feature type.
+
+
+.. code-block:: proto
+
+    message SequenceFeatureType {
+
+        oneof Type {
+            Int64FeatureType int64Type = 1;
+            StringFeatureType stringType = 3;
+        }
+
+        // Range of allowed size/length/count of sequence
+        SizeRange sizeRange = 101;
+    }
+
+
+FeatureType
+________________________________________________________________________________
+
+A feature, which may be optional.
+
+
+.. code-block:: proto
+
+    message FeatureType {
+        oneof Type {
+            Int64FeatureType int64Type = 1;
+            DoubleFeatureType doubleType = 2;
+            StringFeatureType stringType = 3;
+            ImageFeatureType imageType = 4;
+            ArrayFeatureType multiArrayType = 5;
+            DictionaryFeatureType dictionaryType = 6;
+            SequenceFeatureType sequenceType = 7;
+        }
+
+        bool isOptional = 1000;
+    }
+
+
+ArrayFeatureType.ArrayDataType
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    enum ArrayDataType {
+        INVALID_ARRAY_DATA_TYPE = 0;
+        FLOAT32 = 65568; // 0x10000 | 32
+        DOUBLE = 65600;  // 0x10000 | 64
+        INT32 = 131104;  // 0x20000 | 32
+    }
+
+
+ImageFeatureType.ColorSpace
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    enum ColorSpace {
+        INVALID_COLOR_SPACE = 0;
+        GRAYSCALE = 10; // 8 bits per pixel
+        RGB = 20;       // 32 bits per pixel: RGBA with A channel ignored
+        BGR = 30;       // 32 bits per pixel: BGRA with A channel ignored
+    }
\ No newline at end of file
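To make the flexibility options concrete, the following sketch builds an ``ArrayFeatureType`` with a default shape plus a per-dimension ``ShapeRange``, using the generated ``coremltools.proto`` bindings. The shapes and bounds are arbitrary examples.

.. code-block:: python

    # Sketch: a flexible multiarray type with a default shape of [3, 64, 64]
    # whose last two dimensions may grow without bound (upperBound = -1).
    from coremltools.proto import FeatureTypes_pb2 as ft

    array_type = ft.ArrayFeatureType()
    array_type.dataType = ft.ArrayFeatureType.FLOAT32
    array_type.shape.extend([3, 64, 64])        # the "default" shape

    for lower, upper in [(3, 3), (32, -1), (32, -1)]:
        size_range = array_type.shapeRange.sizeRanges.add()
        size_range.lowerBound = lower
        size_range.upperBound = upper           # negative means unbounded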
diff --git a/mlmodel/docs/Format/FeatureVectorizer.rst b/mlmodel/docs/Format/FeatureVectorizer.rst
new file mode 100644
index 000000000..b8f787fc0
--- /dev/null
+++ b/mlmodel/docs/Format/FeatureVectorizer.rst
@@ -0,0 +1,39 @@
+FeatureVectorizer
+________________________________________________________________________________
+
+A FeatureVectorizer puts one or more features into a single array.
+
+The ordering of features in the output array is determined by
+``inputList``.
+
+``inputDimensions`` is a zero-based index.
+
+
+.. code-block:: proto
+
+    message FeatureVectorizer {
+        message InputColumn {
+            string inputColumn = 1;
+            uint64 inputDimensions = 2;
+        }
+
+        repeated InputColumn inputList = 1;
+    }
+
+
+FeatureVectorizer.InputColumn
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message InputColumn {
+        string inputColumn = 1;
+        uint64 inputDimensions = 2;
+    }
\ No newline at end of file
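A short sketch of assembling the message above with the ``coremltools.proto`` bindings; the input names and dimensions are made up for illustration.

.. code-block:: python

    # Sketch: concatenate a scalar and a 2-element feature into one array.
    from coremltools.proto import FeatureVectorizer_pb2

    vectorizer = FeatureVectorizer_pb2.FeatureVectorizer()
    for name, dims in [("age", 1), ("scores", 2)]:   # hypothetical inputs
        column = vectorizer.inputList.add()
        column.inputColumn = name
        column.inputDimensions = dims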
diff --git a/mlmodel/docs/Format/GLMClassifier.rst b/mlmodel/docs/Format/GLMClassifier.rst
new file mode 100644
index 000000000..394e732df
--- /dev/null
+++ b/mlmodel/docs/Format/GLMClassifier.rst
@@ -0,0 +1,85 @@
+GLMClassifier
+________________________________________________________________________________
+
+A generalized linear model classifier.
+
+
+.. code-block:: proto
+
+    message GLMClassifier {
+        message DoubleArray {
+            repeated double value = 1;
+        }
+
+        enum PostEvaluationTransform {
+            Logit = 0;
+            Probit = 1;
+        }
+
+        enum ClassEncoding {
+            ReferenceClass = 0;
+            OneVsRest = 1;
+        }
+
+        repeated DoubleArray weights = 1;
+        repeated double offset = 2;
+        PostEvaluationTransform postEvaluationTransform = 3;
+        ClassEncoding classEncoding = 4;
+
+        oneof ClassLabels {
+            StringVector stringClassLabels = 100;
+            Int64Vector int64ClassLabels = 101;
+        }
+    }
+
+
+GLMClassifier.DoubleArray
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message DoubleArray {
+        repeated double value = 1;
+    }
+
+
+GLMClassifier.ClassEncoding
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    enum ClassEncoding {
+        ReferenceClass = 0;
+        OneVsRest = 1;
+    }
+
+
+GLMClassifier.PostEvaluationTransform
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    enum PostEvaluationTransform {
+        Logit = 0;
+        Probit = 1;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/GLMRegressor.rst b/mlmodel/docs/Format/GLMRegressor.rst
new file mode 100644
index 000000000..ae24b5033
--- /dev/null
+++ b/mlmodel/docs/Format/GLMRegressor.rst
@@ -0,0 +1,62 @@
+GLMRegressor
+________________________________________________________________________________
+
+A generalized linear model regressor.
+
+
+.. code-block:: proto
+
+    message GLMRegressor {
+        message DoubleArray {
+            repeated double value = 1;
+        }
+
+        enum PostEvaluationTransform {
+            NoTransform = 0;
+            Logit = 1;
+            Probit = 2;
+        }
+
+        repeated DoubleArray weights = 1;
+        repeated double offset = 2;
+        PostEvaluationTransform postEvaluationTransform = 3;
+    }
+
+
+GLMRegressor.DoubleArray
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    message DoubleArray {
+        repeated double value = 1;
+    }
+
+
+GLMRegressor.PostEvaluationTransform
+--------------------------------------------------------------------------------
+
+
+.. code-block:: proto
+
+    enum PostEvaluationTransform {
+        NoTransform = 0;
+        Logit = 1;
+        Probit = 2;
+    }
\ No newline at end of file
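For intuition, here is a numpy sketch of the evaluation these two messages describe: a linear map followed by the post-evaluation transform. The Logit transform is shown as the logistic sigmoid; treat this as an illustration of the format, not as the Core ML runtime's implementation.

.. code-block:: python

    import numpy as np

    def glm_evaluate(weights, offset, x, transform="NoTransform"):
        # weights: one row per output, mirroring the repeated DoubleArray field
        z = np.asarray(weights) @ np.asarray(x) + np.asarray(offset)
        if transform == "Logit":
            return 1.0 / (1.0 + np.exp(-z))   # maps margins to probabilities
        return z                              # NoTransform

    # Single output with two input features: 0.5*2 - 0.25*4 + 0.1 = 0.1
    print(glm_evaluate([[0.5, -0.25]], [0.1], [2.0, 4.0]))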
code-block:: proto
+
+    message Identity {
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/Imputer.rst b/mlmodel/docs/Format/Imputer.rst
new file mode 100644
index 000000000..b45c0593d
--- /dev/null
+++ b/mlmodel/docs/Format/Imputer.rst
@@ -0,0 +1,36 @@
+Imputer
+________________________________________________________________________________
+
+A transformer that replaces missing values with a default value,
+such as a statistically-derived value.
+
+If ``ReplaceValue`` is set, then missing values of that type are
+replaced with the corresponding value.
+
+For example: if ``replaceDoubleValue`` is set to ``NaN``
+and a single ``NaN`` double value is provided as input,
+then it is replaced by ``imputedDoubleValue``. However,
+if the input is an array of doubles, then any instances
+of ``NaN`` in the array are replaced with the corresponding
+value in ``imputedDoubleArray``.
+
+
+.. code-block:: proto
+
+    message Imputer {
+        oneof ImputedValue {
+            double imputedDoubleValue = 1;
+            int64 imputedInt64Value = 2;
+            string imputedStringValue = 3;
+            DoubleVector imputedDoubleArray = 4;
+            Int64Vector imputedInt64Array = 5;
+            StringToDoubleMap imputedStringDictionary = 6;
+            Int64ToDoubleMap imputedInt64Dictionary = 7;
+        }
+
+        oneof ReplaceValue {
+            double replaceDoubleValue = 11;
+            int64 replaceInt64Value = 12;
+            string replaceStringValue = 13;
+        }
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/ItemSimilarityRecommender.rst b/mlmodel/docs/Format/ItemSimilarityRecommender.rst
new file mode 100644
index 000000000..38deecfd9
--- /dev/null
+++ b/mlmodel/docs/Format/ItemSimilarityRecommender.rst
@@ -0,0 +1,107 @@
+Each tree is a collection of nodes,
+each of which is identified by a unique identifier.
+
+Each node is either a branch or a leaf node.
+A branch node evaluates a value according to a behavior;
+if true, the node identified by ``true_child_node_id`` is evaluated next,
+if false, the node identified by ``false_child_node_id`` is evaluated next.
+A leaf node adds the evaluation value to the base prediction value
+to get the final prediction.
+
+A tree must have exactly one root node,
+which has no parent node.
+A tree must not terminate on a branch node.
+All leaf nodes must be accessible
+by evaluating one or more branch nodes in sequence,
+starting from the root node.
+
+
+
+ItemSimilarityRecommender
+________________________________________________________________________________
+
+Item Similarity Recommender
+
+    The Item Similarity recommender takes as input a list of items and scores,
+    then uses that information and a table of item similarities to predict similarity
+    scores for all items. By default, the items predicted are the ones most similar
+    to the given items but not part of that item set.
+
+    The predicted score for a given item k is
+        sum_(i in observed items) sim_(k,i) * (score_i - shift_k)
+
+    Because only the most similar scores for each item i are stored,
+    sim_(k,i) is often zero.
+
+    For many models, the score adjustment parameter shift_k is zero -- it's occasionally used
+    to counteract global biases for popular items.
+
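+    To make the scoring rule concrete, here is a small pure-Python sketch of
+    the formula above (names and data are illustrative; the actual evaluation
+    is performed by Core ML from the ``itemItemSimilarities`` table):
+
+    .. code-block:: python
+
+        # Sketch of the item-similarity scoring rule described above.
+        # sim[k][i] holds the stored similarity between items k and i
+        # (absent entries are treated as zero); shift[k] is the per-item
+        # score adjustment (itemScoreAdjustment).
+
+        def predict_score(k, observed, sim, shift):
+            """observed: dict mapping item id -> score given by the user."""
+            return sum(
+                sim.get(k, {}).get(i, 0.0) * (score_i - shift.get(k, 0.0))
+                for i, score_i in observed.items()
+            )
+
+        sim = {2: {0: 0.8, 1: 0.3}}      # similarities stored for item 2
+        shift = {2: 0.1}                 # score adjustment for item 2
+        observed = {0: 4.0, 1: 2.0}      # the user rated items 0 and 1
+
+        print(predict_score(2, observed, sim, shift))  # 0.8*3.9 + 0.3*1.9 = 3.69
+
+
+.. 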
code-block:: proto
+
+    message ItemSimilarityRecommender {
+
+        message ConnectedItem {
+            uint64 itemId = 1;
+            double similarityScore = 2;
+        }
+
+        message SimilarItems {
+            uint64 itemId = 1;
+            repeated ConnectedItem similarItemList = 2;
+            double itemScoreAdjustment = 3;
+        }
+
+        repeated SimilarItems itemItemSimilarities = 1;
+
+        StringVector itemStringIds = 2;
+        Int64Vector itemInt64Ids = 3;
+
+
+        string recommendedItemListOutputFeatureName = 20;
+        string recommendedItemScoreOutputFeatureName = 21;
+
+    }
+
+
+
+
+
+ItemSimilarityRecommender.ConnectedItem
+--------------------------------------------------------------------------------
+
+The items similar to a given base item.
+
+
+.. code-block:: proto
+
+    message ConnectedItem {
+        uint64 itemId = 1;
+        double similarityScore = 2;
+    }
+
+
+
+
+
+ItemSimilarityRecommender.SimilarItems
+--------------------------------------------------------------------------------
+
+The similarity list for a single base item, following the score formula given
+above: ``itemScoreAdjustment`` supplies the shift_k parameter, and each entry
+of ``similarItemList`` supplies one of the known sim_(k,i) values, where k is
+the ``itemId`` of this message and i is the ``itemId`` of the entry.
+
+
+.. code-block:: proto
+
+    message SimilarItems {
+        uint64 itemId = 1;
+        repeated ConnectedItem similarItemList = 2;
+        double itemScoreAdjustment = 3;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/LinkedModel.rst b/mlmodel/docs/Format/LinkedModel.rst
new file mode 100644
index 000000000..07abe1695
--- /dev/null
+++ b/mlmodel/docs/Format/LinkedModel.rst
@@ -0,0 +1,46 @@
+LinkedModel
+________________________________________________________________________________
+
+A model which wraps another (compiled) model external to this one.
+
+
+.. code-block:: proto
+
+    message LinkedModel {
+
+        oneof LinkType {
+            // A model located via a file system path
+            LinkedModelFile linkedModelFile = 1;
+        }
+    }
+
+
+
+
+
+LinkedModelFile
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message LinkedModelFile {
+
+        // Model file name: e.g. "MyFeatureExtractor.mlmodelc"
+        StringParameter linkedModelFileName = 1;
+
+        // Search path to find the linked model file
+        // Multiple paths can be searched using the unix-style path separator ":"
+        // Each path can be relative (to this model) or absolute
+        //
+        // An empty string is the same as the relative search path "."
+        // which searches in the same location as this model file
+        //
+        // There are some special paths which start with $
+        // - $BUNDLE_MAIN - Indicates to look in the main bundle
+        // - $BUNDLE_IDENTIFIER(identifier) - Looks in the bundle with the given identifier
+        StringParameter linkedModelSearchPath = 2;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/Model.rst b/mlmodel/docs/Format/Model.rst
new file mode 100644
index 000000000..2bd7e9b94
--- /dev/null
+++ b/mlmodel/docs/Format/Model.rst
@@ -0,0 +1,333 @@
+A Core ML model consists of a specification version
+and a model description,
+and can be any one of the following types:
+
+Neural Networks
+  - ``NeuralNetwork``
+
+Regressors
+  - ``GLMRegressor``
+  - ``SupportVectorRegressor``
+  - ``TreeEnsembleRegressor``
+  - ``NeuralNetworkRegressor``
+  - ``BayesianProbitRegressor``
+
+Classifiers
+  - ``NeuralNetworkClassifier``
+  - ``TreeEnsembleClassifier``
+  - ``GLMClassifier``
+  - ``SupportVectorClassifier``
+  - ``KNearestNeighborsClassifier``
+
+Other models
+  - ``CustomModel``
+  - ``TextClassifier``
+  - ``WordTagger``
+  - ``Gazetteer``
+  - ``WordEmbedding``
+  - ``VisionFeaturePrint``
+  - ``LinkedModel``
+  - ``SoundAnalysisPreprocessing``
+  - ``ItemSimilarityRecommender``
+
+Feature Engineering
+  - ``Imputer``
+  - ``Scaler``
+  - ``Normalizer``
+  - ``OneHotEncoder``
+  - ``CategoricalMapping``
+  - ``FeatureVectorizer``
+  - ``DictVectorizer``
+  - ``ArrayFeatureExtractor``
+  - ``NonMaximumSuppression``
+
+Pipelines
+  - ``PipelineClassifier``
+  - ``PipelineRegressor``
+  - ``Pipeline``
+
+Simple Mathematical Functions
+  - ``Identity``
+
+
+
+Pipeline
+________________________________________________________________________________
+
+A pipeline consisting of one or more models.
+
+
+.. code-block:: proto
+
+    message Pipeline {
+        repeated Model models = 1;
+
+        // Optional names given for each model
+        // If not supplied it defaults to ["model0",..., "model"(models.size()-1)]
+        // These names can be used to disambiguate the scope / domain of a parameter
+        repeated string names = 2;
+    }
+
+
+
+
+
+PipelineClassifier
+________________________________________________________________________________
+
+A classifier pipeline.
+
+
+.. code-block:: proto
+
+    message PipelineClassifier {
+        Pipeline pipeline = 1;
+    }
+
+
+
+
+
+PipelineRegressor
+________________________________________________________________________________
+
+A regressor pipeline.
+
+
+.. code-block:: proto
+
+    message PipelineRegressor {
+        Pipeline pipeline = 1;
+    }
+
+
+
+
+
+FeatureDescription
+________________________________________________________________________________
+
+A feature description,
+consisting of a name, short description, and type.
+
+
+.. code-block:: proto
+
+    message FeatureDescription {
+        string name = 1;
+        string shortDescription = 2;
+        FeatureType type = 3;
+    }
+
+
+
+
+
+Metadata
+________________________________________________________________________________
+
+Model metadata,
+consisting of a short description, a version string,
+an author, a license, and any other user-defined
+key/value metadata.
+
+
+.. code-block:: proto
+
+    message Metadata {
+        string shortDescription = 1;
+        string versionString = 2;
+        string author = 3;
+        string license = 4;
+        map<string, string> userDefined = 100;
+    }
+
+
+
+
+
+
+
+ModelDescription
+________________________________________________________________________________
+
+A description of a model,
+consisting of descriptions of its input and output features.
+Both regressor and classifier models require the name of the
+primary predicted output feature (``predictedFeatureName``).
+Classifier models can specify the output feature containing
+probabilities for the predicted classes
+(``predictedProbabilitiesName``).
+
+
+.. code-block:: proto
+
+    message ModelDescription {
+        repeated FeatureDescription input = 1;
+        repeated FeatureDescription output = 10;
+
+        // [Required for regressor and classifier models]: the name
+        // to give to an output feature containing the prediction.
+        string predictedFeatureName = 11;
+
+        // [Optional for classifier models]: the name to give to an
+        // output feature containing a dictionary mapping class
+        // labels to their predicted probabilities. If not specified,
+        // the dictionary will not be returned by the model.
+        string predictedProbabilitiesName = 12;
+
+        repeated FeatureDescription trainingInput = 50;
+
+        Metadata metadata = 100;
+    }
+
+
+
+
+
+SerializedModel
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message SerializedModel {
+        // Identifier whose content describes the model type of the serialized protocol buffer message.
+        string identifier = 1;
+
+        // Must be a valid serialized protocol buffer of the above specified type.
+        bytes model = 2;
+    }
+
+
+
+
+
+Model
+________________________________________________________________________________
+
+A Core ML model,
+consisting of a specification version,
+a model description, and a model type.
+
+Core ML model compatibility is indicated by
+a monotonically increasing specification version number,
+which is incremented any time a backward-incompatible change is made
+(this is functionally equivalent to the MAJOR version number
+described by `Semantic Versioning 2.0.0 <https://semver.org/>`_).
+
+Specification Versions : OS Availability (Core ML Version)
+
+1 : iOS 11, macOS 10.13, tvOS 11, watchOS 4 (Core ML 1)
+- Feedforward & Recurrent Neural Networks
+- General Linear Models
+- Tree Ensembles
+- Support Vector Machines
+- Pipelines
+- Feature Engineering
+
+2 : iOS 11.2, macOS 10.13.2, tvOS 11.2, watchOS 4.2 (Core ML 1.2)
+- Custom Layers for Neural Networks
+- Float 16 support for Neural Network layers
+
+3 : iOS 12, macOS 10.14, tvOS 12, watchOS 5 (Core ML 2)
+- Flexible shapes and image sizes
+- Categorical sequences
+- Core ML Vision Feature Print, Text Classifier, Word Tagger
+- Non Max Suppression
+- Crop and Resize Bilinear NN layers
+- Custom Models
+
+4 : iOS 13, macOS 10.15, tvOS 13, watchOS 6 (Core ML 3)
+- Updatable models
+- Exact shape / general rank mapping for neural networks
+- Large expansion of supported neural network layers
+  - Generalized operations
+  - Control flow
+  - Dynamic layers
+  - See NeuralNetwork.proto
+- Nearest Neighbor Classifier
+- Sound Analysis Preprocessing
+- Recommender
+- Linked Model
+- NLP Gazetteer
+- NLP WordEmbedding
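+
+To illustrate the message layout, the following sketch assembles a minimal
+identity-model spec directly from the generated protobuf classes (a sketch
+only; the feature name ``x`` and the version value are illustrative):
+
+.. code-block:: python
+
+    # Sketch: build a minimal Model protobuf by hand.
+    from coremltools.proto import Model_pb2
+
+    spec = Model_pb2.Model()
+    spec.specificationVersion = 4      # iOS 13 / macOS 10.15 feature set
+
+    # One double input and one double output with the same name,
+    # since the identity model returns its inputs unchanged.
+    inp = spec.description.input.add()
+    inp.name = "x"
+    inp.type.doubleType.SetInParent()
+
+    out = spec.description.output.add()
+    out.name = "x"
+    out.type.doubleType.SetInParent()
+
+    # Selecting the identity model populates the "Type" oneof.
+    spec.identity.SetInParent()
+    print(spec.WhichOneof("Type"))     # -> "identity"
+
+
+.. 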
code-block:: proto + + message Model { + int32 specificationVersion = 1; + ModelDescription description = 2; + + bool isUpdatable = 10; + + // start at 200 here + // model specific parameters: + oneof Type { + // pipeline starts at 200 + PipelineClassifier pipelineClassifier = 200; + PipelineRegressor pipelineRegressor = 201; + Pipeline pipeline = 202; + + // regressors start at 300 + GLMRegressor glmRegressor = 300; + SupportVectorRegressor supportVectorRegressor = 301; + TreeEnsembleRegressor treeEnsembleRegressor = 302; + NeuralNetworkRegressor neuralNetworkRegressor = 303; + BayesianProbitRegressor bayesianProbitRegressor = 304; + + // classifiers start at 400 + GLMClassifier glmClassifier = 400; + SupportVectorClassifier supportVectorClassifier = 401; + TreeEnsembleClassifier treeEnsembleClassifier = 402; + NeuralNetworkClassifier neuralNetworkClassifier = 403; + KNearestNeighborsClassifier kNearestNeighborsClassifier = 404; + + // generic models start at 500 + NeuralNetwork neuralNetwork = 500; + ItemSimilarityRecommender itemSimilarityRecommender = 501; + + // Custom and linked models + CustomModel customModel = 555; + LinkedModel linkedModel = 556; + + // feature engineering starts at 600 + OneHotEncoder oneHotEncoder = 600; + Imputer imputer = 601; + FeatureVectorizer featureVectorizer = 602; + DictVectorizer dictVectorizer = 603; + Scaler scaler = 604; + CategoricalMapping categoricalMapping = 606; + Normalizer normalizer = 607; + ArrayFeatureExtractor arrayFeatureExtractor = 609; + NonMaximumSuppression nonMaximumSuppression = 610; + + + // simple mathematical functions used for testing start at 900 + Identity identity = 900; + + // reserved until 1000 + + // CoreML provided models + CoreMLModels.TextClassifier textClassifier = 2000; + CoreMLModels.WordTagger wordTagger = 2001; + CoreMLModels.VisionFeaturePrint visionFeaturePrint = 2002; + CoreMLModels.SoundAnalysisPreprocessing soundAnalysisPreprocessing = 2003; + CoreMLModels.Gazetteer gazetteer = 2004; + CoreMLModels.WordEmbedding wordEmbedding = 2005; + + // Reserved private messages start at 3000 + // These messages are subject to change with no notice or support. + SerializedModel serializedModel = 3000; + } + } \ No newline at end of file diff --git a/mlmodel/docs/Format/NearestNeighbors.rst b/mlmodel/docs/Format/NearestNeighbors.rst new file mode 100644 index 000000000..70fd8e9f8 --- /dev/null +++ b/mlmodel/docs/Format/NearestNeighbors.rst @@ -0,0 +1,144 @@ +KNearestNeighborsClassifier +________________________________________________________________________________ + +A k-Nearest-Neighbor classifier + + +.. code-block:: proto + + message KNearestNeighborsClassifier { + + NearestNeighborsIndex nearestNeighborsIndex = 1; + + Int64Parameter numberOfNeighbors = 3; + + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } + + oneof DefaultClassLabel { + string defaultStringLabel = 110; + int64 defaultInt64Label = 111; + } + + oneof WeightingScheme { + UniformWeighting uniformWeighting = 200; + InverseDistanceWeighting inverseDistanceWeighting = 210; + } + } + + + + + + +NearestNeighborsIndex +________________________________________________________________________________ + +The "core" attributes of a Nearest Neighbors model. + + +.. 
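+
+These attributes can be populated directly on a model spec. A minimal sketch
+with made-up reference data (the surrounding model description is omitted,
+and a real model would also constrain the allowed range of
+``numberOfNeighbors``):
+
+.. code-block:: python
+
+    # Sketch: fill in a KNearestNeighborsClassifier by hand.
+    from coremltools.proto import Model_pb2
+
+    spec = Model_pb2.Model()
+    spec.specificationVersion = 4
+
+    knn = spec.kNearestNeighborsClassifier
+    knn.nearestNeighborsIndex.numberOfDimensions = 2
+
+    # Two reference points with string labels (illustrative data).
+    for point, label in [((0.0, 0.0), "a"), ((1.0, 1.0), "b")]:
+        sample = knn.nearestNeighborsIndex.floatSamples.add()
+        sample.vector.extend(point)
+        knn.stringClassLabels.vector.append(label)
+
+    knn.numberOfNeighbors.defaultValue = 1
+    knn.defaultStringLabel = "a"
+    knn.uniformWeighting.SetInParent()
+    knn.nearestNeighborsIndex.linearIndex.SetInParent()
+    knn.nearestNeighborsIndex.squaredEuclideanDistance.SetInParent()
+
+
+.. 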
code-block:: proto
+
+    message NearestNeighborsIndex {
+
+        int32 numberOfDimensions = 1;
+
+        repeated FloatVector floatSamples = 2;
+
+        oneof IndexType {
+            LinearIndex linearIndex = 100;
+            SingleKdTreeIndex singleKdTreeIndex = 110;
+        }
+
+        oneof DistanceFunction {
+            SquaredEuclideanDistance squaredEuclideanDistance = 200;
+        }
+
+    }
+
+
+
+
+
+UniformWeighting
+________________________________________________________________________________
+
+Specifies a uniform weighting scheme (i.e. each neighbor receives equal
+voting power).
+
+
+.. code-block:: proto
+
+    message UniformWeighting {
+    }
+
+
+
+
+
+InverseDistanceWeighting
+________________________________________________________________________________
+
+Specifies an inverse-distance weighting scheme (i.e. closer neighbors receive
+higher voting power). The class label with the highest sum of (1 / distance)
+votes is picked.
+
+
+.. code-block:: proto
+
+    message InverseDistanceWeighting {
+    }
+
+
+
+
+
+LinearIndex
+________________________________________________________________________________
+
+Specifies a flat index of data points to be searched by brute force.
+
+
+.. code-block:: proto
+
+    message LinearIndex {
+    }
+
+
+
+
+
+SingleKdTreeIndex
+________________________________________________________________________________
+
+Specifies a kd-tree backend for the nearest neighbors model.
+
+
+.. code-block:: proto
+
+    message SingleKdTreeIndex {
+
+        int32 leafSize = 1;
+
+    }
+
+
+
+
+
+SquaredEuclideanDistance
+________________________________________________________________________________
+
+Specifies the Squared Euclidean Distance function.
+
+
+.. code-block:: proto
+
+    message SquaredEuclideanDistance {
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/NeuralNetwork.rst b/mlmodel/docs/Format/NeuralNetwork.rst
new file mode 100644
index 000000000..c9329ec53
--- /dev/null
+++ b/mlmodel/docs/Format/NeuralNetwork.rst
@@ -0,0 +1,7836 @@
+A neural network is defined through a collection of layers
+and represents a directed acyclic graph (DAG).
+Each layer has a name, a layer type,
+a list of input names, a list of output names,
+and a collection of parameters specific to the layer type.
+
+The graph structure and connectivity of the neural network
+are inferred from the input and output names.
+A neural network starts with the layer
+whose input name is equal to the value specified in
+``Model.description.input.name``,
+and ends with the layer
+whose output name is equal to the value specified in
+``Model.description.output.name``.
+Layers must have unique input and output names,
+and a layer may not have input or output names that
+refer to layers that are not yet defined.
+
+For Core ML specification version <=3,
+all inputs are mapped to static rank 5 tensors, with axis notations
+[Sequence, Batch, Channel, Height, Width].
+
+From specification version 4 onwards (iOS >= 13, macOS >= 10.15), more options are available
+(see enums ``NeuralNetworkMultiArrayShapeMapping``, ``NeuralNetworkImageShapeMapping``)
+to map inputs to generic N-Dimensional (or N rank) tensors, where N >= 1.
+
+Each layer type may have specific constraints on the ranks of its inputs and outputs.
+
+Some of the layers (such as softmax, reduce, etc.) have parameters that have been described in
+terms of notational axis "Channel", "Height", "Width" or "Sequence". They can be re-interpreted easily in
+the general ND setting by using the following rule:
+"width" is same as axis = -1 (i.e. 
the last axis from the end) +"height" is same as axis = -2 (i.e. the second last axis from the end) +"channel" is same as axis = -3 (i.e. the third last axis from the end) +"sequence" is same as axis = -5 (i.e. the fifth last axis from the end) + +Several layers are available in 3 different variations, with the names ending +in identifiers: ``like``, ``static`` and ``dynamic``. For instance, ``FillLike``, +``FillStatic`` and ``FillDynamic``. The ``static`` variation generally will have +a property corresponding to the shape of the output. For instance, if the +output of the ``FillStatic`` layer is desired to be of shape (10, 4), the +property ``targetShape`` will have to be set to [10, 4]. In the ``dynamic`` case, +the shape is an input, hence it can be changed at runtime. For instance, for +a ``FillDynamic`` layer, the input would have to be an array containing the +values 10 and 4, if the desired output is of shape (10, 4). Whereas in the +``like`` case, the additional input's shape is used as the output shape, ignoring +its values. For instance, for a ``FillLike`` layer, for an input with shape +(10, 4), the output generated will also be of shape (10, 4), values of the +input will be ignored. + + + +NeuralNetwork +________________________________________________________________________________ + +A neural network. + + +.. code-block:: proto + + message NeuralNetwork { + + repeated NeuralNetworkLayer layers = 1; + repeated NeuralNetworkPreprocessing preprocessing = 2; + + // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs + NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; + + // use this enum value to determine the input tensor shapes to the neural network, for image inputs + NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; + + + NetworkUpdateParameters updateParams = 10; + + } + + + + + + +NeuralNetworkImageScaler +________________________________________________________________________________ + +A neural network preprocessor that +performs a scalar multiplication of an image +followed by addition of scalar biases to the channels. + +Input: X + An image in BGR or RGB format with shape ``[3, H, W]`` + or in grayscale format with shape ``[1, H, W]``. +Output: Y + An image with format and shape corresponding to the input. + +If the input image is in BGR format: + +.. code:: + + Y[0, :, :] = channelScale * X[0, :, :] + blueBias + Y[1, :, :] = channelScale * X[1, :, :] + greenBias + Y[2, :, :] = channelScale * X[2, :, :] + redBias + +If the input image is in RGB format: + +.. code:: + + Y[0, :, :] = channelScale * X[0, :, :] + redBias + Y[1, :, :] = channelScale * X[1, :, :] + greenBias + Y[2, :, :] = channelScale * X[2, :, :] + blueBias + +If the input image is in grayscale format: + +.. code:: + + Y[0, :, :] = channelScale * X[0, :, :] + grayBias + + +.. code-block:: proto + + message NeuralNetworkImageScaler { + + float channelScale = 10; + float blueBias = 20; + float greenBias = 21; + float redBias = 22; + float grayBias = 30; + + } + + + + + + +NeuralNetworkMeanImage +________________________________________________________________________________ + +A neural network preprocessor that +subtracts the provided mean image from the input image. +The mean image is subtracted from the input named +``NeuralNetworkPreprocessing.featureName``. + + +.. 
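+
+For instance, a mean-image preprocessor might be attached to a network spec
+as follows (a sketch; the input name ``image`` and the 3 x 2 x 2 layout are
+illustrative, and the mean image must match the input's C * H * W size):
+
+.. code-block:: python
+
+    # Sketch: attach a mean-image preprocessor to a neural network spec.
+    from coremltools.proto import Model_pb2
+
+    spec = Model_pb2.Model()
+    preproc = spec.neuralNetwork.preprocessing.add()
+    preproc.featureName = "image"
+    # 3 channels x 2 x 2 pixels = 12 mean values, flattened.
+    preproc.meanImage.meanImage.extend([120.0] * 12)
+
+
+.. 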
code-block:: proto + + message NeuralNetworkMeanImage { + + repeated float meanImage = 1; + + } + + + + + + +NeuralNetworkPreprocessing +________________________________________________________________________________ + +Preprocessing parameters for image inputs. + + +.. code-block:: proto + + message NeuralNetworkPreprocessing { + + string featureName = 1; + oneof preprocessor { + NeuralNetworkImageScaler scaler = 10; + NeuralNetworkMeanImage meanImage = 11; + } + + } + + + + + + +ActivationReLU +________________________________________________________________________________ + +A rectified linear unit (ReLU) activation function. + +This function has the following formula: + +.. math:: + f(x) = \text{max}(0, x) + + +.. code-block:: proto + + message ActivationReLU { + + } + + + + + + +ActivationLeakyReLU +________________________________________________________________________________ + +A leaky rectified linear unit (ReLU) activation function. + +This function has the following formula: + +.. math:: + f(x) = \begin{cases} + x & \text{if } x \geq 0 \\ + \alpha x & \text{if } x < 0 + \end{cases} + + +.. code-block:: proto + + message ActivationLeakyReLU { + + float alpha = 1; //negative slope value for leakyReLU + + } + + + + + + +ActivationTanh +________________________________________________________________________________ + +A hyperbolic tangent activation function. + +This function has the following formula: + +.. math:: + f(x) = \dfrac{1 - e^{-2x}}{1 + e^{-2x}} + + +.. code-block:: proto + + message ActivationTanh { + + } + + + + + + +ActivationScaledTanh +________________________________________________________________________________ + +A scaled hyperbolic tangent activation function. + +This function has the following formula: + +.. math:: + f(x) = \alpha \tanh(\beta x) + + +.. code-block:: proto + + message ActivationScaledTanh { + + float alpha = 1; + float beta = 2; + + } + + + + + + +ActivationSigmoid +________________________________________________________________________________ + +A sigmoid activation function. + +This function has the following formula: + +.. math:: + f(x) = \dfrac{1}{1 + e^{-x}} + + +.. code-block:: proto + + message ActivationSigmoid { + + } + + + + + + +ActivationLinear +________________________________________________________________________________ + +A linear activation function. + +This function has the following formula: + +.. math:: + f(x) = \alpha x + \beta + + +.. code-block:: proto + + message ActivationLinear { + + float alpha = 1; + float beta = 2; + + } + + + + + + +ActivationSigmoidHard +________________________________________________________________________________ + +A hard sigmoid activation function. + +This function has the following formula: + +.. math:: + f(x) = \text{min}(\text{max}(\alpha x + \beta, 0), 1) + + +.. code-block:: proto + + message ActivationSigmoidHard { + + float alpha = 1; + float beta = 2; + + } + + + + + + +ActivationPReLU +________________________________________________________________________________ + +A parameterized rectified linear unit (PReLU) activation function. +Input must be at least rank 3. Axis = -3 is denoted by "C", or channels. +"alpha" parameter can be a vector of length C. + +This function has the following formula: + +.. math:: + f(x_i) = \begin{cases} + x_i & \text{if } x_i \geq 0 \\ + \alpha_i x_i & \text{if } x_i < 0 + \end{cases} \;,\;i=1,...,C + + +.. code-block:: proto + + message ActivationPReLU { + + // parameter of length C or 1. 
+ // If length is 1, same value is used for all channels + WeightParams alpha = 1; + + } + + + + + + +ActivationELU +________________________________________________________________________________ + +An exponential linear unit (ELU) activation function. + +This function has the following formula: + +.. math:: + f(x) = \begin{cases} + x & \text{if } x \geq 0 \\ + \alpha (e^x - 1) & \text{if } x < 0 + \end{cases} + + +.. code-block:: proto + + message ActivationELU { + + float alpha = 1; + + } + + + + + + +ActivationThresholdedReLU +________________________________________________________________________________ + +A thresholded rectified linear unit (ReLU) activation function. + +This function has the following formula: + +.. math:: + f(x) = \begin{cases} + x & \text{if } x \geq \alpha \\ + 0 & \text{if } x < \alpha + \end{cases} + + +.. code-block:: proto + + message ActivationThresholdedReLU { + + float alpha = 1; + + } + + + + + + +ActivationSoftsign +________________________________________________________________________________ + +A softsign activation function. + +This function has the following formula: + +.. math:: + f(x) = \dfrac{x}{1 + |x|} + + +.. code-block:: proto + + message ActivationSoftsign { + + } + + + + + + +ActivationSoftplus +________________________________________________________________________________ + +A softplus activation function. + +This function has the following formula: + +.. math:: + f(x) = \text{log}(1 + e^x) + + +.. code-block:: proto + + message ActivationSoftplus { + + } + + + + + + +ActivationParametricSoftplus +________________________________________________________________________________ + +A parametric softplus activation function. +Input must be at least rank 3. axis = -3 is denoted by "C", or channels. +"alpha"/"beta" parameter can be a vector of length C. + +This function has the following formula: + +.. math:: + f(x_i) = \alpha_i \text{log}(1 + e^{\beta_i x_i}) \;,\;i=1,...,C + + +.. code-block:: proto + + message ActivationParametricSoftplus { + + // If length is 1, same value is used for all channels + WeightParams alpha = 1; //parameter of length C or 1 + WeightParams beta = 2; //parameter of length C or 1 + + } + + + + + + +ActivationParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message ActivationParams { + + oneof NonlinearityType { + ActivationLinear linear = 5; + + ActivationReLU ReLU = 10; + ActivationLeakyReLU leakyReLU = 15; + ActivationThresholdedReLU thresholdedReLU = 20; + ActivationPReLU PReLU = 25; + + ActivationTanh tanh = 30; + ActivationScaledTanh scaledTanh = 31; + + ActivationSigmoid sigmoid = 40; + ActivationSigmoidHard sigmoidHard = 41; + + ActivationELU ELU = 50; + + ActivationSoftsign softsign = 60; + ActivationSoftplus softplus = 70; + ActivationParametricSoftplus parametricSoftplus = 71; + } + + } + + + + + + +Tensor +________________________________________________________________________________ + +Representation of the intermediate tensors + + +.. code-block:: proto + + message Tensor { + + // Number of dimensions in the tensor shape + uint32 rank = 1; + // actual value of the tensor shape. + // must be of length "rank". Can contain -1s for unknown dimensions. + repeated int64 dimValue = 2; + + } + + + + + + +NeuralNetworkLayer +________________________________________________________________________________ + +A single neural network layer. + + +.. 
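+
+In practice these layer messages are usually emitted by the
+``NeuralNetworkBuilder`` in coremltools rather than written by hand. A small
+sketch (feature names and shapes are illustrative):
+
+.. code-block:: python
+
+    # Sketch: create one activation layer with NeuralNetworkBuilder.
+    import coremltools.models.datatypes as datatypes
+    from coremltools.models.neural_network import NeuralNetworkBuilder
+
+    input_features = [("x", datatypes.Array(3))]
+    output_features = [("y", datatypes.Array(3))]
+
+    builder = NeuralNetworkBuilder(input_features, output_features)
+    builder.add_activation(name="relu_1", non_linearity="RELU",
+                           input_name="x", output_name="y")
+
+    # The builder fills in a NeuralNetworkLayer with its "layer" oneof set.
+    print(builder.spec.neuralNetwork.layers[0].WhichOneof("layer"))  # activation
+
+
+.. 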
code-block:: proto + + message NeuralNetworkLayer { + + string name = 1; //descriptive name of the layer + repeated string input = 2; + repeated string output = 3; + + repeated Tensor inputTensor = 4; // must be the same length as the "input" field + repeated Tensor outputTensor = 5; // must be the same length as the "output" field + + // Must be set to true to mark the layer as updatable. + // If true, the weightParams in the layer's properties must also be set to updatable + // If false, the value of the isUpdatable parameter within the layer's weights are ignored + bool isUpdatable = 10; + + oneof layer { + + // Start at 100 here + ConvolutionLayerParams convolution = 100; + + PoolingLayerParams pooling = 120; + + ActivationParams activation = 130; + + InnerProductLayerParams innerProduct = 140; + EmbeddingLayerParams embedding = 150; + + // Normalization-related Layers + BatchnormLayerParams batchnorm = 160; + MeanVarianceNormalizeLayerParams mvn = 165; + L2NormalizeLayerParams l2normalize = 170; + SoftmaxLayerParams softmax = 175; + LRNLayerParams lrn = 180; + + CropLayerParams crop = 190; + PaddingLayerParams padding = 200; + UpsampleLayerParams upsample = 210; + + ResizeBilinearLayerParams resizeBilinear = 211; + CropResizeLayerParams cropResize = 212; + + UnaryFunctionLayerParams unary = 220; + + // Element-wise Operations + AddLayerParams add = 230; + MultiplyLayerParams multiply = 231; + + AverageLayerParams average = 240; + ScaleLayerParams scale = 245; + + BiasLayerParams bias = 250; + MaxLayerParams max = 260; + MinLayerParams min = 261; + + DotProductLayerParams dot = 270; + ReduceLayerParams reduce = 280; + LoadConstantLayerParams loadConstant = 290; + + // Data Reorganization + ReshapeLayerParams reshape = 300; + FlattenLayerParams flatten = 301; + PermuteLayerParams permute = 310; + ConcatLayerParams concat = 320; + SplitLayerParams split = 330; + SequenceRepeatLayerParams sequenceRepeat = 340; + + ReorganizeDataLayerParams reorganizeData = 345; + SliceLayerParams slice = 350; + + // Recurrent Layers + SimpleRecurrentLayerParams simpleRecurrent = 400; + GRULayerParams gru = 410; + UniDirectionalLSTMLayerParams uniDirectionalLSTM = 420; + BiDirectionalLSTMLayerParams biDirectionalLSTM = 430; + + // Custom (user-implemented) Layer + CustomLayerParams custom = 500; + + // Following layers are available only after Core ML Specification + // version >= 4 (iOS >= 13, macOS >= 10.15) + + // Control Flow related Layers + CopyLayerParams copy = 600; + BranchLayerParams branch = 605; + + LoopLayerParams loop = 615; + LoopBreakLayerParams loopBreak = 620; + LoopContinueLayerParams loopContinue = 625; + + RangeStaticLayerParams rangeStatic = 635; + RangeDynamicLayerParams rangeDynamic = 640; + + // Element-wise Unary Layers + ClipLayerParams clip = 660; + CeilLayerParams ceil = 665; + FloorLayerParams floor = 670; + + SignLayerParams sign = 680; + RoundLayerParams round = 685; + + Exp2LayerParams exp2 = 700; + + SinLayerParams sin = 710; + CosLayerParams cos = 715; + TanLayerParams tan = 720; + + AsinLayerParams asin = 730; + AcosLayerParams acos = 735; + AtanLayerParams atan = 740; + + SinhLayerParams sinh = 750; + CoshLayerParams cosh = 755; + TanhLayerParams tanh = 760; + + AsinhLayerParams asinh = 770; + AcoshLayerParams acosh = 775; + AtanhLayerParams atanh = 780; + + ErfLayerParams erf = 790; + GeluLayerParams gelu = 795; + + // Element-wise Binary with Broadcasting Support + EqualLayerParams equal = 815; + NotEqualLayerParams notEqual = 820; + LessThanLayerParams lessThan = 
825; + LessEqualLayerParams lessEqual = 827; + GreaterThanLayerParams greaterThan = 830; + GreaterEqualLayerParams greaterEqual = 832; + + LogicalOrLayerParams logicalOr = 840; + LogicalXorLayerParams logicalXor = 845; + LogicalNotLayerParams logicalNot = 850; + LogicalAndLayerParams logicalAnd = 855; + + ModBroadcastableLayerParams modBroadcastable = 865; + MinBroadcastableLayerParams minBroadcastable = 870; + MaxBroadcastableLayerParams maxBroadcastable = 875; + AddBroadcastableLayerParams addBroadcastable = 880; + PowBroadcastableLayerParams powBroadcastable = 885; + DivideBroadcastableLayerParams divideBroadcastable = 890; + FloorDivBroadcastableLayerParams floorDivBroadcastable = 895; + MultiplyBroadcastableLayerParams multiplyBroadcastable = 900; + SubtractBroadcastableLayerParams subtractBroadcastable = 905; + + // Tensor Manipulations + TileLayerParams tile = 920; + StackLayerParams stack = 925; + GatherLayerParams gather = 930; + ScatterLayerParams scatter = 935; + GatherNDLayerParams gatherND = 940; + ScatterNDLayerParams scatterND = 945; + SoftmaxNDLayerParams softmaxND = 950; + GatherAlongAxisLayerParams gatherAlongAxis = 952; + ScatterAlongAxisLayerParams scatterAlongAxis = 954; + + ReverseLayerParams reverse = 960; + ReverseSeqLayerParams reverseSeq = 965; + + SplitNDLayerParams splitND = 975; + ConcatNDLayerParams concatND = 980; + TransposeLayerParams transpose = 985; + + SliceStaticLayerParams sliceStatic = 995; + SliceDynamicLayerParams sliceDynamic = 1000; + SlidingWindowsLayerParams slidingWindows = 1005; + + TopKLayerParams topK = 1015; + ArgMinLayerParams argMin = 1020; + ArgMaxLayerParams argMax = 1025; + + EmbeddingNDLayerParams embeddingND = 1040; + BatchedMatMulLayerParams batchedMatmul = 1045; + + // Tensor Allocation / Reshape-related Operations + GetShapeLayerParams getShape = 1065; + LoadConstantNDLayerParams loadConstantND = 1070; + + FillLikeLayerParams fillLike = 1080; + FillStaticLayerParams fillStatic = 1085; + FillDynamicLayerParams fillDynamic = 1090; + + BroadcastToLikeLayerParams broadcastToLike = 1100; + BroadcastToStaticLayerParams broadcastToStatic = 1105; + BroadcastToDynamicLayerParams broadcastToDynamic = 1110; + + SqueezeLayerParams squeeze = 1120; + ExpandDimsLayerParams expandDims = 1125; + FlattenTo2DLayerParams flattenTo2D = 1130; + ReshapeLikeLayerParams reshapeLike = 1135; + ReshapeStaticLayerParams reshapeStatic = 1140; + ReshapeDynamicLayerParams reshapeDynamic = 1145; + RankPreservingReshapeLayerParams rankPreservingReshape = 1150; + + ConstantPaddingLayerParams constantPad = 1155; + + // Random Distributions + RandomNormalLikeLayerParams randomNormalLike = 1170; + RandomNormalStaticLayerParams randomNormalStatic = 1175; + RandomNormalDynamicLayerParams randomNormalDynamic = 1180; + + RandomUniformLikeLayerParams randomUniformLike = 1190; + RandomUniformStaticLayerParams randomUniformStatic = 1195; + RandomUniformDynamicLayerParams randomUniformDynamic = 1200; + + RandomBernoulliLikeLayerParams randomBernoulliLike = 1210; + RandomBernoulliStaticLayerParams randomBernoulliStatic = 1215; + RandomBernoulliDynamicLayerParams randomBernoulliDynamic = 1220; + + CategoricalDistributionLayerParams categoricalDistribution = 1230; + + // Reduction-related Layers: + ReduceL1LayerParams reduceL1 = 1250; + ReduceL2LayerParams reduceL2 = 1255; + ReduceMaxLayerParams reduceMax = 1260; + ReduceMinLayerParams reduceMin = 1265; + ReduceSumLayerParams reduceSum = 1270; + ReduceProdLayerParams reduceProd = 1275; + ReduceMeanLayerParams reduceMean = 1280; 
+            ReduceLogSumLayerParams reduceLogSum = 1285;
+            ReduceSumSquareLayerParams reduceSumSquare = 1290;
+            ReduceLogSumExpLayerParams reduceLogSumExp = 1295;
+
+            // Masking / Selection Layers
+            WhereNonZeroLayerParams whereNonZero = 1313;
+            MatrixBandPartLayerParams matrixBandPart = 1315;
+            LowerTriangularLayerParams lowerTriangular = 1320;
+            UpperTriangularLayerParams upperTriangular = 1325;
+            WhereBroadcastableLayerParams whereBroadcastable = 1330;
+
+            // Normalization Layers
+            LayerNormalizationLayerParams layerNormalization = 1350;
+
+            NonMaximumSuppressionLayerParams NonMaximumSuppression = 1400;
+
+            // Following layers are available only after Core ML Specification
+            // version >= 5 (iOS >= 14, macOS >= 10.16)
+            OneHotLayerParams oneHot = 1450;
+            CumSumLayerParams cumSum = 1455;
+            ClampedReLULayerParams clampedReLU = 1460;
+            ArgSortLayerParams argSort = 1461;
+            Pooling3DLayerParams pooling3d = 1465;
+            GlobalPooling3DLayerParams globalPooling3d = 1466;
+            SliceBySizeLayerParams sliceBySize = 1470;
+            Convolution3DLayerParams convolution3d = 1471;
+
+        }
+
+    }
+
+
+
+
+
+BranchLayerParams
+________________________________________________________________________________
+
+Branching Layer
+
+A layer that provides the functionality of branching or an If-Else block.
+
+Must have 1 input. There are no outputs as the execution is transferred to either the
+if or the else branch based on the value of the input.
+
+Input is the condition predicate. Must be a scalar (length 1 tensor).
+
+
+.. code-block:: proto
+
+    message BranchLayerParams {
+
+        NeuralNetwork ifBranch = 1;
+        NeuralNetwork elseBranch = 2;
+
+    }
+
+
+
+
+
+LoopLayerParams
+________________________________________________________________________________
+
+Loop Layer
+
+A layer that provides the functionality of a "for" loop or a "while" loop.
+
+There are either no inputs or 1 input. When an input is present, it corresponds to the maximum loop count;
+in that case the value of the "maxLoopIterations" field is ignored. Input must be a scalar.
+(For the description below, maxLoopIterations is assumed to be the value of the input, when it is present.)
+
+No outputs are produced. Blobs produced by the condition or the body network are visible in the scope of the overall network.
+
+"conditionNetwork" must produce a tensor with the name specified in the "conditionVar" field.
+
+There are 3 possible cases for determining the termination condition:
+
+Case 1:
+
+If there is no "conditionNetwork", the layer corresponds to a pure for loop, which is run "maxLoopIterations" number of times.
+Equivalent pseudo-code:
+
+for loopIterator = 0 : maxLoopIterations
+    bodyNetwork()
+
+
+Case 2:
+
+If "conditionNetwork" is present, and "maxLoopIterations" is 0 and there is no input,
+the layer corresponds to a while loop. Equivalent pseudo-code:
+
+conditionVar = conditionNetwork()
+while conditionVar:
+    bodyNetwork()
+    conditionVar = conditionNetwork()
+
+
+Case 3:
+
+If "conditionNetwork" is provided, and "maxLoopIterations" is positive or there is an input,
+the layer corresponds to a while loop with a joint condition. Equivalent pseudo-code:
+
+loopIterator = 0
+conditionVar = conditionNetwork()
+while (conditionVar and loopIterator < maxLoopIterations):
+    bodyNetwork()
+    loopIterator = loopIterator + 1
+    conditionVar = conditionNetwork()
+
+
+.. 
code-block:: proto + + message LoopLayerParams { + + uint64 maxLoopIterations = 1; + string conditionVar = 2; + NeuralNetwork conditionNetwork = 3; + NeuralNetwork bodyNetwork = 4; + + } + + + + + + +LoopBreakLayerParams +________________________________________________________________________________ + +Loop break Layer + +Terminate the loop that has this layer. +If present, it should always reside in the "bodyNetwork" of the loop layer + +No inputs/outputs + + +.. code-block:: proto + + message LoopBreakLayerParams { + + } + + + + + + +LoopContinueLayerParams +________________________________________________________________________________ + +Loop Continue Layer + +Stop the current loop iteration and continue on the next iteration. +If present, it should always reside in the "bodyNetwork" of the loop layer + +No inputs/outputs + + +.. code-block:: proto + + message LoopContinueLayerParams { + + } + + + + + + +CopyLayerParams +________________________________________________________________________________ + +Copy Layer + +A layer that copies its input tensor to the output tensor. +Must have 1 input and 1 output, with distinct names. +This is the only layer that is allowed to re-generate an output that is already present in the neural network prior to this layer, +in which case it will overwrite the output tensor. + + +.. code-block:: proto + + message CopyLayerParams { + + } + + + + + + +GreaterThanLayerParams +________________________________________________________________________________ + +GreaterThan Layer + +Either 1 or 2 inputs. +Produces 1 output. +Perform elementwise greater than operation. + +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = x1 > x2 + or + y = x1 > alpha, if only one input is provided + +Broadcasting is supported. + + +.. code-block:: proto + + message GreaterThanLayerParams { + + float alpha = 2; + + } + + + + + + +GreaterEqualLayerParams +________________________________________________________________________________ + +GreaterEqual Layer + +Either 1 or 2 inputs. +Produces 1 output. +Perform elementwise greater equal operation. + +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = x1 >= x2 + or + y = x1 >= alpha, if only one input is provided + +Broadcasting is supported. + + +.. code-block:: proto + + message GreaterEqualLayerParams { + + float alpha = 2; + + } + + + + + + +LessThanLayerParams +________________________________________________________________________________ + +LessThan Layer + +Either 1 or 2 inputs. +Produces 1 output. +Perform elementwise less than operation. + +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = x1 < x2 + or + y = x1 < alpha, if only one input is provided + +Broadcasting is supported. + + +.. code-block:: proto + + message LessThanLayerParams { + + float alpha = 2; + + } + + + + + + +LessEqualLayerParams +________________________________________________________________________________ + +LessEqual Layer + +Either 1 or 2 inputs. +Produces 1 output. +Perform elementwise less equal operation. + +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = x1 <= x2 + or + y = x1 <= alpha, if only one input is provided + +Broadcasting is supported. + + +.. code-block:: proto + + message LessEqualLayerParams { + + float alpha = 2; + + } + + + + + + +EqualLayerParams +________________________________________________________________________________ + +Equal Layer + +Either 1 or 2 inputs. +Produces 1 output. 
+Perform elementwise equal operation. + +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = x1 == x2 + or + y = x1 == alpha, if only one input is provided + +Broadcasting is supported. + + +.. code-block:: proto + + message EqualLayerParams { + + float alpha = 1; + + } + + + + + + +NotEqualLayerParams +________________________________________________________________________________ + +NotEqual Layer + +Either 1 or 2 inputs. +Produces 1 output. +Perform elementwise not equal operation. + +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = x1 != x2 + or + y = x1 != alpha, if only one input is provided + +Broadcasting is supported. + + +.. code-block:: proto + + message NotEqualLayerParams { + + float alpha = 1; + + } + + + + + + +LogicalAndLayerParams +________________________________________________________________________________ + +LogicalAnd Layer + +Must have 2 inputs, produces 1 output. +Perform elementwise logical AND operation. + +Input is considered False if equal to 0.0f otherwise True. +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = AND(x1, x2) + +Broadcasting is supported. + + +.. code-block:: proto + + message LogicalAndLayerParams { + + } + + + + + + +LogicalOrLayerParams +________________________________________________________________________________ + +LogicalOr Layer + +Must have 2 inputs, produces 1 output. +Perform elementwise logical OR operation. + +Input is considered False if equal to 0.0f otherwise True. +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = OR(x1, x2) + +Broadcasting is supported. + + +.. code-block:: proto + + message LogicalOrLayerParams { + + } + + + + + + +LogicalXorLayerParams +________________________________________________________________________________ + +LogicalXor Layer + +Must have 2 inputs, produces 1 output. +Perform elementwise logical XOR operation. + +Input is considered False if equal to 0.0f otherwise True. +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = XOR(x1, x2) + +Broadcasting is supported. + + +.. code-block:: proto + + message LogicalXorLayerParams { + + } + + + + + + +LogicalNotLayerParams +________________________________________________________________________________ + +LogicalNot Layer + +Must have 1 input, produces 1 output. +Perform elementwise logical NOT operation. + +Input is considered False if equal to 0.0f otherwise True. +Output is 1.0f if the condition is true otherwise 0.0f. + +.. code:: + + y = NOT(x) + + +.. code-block:: proto + + message LogicalNotLayerParams { + + } + + + + + + +BorderAmounts +________________________________________________________________________________ + +Specifies the amount of spatial border to be either padded or cropped. + +For padding: + +.. code:: + + H_out = borderAmounts[0].startEdgeSize + H_in + borderAmounts[0].endEdgeSize + W_out = borderAmounts[1].startEdgeSize + W_in + borderAmounts[1].endEdgeSize + + topPaddingAmount == Height startEdgeSize + bottomPaddingAmount == Height endEdgeSize + leftPaddingAmount == Width startEdgeSize + rightPaddingAmount == Width endEdgeSize + +For cropping: + +.. code:: + + H_out = (-borderAmounts[0].startEdgeSize) + H_in + (-borderAmounts[0].endEdgeSize) + W_out = (-borderAmounts[1].startEdgeSize) + W_in + (-borderAmounts[1].endEdgeSize) + + topCropAmount == Height startEdgeSize + bottomCropAmount == Height endEdgeSize + leftCropAmount == Width startEdgeSize + rightCropAmount == Width endEdgeSize + + +.. 
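+
+In code, the padding arithmetic above is simply (a sketch of the formulas,
+not a library API):
+
+.. code-block:: python
+
+    def padded_shape(h_in, w_in, border_amounts):
+        """border_amounts: [(startEdgeSize, endEdgeSize) for H, then W]."""
+        (h_start, h_end), (w_start, w_end) = border_amounts
+        return h_start + h_in + h_end, w_start + w_in + w_end
+
+    # Pad a 4 x 4 input by 1 on top/bottom and 2 on the left only.
+    print(padded_shape(4, 4, [(1, 1), (2, 0)]))  # -> (6, 6)
+
+
+.. 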
code-block:: proto
+
+    message BorderAmounts {
+
+        message EdgeSizes {
+            uint64 startEdgeSize = 1;
+
+            uint64 endEdgeSize = 2;
+        }
+
+        repeated EdgeSizes borderAmounts = 10;
+
+    }
+
+
+
+
+
+BorderAmounts.EdgeSizes
+--------------------------------------------------------------------------------
+
+
+
+
+.. code-block:: proto
+
+    message EdgeSizes {
+        uint64 startEdgeSize = 1;
+
+        uint64 endEdgeSize = 2;
+    }
+
+
+
+
+
+ValidPadding
+________________________________________________________________________________
+
+Specifies the type of padding to be used with Convolution/Deconvolution and Pooling layers.
+After padding, the input spatial shape ``[H_in, W_in]`` gets modified to the
+output spatial shape ``[H_out, W_out]``.
+
+.. code::
+
+    topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
+    bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
+    leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
+    rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
+
+With Convolution or Pooling:
+
+.. code::
+
+    H_out = int_division_round_down((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0]),stride[0]) + 1
+
+which is the same as:
+
+.. code::
+
+    H_out = int_division_round_up((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0] + 1),stride[0])
+
+With Deconvolution:
+
+.. code::
+
+    H_out = (H_in-1) * stride[0] + kernelSize[0] - (topPaddingAmount + bottomPaddingAmount)
+
+
+The equivalent expressions hold true for ``W_out`` as well.
+
+
+By default, the values of ``paddingAmounts`` are set to ``0``,
+which results in a "true" valid padding.
+If non-zero values are provided for ``paddingAmounts``,
+"valid" convolution/pooling is performed within the spatially expanded input.
+
+
+.. code-block:: proto
+
+    message ValidPadding {
+
+        BorderAmounts paddingAmounts = 1;
+
+    }
+
+
+
+
+
+SamePadding
+________________________________________________________________________________
+
+Specifies the type of padding to be used with Convolution/Deconvolution and Pooling layers.
+After padding, the input spatial shape ``[H_in, W_in]`` gets modified to the
+output spatial shape ``[H_out, W_out]``.
+With Convolution or Pooling:
+
+.. code::
+
+    H_out = int_division_round_up(H_in,stride[0])
+    W_out = int_division_round_up(W_in,stride[1])
+
+This is achieved by using the following padding amounts:
+
+.. code::
+
+    totalPaddingHeight = max(0,(H_out-1) * stride[0] + KernelSize[0] - Hin)
+    totalPaddingWidth = max(0,(W_out-1) * stride[1] + KernelSize[1] - Win)
+
+There are two modes of asymmetry:
+``BOTTOM_RIGHT_HEAVY``, and ``TOP_LEFT_HEAVY``.
+
+If the mode is ``BOTTOM_RIGHT_HEAVY``:
+
+.. code::
+
+    topPaddingAmount = floor(totalPaddingHeight / 2)
+    bottomPaddingAmount = totalPaddingHeight - topPaddingAmount
+    leftPaddingAmount = floor(totalPaddingWidth / 2)
+    rightPaddingAmount = totalPaddingWidth - leftPaddingAmount
+
+If the mode is ``TOP_LEFT_HEAVY``:
+
+.. code::
+
+    bottomPaddingAmount = floor(totalPaddingHeight / 2)
+    topPaddingAmount = totalPaddingHeight - bottomPaddingAmount
+    rightPaddingAmount = floor(totalPaddingWidth / 2)
+    leftPaddingAmount = totalPaddingWidth - rightPaddingAmount
+
+
+With Deconvolution:
+
+.. code::
+
+    H_out = H_in * stride[0]
+    W_out = W_in * stride[1]
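+
+The ``BOTTOM_RIGHT_HEAVY`` arithmetic above can be checked with a few lines of
+Python (a sketch of the formulas only, per spatial axis):
+
+.. code-block:: python
+
+    import math
+
+    def same_padding_1d(size_in, kernel, stride):
+        """Output length and (start, end) padding for SAME padding,
+        BOTTOM_RIGHT_HEAVY mode, along one spatial axis."""
+        size_out = math.ceil(size_in / stride)
+        total_pad = max(0, (size_out - 1) * stride + kernel - size_in)
+        start = total_pad // 2           # floor(total / 2)
+        end = total_pad - start
+        return size_out, (start, end)
+
+    # H_in = 8, kernel 3, stride 2  ->  H_out = 4, padding (0, 1)
+    print(same_padding_1d(8, 3, 2))
+
+
+.. 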
code-block:: proto
+
+    message SamePadding {
+
+        enum SamePaddingMode {
+
+            BOTTOM_RIGHT_HEAVY = 0;
+            TOP_LEFT_HEAVY = 1;
+
+        }
+        SamePaddingMode asymmetryMode = 1;
+
+    }
+
+
+
+
+
+SamplingMode
+________________________________________________________________________________
+
+Specifies how grid points are sampled from an interval.
+Without loss of generality, assume the interval to be [0, X-1] from which N points are to be sampled.
+Here X may correspond to an input image's height or width.
+All the methods can be expressed in terms of numpy's linspace function, along with the constraint that grid points have to lie in the interval [0, X-1].
+Note: numpy.linspace(start = start, stop = end, num = N, endpoint = True) corresponds to sampling
+N points uniformly from the interval [start, end], endpoints included.
+The methods vary in how the ``start`` and ``end`` values are computed.
+
+
+.. code-block:: proto
+
+    message SamplingMode {
+
+        enum Method {
+
+            STRICT_ALIGN_ENDPOINTS_MODE = 0;
+
+            ALIGN_ENDPOINTS_MODE = 1;
+
+            UPSAMPLE_MODE = 2;
+
+            ROI_ALIGN_MODE = 3;
+
+        }
+
+        Method samplingMethod = 1;
+
+    }
+
+
+
+
+
+BoxCoordinatesMode
+________________________________________________________________________________
+
+Specifies the convention used to specify four bounding box coordinates for an image of size (Height, Width).
+The (0,0) coordinate corresponds to the top-left corner of the image.
+
+
+.. code-block:: proto
+
+    message BoxCoordinatesMode {
+
+        enum Coordinates {
+
+            CORNERS_HEIGHT_FIRST = 0;
+
+            CORNERS_WIDTH_FIRST = 1;
+
+            CENTER_SIZE_HEIGHT_FIRST = 2;
+
+            CENTER_SIZE_WIDTH_FIRST = 3;
+
+        }
+
+        Coordinates boxMode = 1;
+
+    }
+
+
+
+
+
+WeightParams
+________________________________________________________________________________
+
+Weights for layer parameters.
+Weights are stored as repeated floating point numbers
+using row-major ordering
+and can represent 1-, 2-, 3-, or 4-dimensional data.
+
+
+.. code-block:: proto
+
+    message WeightParams {
+
+        repeated float floatValue = 1;
+
+        bytes float16Value = 2;
+
+        bytes rawValue = 30;
+
+        bytes int8RawValue = 31;
+
+        QuantizationParams quantization = 40;
+
+        bool isUpdatable = 50;
+
+    }
+
+
+
+
+
+QuantizationParams
+________________________________________________________________________________
+
+Quantization parameters.
+
+
+.. code-block:: proto
+
+    message QuantizationParams {
+
+        uint64 numberOfBits = 1;
+        oneof QuantizationType {
+            LinearQuantizationParams linearQuantization = 101;
+            LookUpTableQuantizationParams lookupTableQuantization = 102;
+        }
+
+    }
+
+
+
+
+
+LinearQuantizationParams
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message LinearQuantizationParams {
+
+        repeated float scale = 1;
+        repeated float bias = 2;
+
+    }
+
+
+
+
+
+LookUpTableQuantizationParams
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message LookUpTableQuantizationParams {
+
+        // (2^numberOfBits) Elements.
+        repeated float floatValue = 1;
+
+    }
+
+
+
+
+
+ConvolutionLayerParams
+________________________________________________________________________________
+
+A layer that performs spatial convolution or deconvolution.
+
+.. code::
+
+    y = ConvolutionLayer(x)
+
+Requires 1 or 2 inputs and produces 1 output.
+
+Input
+    First Input:
+      A blob with rank greater than or equal to 4.
+      Rank 4 blob represents [Batch, channels, height, width].
+      For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+
+    From Core ML specification version 4 onwards (iOS >= 13, macOS >= 10.15),
+    the convolution layer can have 2 inputs, in which case the second input is
+    the blob representing the weights. This is allowed when "isDeconvolution" = False.
+    The weight blob should have shape
+    ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``,
+    where kernelChannels == inputChannels / nGroups.
+
+Output
+    Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C_out, H_out, W_out]
+
+
+If ``dilationFactor`` is not 1, effective kernel size is
+modified as follows:
+
+.. code::
+
+    KernelSize[0] <-- (kernelSize[0]-1) * dilationFactor[0] + 1
+    KernelSize[1] <-- (kernelSize[1]-1) * dilationFactor[1] + 1
+
+Type of padding can be ``valid`` or ``same``. Output spatial dimensions depend on
+the type of padding. For details, refer to the descriptions of the messages "ValidPadding"
+and "SamePadding". Padded values are all zeros.
+
+For Deconvolution, ``ConvolutionPaddingType`` (``valid`` or ``same``) is ignored when ``outputShape`` is set.
+
+
+.. code-block:: proto
+
+    message ConvolutionLayerParams {
+
+        uint64 outputChannels = 1;
+
+        uint64 kernelChannels = 2;
+
+        uint64 nGroups = 10;
+
+        repeated uint64 kernelSize = 20;
+
+        repeated uint64 stride = 30;
+
+        repeated uint64 dilationFactor = 40;
+
+        oneof ConvolutionPaddingType {
+            ValidPadding valid = 50;
+            SamePadding same = 51;
+        }
+
+        bool isDeconvolution = 60;
+
+        bool hasBias = 70;
+
+        WeightParams weights = 90;
+        WeightParams bias = 91;
+
+        repeated uint64 outputShape = 100;
+
+    }
+
+
+
+
+
+Convolution3DLayerParams
+________________________________________________________________________________
+
+A layer that performs a 3-dimensional convolution.
+
+.. code::
+
+    y = Convolution3DLayer(x)
+
+Input
+    A blob of rank 5.
+    The input blob's shape should be ``[batch, channels, depth, height, width]``.
+
+Fields
+    The bias field, if set, should have shape of ``[channelsOut]``.
+
+Output
+    A blob of rank 5.
+    The output blob's shape is ``[batch, channelsOut, depthOut, heightOut, widthOut]``.
+
+Type of padding can be ``custom``, ``valid``, or ``same``. Padded values are all zeros.
+Output spatial dimensions depend on the type of padding. For details, refer to the
+description of the ``PaddingType`` field of this ``Convolution3DLayerParams`` message.
+
+Example
+    For example, given an input of size ``[1, 3, 3, 8, 8]``, a stride of 2 in each dimension,
+    a kernel of 3 in each dimension, 2 output channels, and ``same`` padding, this layer will
+    compute the total padding applied in the depth, height, and width dimensions to be 2, 1, and 1,
+    respectively. The depth padding is even and will be applied equally to both sides of the depth
+    dimension. Since the height and width padding values are odd, they'll be applied to the
+    bottom/right of the height/width dimensions. Thus, the padding applied to the input will be
+    ``[1, 1, 0, 1, 0, 1]`` (front, back, top, bottom, left, right). Finally, the output produced
+    will have size ``[1, 2, 2, 4, 4]``.
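+
+This worked example can be reproduced by applying the ``same``-padding rule
+per axis (a sketch of the arithmetic only):
+
+.. code-block:: python
+
+    import math
+
+    def same_pad_3d(in_shape, kernel, stride):
+        """Per-axis SAME padding: (front, back), (top, bottom), (left, right)."""
+        out_shape, pads = [], []
+        for size, k, s in zip(in_shape, kernel, stride):
+            out = math.ceil(size / s)
+            total = max(0, (out - 1) * s + k - size)
+            pads.append((total // 2, total - total // 2))
+            out_shape.append(out)
+        return out_shape, pads
+
+    # Depth 3, height 8, width 8; kernel 3 and stride 2 per axis.
+    print(same_pad_3d((3, 8, 8), (3, 3, 3), (2, 2, 2)))
+    # -> ([2, 4, 4], [(1, 1), (0, 1), (0, 1)])
+
+
+.. 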
code-block:: proto
+
+    message Convolution3DLayerParams {
+
+        int32 outputChannels = 1;
+
+        int32 inputChannels = 2;
+
+        int32 nGroups = 10;
+
+        int32 kernelDepth = 20;
+
+        int32 kernelHeight = 21;
+
+        int32 kernelWidth = 22;
+
+        int32 strideDepth = 31;
+
+        int32 strideHeight = 32;
+
+        int32 strideWidth = 33;
+
+        int32 dilationDepth = 40;
+
+        int32 dilationHeight = 41;
+
+        int32 dilationWidth = 42;
+
+        bool hasBias = 50;
+
+        WeightParams weights = 60;
+
+        WeightParams bias = 61;
+
+
+        enum PaddingType {
+            CUSTOM = 0;
+            VALID = 1;
+            SAME = 2;
+        }
+        PaddingType paddingType = 70;
+
+        int32 customPaddingFront = 80;
+
+        int32 customPaddingBack = 81;
+
+        int32 customPaddingTop = 82;
+
+        int32 customPaddingBottom = 83;
+
+        int32 customPaddingLeft = 84;
+
+        int32 customPaddingRight = 85;
+
+    }
+
+
+
+
+
+InnerProductLayerParams
+________________________________________________________________________________
+
+A layer that performs a matrix-vector or matrix-matrix product.
+This is equivalent to a fully-connected, or dense layer.
+The weight parameters correspond to a matrix of dimensions (inputChannels, outputChannels) i.e. (C_in, C_out)
+
+.. code::
+
+    y = InnerProductLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    Input can have rank 1 to rank 5. This is how it is reshaped into the matrix (for rank > 1):
+    rank 1 (x1) : in this case, the layer corresponds to a matrix-vector product. x1 must be equal to C_in
+    rank 2 (x1, x2): x2 must be equal to C_in
+    rank 3 (x1, x2, x3) --> (x1 * x2, x3). x3 must be equal to C_in
+    rank 4 (x1, x2, x3, x4) ---> (x1, x2 * x3 * x4). x2 * x3 * x4 must be equal to C_in
+    rank 5 (x1, x2, x3, x4, x5) ---> (x1 * x2, x3 * x4 * x5). x3 * x4 * x5 must be equal to C_in
+
+Output
+    Output rank is same as the input rank
+    rank 1: (C_out)
+    rank 2: (x1, C_out)
+    rank 3: (x1, x2, C_out)
+    rank 4: (x1, C_out, 1, 1)
+    rank 5: (x1, x2, C_out, 1, 1)
+
+
+.. code-block:: proto
+
+    message InnerProductLayerParams {
+
+        uint64 inputChannels = 1;
+        uint64 outputChannels = 2;
+
+        bool hasBias = 10;
+
+        WeightParams weights = 20;
+        WeightParams bias = 21;
+
+        bool int8DynamicQuantize = 22;
+
+    }
+
+
+
+
+
+EmbeddingLayerParams
+________________________________________________________________________________
+
+A layer that performs a matrix lookup and optionally adds a bias.
+The weights matrix is stored with dimensions [outputChannels, inputDim].
+
+.. code::
+
+    y = EmbeddingLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    Input values must be in the range ``[0, inputDim - 1]``.
+
+    Input must have rank equal to 4 or 5, such that the last 3 dimensions are all 1.
+    rank 4: shape (x1, 1, 1, 1). x1 is effectively the batch/sequence length.
+    rank 5: shape (x1, x2, 1, 1, 1). x1 * x2 is effectively the combined batch/sequence length.
+
+Output
+    Output rank is same as the input rank. Please see input description above.
+    rank 4: shape (x1, outputChannels, 1, 1)
+    rank 5: shape (x1, x2, outputChannels, 1, 1)
+
+
+.. code-block:: proto
+
+    message EmbeddingLayerParams {
+
+        uint64 inputDim = 1;
+        uint64 outputChannels = 2;
+
+        bool hasBias = 10;
+
+        WeightParams weights = 20;
+        WeightParams bias = 21;
+
+    }
+
+
+
+
+
+EmbeddingNDLayerParams
+________________________________________________________________________________
+
+A layer that performs a matrix lookup and optionally adds a bias.
+The weights matrix is stored with dimensions [embeddingSize, vocabSize].
+
+.. code::
+
+    y = EmbeddingNDLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    Input values must be in the range ``[0, vocabSize - 1]``.
+    Input must have rank at least 2. The last dimension must always be 1.
+    rank 2: shape (x1, 1). x1 is the batch/sequence length.
+    rank 3: shape (x1, x2, 1). x1 * x2 is effectively the combined batch/sequence length.
+    rank 4: shape (x1, x2, x3, 1). x1 * x2 * x3 is effectively the combined batch/sequence length.
+    rank 5: shape (x1, x2, x3, x4, 1). x1 * x2 * x3 * x4 is effectively the combined batch/sequence length.
+
+Output
+    Output rank is same as the input rank. Please see input description above.
+    rank 2: shape (x1, embeddingSize)
+    rank 3: shape (x1, x2, embeddingSize)
+    rank 4: shape (x1, x2, x3, embeddingSize)
+    rank 5: shape (x1, x2, x3, x4, embeddingSize)
+
+
+.. code-block:: proto
+
+    message EmbeddingNDLayerParams {
+
+        uint64 vocabSize = 1;
+        uint64 embeddingSize = 2;
+        bool hasBias = 3;
+        WeightParams weights = 20;
+        WeightParams bias = 21;
+
+    }
+
+
+
+
+
+
+BatchnormLayerParams
+________________________________________________________________________________
+
+A layer that performs batch normalization,
+which is performed along axis = -3,
+and repeated along the other axes, if present.
+
+.. code::
+
+     y = BatchnormLayer(x)
+
+Requires 1 input and produces 1 output.
+
+This operation is described by the following formula:
+
+.. math::
+    y_i = \gamma_i \dfrac{ (x_i - \mu_i)}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_i \;,\; i=1,\ldots,C
+
+Input
+    A blob with rank greater than or equal to 3.
+    Example: Rank 4 blob represents [Batch, channels, height, width]
+    For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+
+Output
+    A blob with the same shape as the input.
+
+
+.. code-block:: proto
+
+    message BatchnormLayerParams {
+
+        uint64 channels = 1;
+
+        bool computeMeanVar = 5;
+        bool instanceNormalization = 6;
+
+        float epsilon = 10;
+
+        WeightParams gamma = 15;
+        WeightParams beta = 16;
+        WeightParams mean = 17;
+        WeightParams variance = 18;
+
+    }
+
+
+
+
+
+
+PoolingLayerParams
+________________________________________________________________________________
+
+A spatial pooling layer.
+
+.. code::
+
+     y = PoolingLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank greater than or equal to 4.
+    Rank 4 blob represents [Batch, channels, height, width]
+    For ranks greater than 4, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+
+Output
+    Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C, H_out, W_out]
+
+Padding options are similar to ``ConvolutionLayerParams``,
+with the additional option of ``ValidCompletePadding`` (``includeLastPixel``),
+which ensures that the last application of the kernel
+always includes the last pixel of the input image, if there is padding.
+
+.. code::
+
+     H_out = ceil(float(H_in + 2 * paddingAmounts[0] - kernelSize[0]) / float(Stride[0])) + 1
+     if (paddingAmounts[0] > 0 or paddingAmounts[1] > 0) {
+         if ((H_out - 1) * Stride[0] >= H_in + paddingAmounts[0]) {
+             H_out = H_out - 1
+         }
+     }
+
+The equivalent expressions hold true for ``W_out`` as well.
+Only symmetric padding is supported with this option.
+
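+The ``includeLastPixel`` output-size rule above can be checked with a small
+Python sketch (an illustration of the formula, not part of coremltools):
+
+.. code-block:: python
+
+     import math
+
+     def include_last_pixel_out(in_size, kernel, stride, pad):
+         """Output size for ValidCompletePadding along one spatial axis."""
+         out = math.ceil((in_size + 2 * pad - kernel) / stride) + 1
+         if pad > 0 and (out - 1) * stride >= in_size + pad:
+             out -= 1  # drop a kernel application starting entirely inside the padding
+         return out
+
+     # H_in = 5, kernel 3, stride 2, symmetric padding 1 on each side:
+     print(include_last_pixel_out(5, 3, 2, 1))  # -> 3
+
+
+.. 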
code-block:: proto + + message PoolingLayerParams { + + enum PoolingType { + + MAX = 0; + AVERAGE = 1; + L2 = 2; + + } + PoolingType type = 1; + + repeated uint64 kernelSize = 10; + + repeated uint64 stride = 20; + + message ValidCompletePadding { + + repeated uint64 paddingAmounts = 10; + + } + + oneof PoolingPaddingType { + ValidPadding valid = 30; + SamePadding same = 31; + ValidCompletePadding includeLastPixel = 32; + } + + bool avgPoolExcludePadding = 50; + + bool globalPooling = 60; + + } + + + + + + +PoolingLayerParams.ValidCompletePadding +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message ValidCompletePadding { + + repeated uint64 paddingAmounts = 10; + + } + + + + + + +Pooling3DLayerParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message Pooling3DLayerParams { + + enum PoolingType3D { + MAX = 0; + AVERAGE = 1; + } + + // Whether to use Max or Average + PoolingType3D type = 1; + + // Depth of the pooling region. + int32 kernelDepth = 2; + + // Height of the pooling region. + int32 kernelHeight = 3; + + // Width of the pooling region. + int32 kernelWidth = 4; + + // Stride along the depth direction + int32 strideDepth = 5; + + // Stride along the height direction + int32 strideHeight = 6; + + // Stride along the width direction + int32 strideWidth = 7; + + enum Pooling3DPaddingType { + CUSTOM = 0; + VALID = 1; + SAME = 2; + } + Pooling3DPaddingType paddingType = 15; + + // Padding before the input in the depth direction. + int32 customPaddingFront = 8; + + // Padding after the input in the depth direction. + int32 customPaddingBack = 9; + + // Padding before the input in the height direction. + int32 customPaddingTop = 10; + + // Padding after the input in the height direction. + int32 customPaddingBottom = 11; + + // Padding before the input in the width direction. + int32 customPaddingLeft = 12; + + // Padding after the input in the width direction. + int32 customPaddingRight = 13; + + // If true, exclude zeros from padding in Average pooling. Meaningless in Max Pooling. + bool countExcludePadding = 14; + } + + + + + + +GlobalPooling3DLayerParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message GlobalPooling3DLayerParams { + + enum GlobalPoolingType3D { + MAX = 0; + AVERAGE = 1; + } + + // Whether to use Max or Average + GlobalPoolingType3D type = 1; + } + + + + + + +PaddingLayerParams +________________________________________________________________________________ + +A layer that performs padding along spatial dimensions. + +.. code:: + + y = PaddingLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob with rank at least 2. + e.g.: blob with shape ``[H_in, W_in]``. + For ranks greater than 2, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch + i.e. Padding is applied on last two dimensions. + +Output + Same rank as the input. + e.g.: blob with shape ``[H_out, W_out]``. + +Output dimensions are calculated as follows: + +.. 
code:: + + H_out = H_in + topPaddingAmount + bottomPaddingAmount + W_out = W_in + leftPaddingAmount + rightPaddingAmount + + topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize + bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize + leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize + rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize + +There are three types of padding: + +- ``PaddingConstant``, which fills a constant value at the border. +- ``PaddingReflection``, which reflects the values at the border. +- ``PaddingReplication``, which replicates the values at the border. + +Given the following input: + +.. code:: + + [1, 3, 4] : 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + +Here is the output of applying the padding +``(top=2, left=2, bottom=0, right=0)`` +with each of the supported types: + +- ``PaddingConstant`` (``value = 0``): + .. code:: + + [1, 5, 6] : 0 0 0 0 0 0 + 0 0 0 0 0 0 + 0 0 1 2 3 4 + 0 0 5 6 7 8 + 0 0 9 10 11 12 + +- ``PaddingReflection``: + .. code:: + + [1, 5, 6] : 11 10 9 10 11 12 + 7 6 5 6 7 8 + 3 2 1 2 3 4 + 7 6 5 6 7 8 + 11 10 9 10 11 12 + +- ``PaddingReplication``: + .. code:: + + [1, 5, 6] : 1 1 1 2 3 4 + 1 1 1 2 3 4 + 1 1 1 2 3 4 + 5 5 5 6 7 8 + 9 9 9 10 11 12 + + +.. code-block:: proto + + message PaddingLayerParams { + + message PaddingConstant { + float value = 1; + } + + message PaddingReflection { + } + + message PaddingReplication { + } + + oneof PaddingType { + PaddingConstant constant = 1; + PaddingReflection reflection = 2; + PaddingReplication replication = 3; + } + + BorderAmounts paddingAmounts = 10; + + } + + + + + + +PaddingLayerParams.PaddingConstant +-------------------------------------------------------------------------------- + +Fill a constant value in the padded region. + + +.. code-block:: proto + + message PaddingConstant { + float value = 1; + } + + + + + + +PaddingLayerParams.PaddingReflection +-------------------------------------------------------------------------------- + +Reflect the values at the border for padding. + + +.. code-block:: proto + + message PaddingReflection { + } + + + + + + +PaddingLayerParams.PaddingReplication +-------------------------------------------------------------------------------- + +Replicate the values at the border for padding. + + +.. code-block:: proto + + message PaddingReplication { + } + + + + + + +ConcatLayerParams +________________________________________________________________________________ + +A layer that concatenates along the axis = -3 or -5. +For general concatenation along any axis, see ConcatNDLayer. + +.. code:: + + y = ConcatLayer(x1,x2,....) + +Requires more than 1 input and produces 1 output. + +Input + All input blobs must have same rank. + If "sequenceConcat" = False, rank must be greater than equal to 3. In this case concatenation is along axis = -3 + If "sequenceConcat" = True, rank must be greater than equal to 5. In this case concatenation is along axis = -5 + +Output + Same rank as the input. + + +.. code-block:: proto + + message ConcatLayerParams { + + bool sequenceConcat = 100; + + } + + + + + + +LRNLayerParams +________________________________________________________________________________ + +A layer that performs local response normalization (LRN). + +.. code:: + + y = LRNLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob with rank greater than equal to 3. 
+ Example: Rank 4 blob represents [Batch, channels, height, width] + For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. +Output + A blob with the same shape as the input. + +This layer is described by the following formula: + +.. math:: + x_i \leftarrow \dfrac{x_i}{\left ( k + \dfrac{\alpha}{C} \sum_j x_j^2 \right )^\beta} + +where the summation is done over a ``(localSize, 1, 1)`` neighborhood --- +that is, over a window "across" channels in 1x1 spatial neighborhoods. + + +.. code-block:: proto + + message LRNLayerParams { + + float alpha = 1; + float beta = 2; + uint64 localSize = 3; + float k = 4; + + } + + + + + + +SoftmaxLayerParams +________________________________________________________________________________ + +Softmax Normalization Layer + +A layer that performs softmax normalization. +Normalization is applied along axis = -3 or N-3 (where N is the rank of the input) +For softmax layer that can operate on any axis, see SoftmaxNDLayer. + + +.. code:: + + y = SoftmaxLayer(x) + +Requires 1 input and produces 1 output. + +Input + Must be a blob with rank >= 3. +Output + A blob with the same shape as the input. + +This layer is described by the following formula: + +.. math:: + x_i \leftarrow \dfrac{e^{x_i}}{\sum_i{e^{x_i}}} + + +.. code-block:: proto + + message SoftmaxLayerParams { + + } + + + + + + +SplitLayerParams +________________________________________________________________________________ + +A layer that uniformly splits across axis = -3 to produce a specified number of outputs. +For general split operation along any axis, see SplitNDLayer. + +.. code:: + + (y1,y2,...yN) = SplitLayer(x), where N = nOutputs + +Requires 1 input and produces multiple outputs. + +Input + A blob with rank at least 3. + e.g.: blob with shape ``[C, H, W]`` +Output + ``nOutputs`` blobs each with same rank as the input. + e.g.: For input that is of shape ``[C, H, W]``, output shapes will be ``[C/nOutputs, H, W]`` + + +.. code-block:: proto + + message SplitLayerParams { + + uint64 nOutputs = 1; + + } + + + + + + +AddLayerParams +________________________________________________________________________________ + +A layer that performs elementwise addition. +This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer. + +.. code:: + + y = AddLayer(x1,x2,...) + +Requires 1 or more than 1 input and produces 1 output. + +Input + In general, there are no rank constraints. + However, only certain set of shapes are broadcastable. For example: + [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W] +Output + A blob with shape equal to the input blob. + +If only one input is provided, scalar addition is performed: + +.. math:: + y = x + \alpha + + +.. code-block:: proto + + message AddLayerParams { + + float alpha = 1; + + } + + + + + + +MultiplyLayerParams +________________________________________________________________________________ + +A layer that performs elementwise multiplication. +This layer has limited broadcasting support. For general broadcasting see MultiplyBroadcastableLayer. + +.. code:: + + y = MultiplyLayer(x1,x2,...) + +Requires 1 or more than 1 input and produces 1 output. + +Input + In general, there are no rank constraints. + However, only certain set of shapes are broadcastable. For example: + [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W] +Output + A blob with shape equal to the first input blob. + +If only one input is provided, scalar multiplication is performed: + +.. 
math:: + y = \alpha x + + +.. code-block:: proto + + message MultiplyLayerParams { + + float alpha = 1; + + } + + + + + + +UnaryFunctionLayerParams +________________________________________________________________________________ + +A layer that applies a unary function. + +.. code:: + + y = UnaryFunctionLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob with no rank constraints. +Output + A blob with the same shape as the input. + +The input is first modified by shifting and scaling: + +.. math:: + x \leftarrow \text{scale} \cdot x + \text{shift} + + +.. code-block:: proto + + message UnaryFunctionLayerParams { + + enum Operation { + SQRT = 0; + RSQRT = 1; + INVERSE = 2; + POWER = 3; + EXP = 4; + LOG = 5; + ABS = 6; + THRESHOLD = 7; + } + Operation type = 1; + + float alpha = 2; + + float epsilon = 3; + + float shift = 4; + + float scale = 5; + + } + + + + + + +UpsampleLayerParams +________________________________________________________________________________ + +A layer that scales up spatial dimensions. +It supports two modes: nearest neighbour (default) and bilinear. + +.. code:: + + y = UpsampleLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob with rank at least 3. + e.g.: blob with shape ``[C, H, W]``. + For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. + +Output + Same rank as the input. + e.g.: blob with shape ``[C, scalingFactor[0] * H, scalingFactor[1] * W]`` + + +.. code-block:: proto + + message UpsampleLayerParams { + + repeated uint64 scalingFactor = 1; + + repeated float fractionalScalingFactor = 7; + + enum InterpolationMode { + + NN = 0; + BILINEAR = 1; + + } + + InterpolationMode mode = 5; + + enum LinearUpsampleMode { + + DEFAULT = 0; + ALIGN_CORNERS_TRUE = 1; + ALIGN_CORNERS_FALSE = 2; + + } + + LinearUpsampleMode linearUpsampleMode = 6; + + } + + + + + + +ResizeBilinearLayerParams +________________________________________________________________________________ + +A layer that resizes the input to a pre-specified spatial size using bilinear interpolation. + +.. code:: + + y = ResizeBilinearLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob with rank at least 3. + e.g.: blob with shape ``[C, H_in, W_in]``. + For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. + +Output + Same rank as the input. + e.g.: blob with shape ``[C, H_out, W_out]``. + + +.. code-block:: proto + + message ResizeBilinearLayerParams { + + repeated uint64 targetSize = 1; + + SamplingMode mode = 2; + + } + + + + + + +CropResizeLayerParams +________________________________________________________________________________ + +A layer that extracts cropped spatial patches or RoIs (regions of interest) from the input and resizes them to a pre-specified size using +bilinear interpolation. +Note that RoI Align layer can be implemented with this layer followed by a pooling layer. + +.. code:: + + y = CropResizeLayer(x) + +Requires 2 inputs and produces 1 output. + +Input + There are two inputs. + First input represents an image feature map. + Second input represents the bounding box coordinates for N patches or RoIs (region of interest). + + First input is rank 5: [1, Batch, C, H_in, W_in]. + Second input is rank 5. Its shape can be either [N, 1, 4, 1, 1] or [N, 1, 5, 1, 1]. 
+
+    N: number of patches/RoIs to be extracted
+
+    If RoI shape = ``[N, 1, 4, 1, 1]``
+        The axis=-3 corresponds to the four coordinates specifying the bounding box.
+        All the N RoIs are extracted from all the batches of the input.
+
+    If RoI shape = ``[N, 1, 5, 1, 1]``
+        The first element of the axis=-3 specifies the input batch id from which to extract the RoI and
+        must be in the interval ``[0, Batch - 1]``. That is, the n-th RoI is extracted from the RoI[n,0,0,0,0]-th
+        input batch id. The last four elements of the axis=-3 specify the bounding box coordinates.
+
+Output
+    A blob with rank 5.
+    - Shape is [N, Batch, C, H_out, W_out] if input RoI shape is [N, 1, 4, 1, 1]
+    - Shape is [N, 1, C, H_out, W_out] if input RoI shape is [N, 1, 5, 1, 1]
+
+
+.. code-block:: proto
+
+    message CropResizeLayerParams {
+
+        repeated uint64 targetSize = 1;
+
+        bool normalizedCoordinates = 2;
+
+        SamplingMode mode = 3;
+
+        BoxCoordinatesMode boxIndicesMode = 4;
+
+        float spatialScale = 5;
+
+    }
+
+
+
+
+
+
+BiasLayerParams
+________________________________________________________________________________
+
+A layer that performs elementwise addition of a bias,
+which is broadcasted to match the input shape.
+
+.. code::
+
+     y = BiasLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank at least 3.
+    e.g.: blob with shape ``[C, H, W]``.
+    For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+Output
+    A blob with the same shape as the input.
+
+
+.. code-block:: proto
+
+    message BiasLayerParams {
+
+        repeated uint64 shape = 1;
+
+        WeightParams bias = 2;
+
+    }
+
+
+
+
+
+
+ScaleLayerParams
+________________________________________________________________________________
+
+A layer that performs elementwise multiplication by a scale factor
+and optionally adds a bias;
+both the scale and bias are broadcasted to match the input shape.
+
+.. code::
+
+     y = ScaleLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank at least 3.
+    e.g.: blob with shape ``[C, H, W]``.
+    For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+Output
+    A blob with the same shape as the input.
+
+
+.. code-block:: proto
+
+    message ScaleLayerParams {
+
+        repeated uint64 shapeScale = 1;
+
+        WeightParams scale = 2;
+
+        bool hasBias = 3;
+
+        repeated uint64 shapeBias = 4;
+
+        WeightParams bias = 5;
+
+    }
+
+
+
+
+
+
+LoadConstantLayerParams
+________________________________________________________________________________
+
+A layer that loads data as a parameter and provides it as an output.
+The output is rank 5. For general rank, see LoadConstantNDLayer.
+
+.. code::
+
+     y = LoadConstantLayer()
+
+Requires no input and produces 1 output.
+
+Output:
+    A blob with rank 5 and shape ``[1, 1, C, H, W]``
+
+
+.. code-block:: proto
+
+    message LoadConstantLayerParams {
+
+        repeated uint64 shape = 1;
+
+        WeightParams data = 2;
+
+    }
+
+
+
+
+
+
+L2NormalizeLayerParams
+________________________________________________________________________________
+
+A layer that performs L2 normalization, i.e. divides by the
+square root of the sum of squares of all elements of the input.
+
+.. code::
+
+     y = L2NormalizeLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank greater than or equal to 3.
+    For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+Output
+    A blob with the same shape as the input.
+
+This layer is described by the following formula:
+
+.. math::
+    x_i \leftarrow \dfrac{x_i}{\sqrt{\sum{x_i^2} + \epsilon}}
+
+
+.. code-block:: proto
+
+    message L2NormalizeLayerParams {
+
+        float epsilon = 1;
+
+    }
+
+
+
+
+
+
+FlattenLayerParams
+________________________________________________________________________________
+
+A layer that flattens the input.
+
+.. code::
+
+     y = FlattenLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank greater than or equal to 3.
+    e.g.: Rank 4 blob represents [Batch, C, H, W]
+    For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+Output
+    Same rank as the input, such that last two dimensions are both 1.
+    e.g.: For rank 4 input, output shape is ``[Batch, C * H * W, 1, 1]``
+
+There are two flatten orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
+``CHANNEL_FIRST`` does not require data to be rearranged,
+because row major ordering is used by internal storage.
+``CHANNEL_LAST`` requires data to be rearranged.
+
+
+.. code-block:: proto
+
+    message FlattenLayerParams {
+
+        enum FlattenOrder {
+
+            CHANNEL_FIRST = 0;
+            CHANNEL_LAST = 1;
+
+        }
+        FlattenOrder mode = 1;
+
+    }
+
+
+
+
+
+
+ReshapeLayerParams
+________________________________________________________________________________
+
+A layer that recasts the input into a new shape.
+
+.. code::
+
+     y = ReshapeLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank 5.
+    e.g.: ``[1, 1, C, H, W]`` or ``[Seq, 1, C, H, W]``.
+Output
+    A blob with rank 5.
+    e.g.: ``[1, 1, C_out, H_out, W_out]`` or ``[Seq_out, 1, C_out, H_out, W_out]``.
+
+There are two reshape orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
+``CHANNEL_FIRST`` is equivalent to
+flattening the input to ``[Seq, 1, C * H * W, 1, 1]`` in channel first order
+and then reshaping it to the target shape;
+no data rearrangement is required.
+``CHANNEL_LAST`` is equivalent to
+flattening the input to ``[Seq, 1, H * W * C, 1, 1]`` in channel last order,
+reshaping it to ``[Seq_out, 1, H_out, W_out, C_out]`` (it is now in "H_out-major" order),
+and then permuting it to ``[C_out, H_out, W_out]``;
+both the flattening and permuting require the data to be rearranged.
+
+
+.. code-block:: proto
+
+    message ReshapeLayerParams {
+
+        repeated int64 targetShape = 1;
+
+        enum ReshapeOrder {
+
+            CHANNEL_FIRST = 0;
+            CHANNEL_LAST = 1;
+
+        }
+        ReshapeOrder mode = 2;
+
+    }
+
+
+
+
+
+
+PermuteLayerParams
+________________________________________________________________________________
+
+A layer that rearranges the dimensions and data of an input.
+For generic transpose/permute operation see TransposeLayer.
+
+.. code::
+
+     y = PermuteLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    Must be a rank 5 blob.
+    e.g.: shape ``[Seq, B, C, H, W]``.
+Output
+    Rank 5 blob. Transposed version of the input, such that the dimension at axis=1 (i.e. axis=-4) remains unchanged.
+
+
+Examples:
+
+    Assume input shape is [Seq, B, C, H, W]
+
+- If ``axis`` is set to ``[0, 3, 1, 2]``,
+  then the output has shape ``[Seq, B, W, C, H]``
+
+- If ``axis`` is set to ``[3, 1, 2, 0]``,
+  then the output has shape ``[W, B, C, H, Seq]``
+
+- If ``axis`` is set to ``[0, 3, 2, 1]``,
+  then the output has shape ``[Seq, B, W, H, C]``
+
+- If ``axis`` is not set, or is set to ``[0, 1, 2, 3]``,
+  the output is the same as the input.
+
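+These semantics can be sketched in NumPy (an illustration only, not part of
+coremltools): the rank-5 permute is a ``numpy.transpose`` whose batch
+dimension (axis=1) stays fixed, with the four ``axis`` entries indexing
+(Seq, C, H, W):
+
+.. code-block:: python
+
+     import numpy as np
+
+     def permute_rank5(x, axis):
+         """Permute [Seq, B, C, H, W]; 'axis' reorders (Seq, C, H, W), B is fixed."""
+         full = {0: 0, 1: 2, 2: 3, 3: 4}          # axis entry -> actual dimension
+         order = [full[axis[0]], 1, full[axis[1]], full[axis[2]], full[axis[3]]]
+         return np.transpose(x, order)
+
+     x = np.zeros((7, 2, 3, 4, 5))                # [Seq, B, C, H, W]
+     print(permute_rank5(x, [0, 3, 1, 2]).shape)  # (7, 2, 5, 3, 4) = [Seq, B, W, C, H]
+
+
+.. code-block:: proto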
+
+    message PermuteLayerParams {
+
+        repeated uint64 axis = 1;
+
+    }
+
+
+
+
+
+
+ReorganizeDataLayerParams
+________________________________________________________________________________
+
+A layer that reorganizes data in the input in specific ways.
+
+.. code::
+
+     y = ReorganizeDataLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank at least 3.
+    e.g.: blob with shape ``[C, H, W]``.
+    For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch.
+Output
+    Same rank as the input.
+    e.g.: blob with shape ``[C_out, H_out, W_out]``.
+
+mode == SPACE_TO_DEPTH
+    ``[C_out, H_out, W_out]`` : ``[C * blockSize * blockSize, H/blockSize, W/blockSize]``.
+    blockSize must divide H and W.
+    Data is moved from the spatial dimensions to the channel dimension. Input is spatially divided into
+    non-overlapping blocks of size blockSize X blockSize and data from each block is moved into the
+    channel dimension.
+
+mode == DEPTH_TO_SPACE
+    ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``.
+    Square of blockSize must divide C.
+    Reverse of SPACE_TO_DEPTH. Data is moved from the channel dimension to the spatial dimensions.
+
+mode == PIXEL_SHUFFLE
+    ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``.
+    Square of blockSize must divide C.
+    Similar to DEPTH_TO_SPACE, but using the pixel-shuffle semantics for channel order in the output space.
+    In both modes, elements along the channel dimension are collapsed into
+    blocks in the spatial dimensions. The difference is in the arrangement of
+    the input-channels' data in the output space. See the example below for more
+    detail.
+    Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 10.16).
+
+
+Examples:
+
+Assume input is the following [C = 8, H = 1, W = 2] tensor:
+
+.. code::
+
+     [[[1 2]] [[3 4]] [[5 6]] [[7 8]] [[9 10]] [[11 12]] [[13 14]] [[15 16]]]
+
+If block_size == 2 and mode == DEPTH_TO_SPACE, output will be the following
+[C = 2, H = 2, W = 4] tensor:
+
+.. code::
+
+     [[[ 1  5  2  6]
+       [ 9 13 10 14]]
+
+      [[ 3  7  4  8]
+       [11 15 12 16]]]
+
+For mode == SPACE_TO_DEPTH, the behavior is the same as mode ==
+DEPTH_TO_SPACE, but with the input and output swapped.
+
+If block_size == 2 and mode == PIXEL_SHUFFLE, output will be the following
+[C = 2, H = 2, W = 4] tensor:
+
+.. code::
+
+     [[[ 1  3  2  4]
+       [ 5  7  6  8]]
+
+      [[ 9 11 10 12]
+       [13 15 14 16]]]
+
+
+.. code-block:: proto
+
+    message ReorganizeDataLayerParams {
+
+        enum ReorganizationType {
+
+            SPACE_TO_DEPTH = 0;
+            DEPTH_TO_SPACE = 1;
+            PIXEL_SHUFFLE = 2;
+
+        }
+        ReorganizationType mode = 1;
+        uint64 blockSize = 2;
+
+    }
+
+
+
+
+
+
+SliceLayerParams
+________________________________________________________________________________
+
+A layer that slices the input data along axis = -1 or -2 or -3.
+For general slice along any axis, please see SliceStaticLayer/SliceDynamicLayer.
+
+.. code::
+
+     y = SliceLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob that can, in general, have any rank. However, depending on the value of "axis",
+    there may be additional rank constraints.
+Output
+    A blob with the same rank as the input.
+
+Sliced section is taken from the interval ``[startIndex, endIndex)``, i.e.
+startIndex is inclusive while endIndex is exclusive.
+stride must be positive and represents the step size for slicing.
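+As an illustration (NumPy only, not part of coremltools), slicing along the
+width axis corresponds to an ordinary strided slice of the last dimension:
+
+.. code-block:: python
+
+     import numpy as np
+
+     x = np.arange(8)     # last dimension has length 8
+     # startIndex = 1, endIndex = 7, stride = 2, axis = WIDTH_AXIS (-1):
+     print(x[1:7:2])      # [1 3 5] -- endIndex is exclusive
+
+Negative indexing is supported for startIndex and endIndex.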
+-1 denotes N-1, -2 denotes N-2 and so on, where N is the length of the dimension to be sliced. + + +.. code-block:: proto + + message SliceLayerParams { + + int64 startIndex = 1; + int64 endIndex = 2; + uint64 stride = 3; + + enum SliceAxis { + + CHANNEL_AXIS = 0; + HEIGHT_AXIS = 1; + WIDTH_AXIS = 2; + + } + // The following mapping is used for interpreting this parameter: + // CHANNEL_AXIS => axis = -3, input must have rank at least 3. + // HEIGHT_AXIS => axis = -2, input must have rank at least 2. + // WIDTH_AXIS => axis = -1 + SliceAxis axis = 4; + + } + + + + + + +ReduceLayerParams +________________________________________________________________________________ + +A layer that reduces the input using a specified operation. + +.. code:: + + y = ReduceLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob that can, in general, have any rank. However, depending on the value of "axis" , + there may be additional rank constraints. +Output + A blob with the same rank as the input, which has 1s on the dimensions specified in the parameter "axis" + + Values supported for axis are [-1], [-2], [-3], [-2,-1], [-3,-2,-1] + and the equivalent positive values (depending on the rank of the input) + For mode == 'ArgMax', axis must be [-1] or [-2] or [-3]. + + +.. code-block:: proto + + message ReduceLayerParams { + + enum ReduceOperation { + + SUM = 0; + AVG = 1; + PROD = 2; + LOGSUM = 3; + SUMSQUARE = 4; + L1 = 5; + L2 = 6; + MAX = 7; + MIN = 8; + ARGMAX = 9; + + } + ReduceOperation mode = 1; + + float epsilon = 2; + + enum ReduceAxis { + + CHW = 0; + HW = 1; + C = 2; + H = 3; + W = 4; + + } + + // The following mapping is used for interpreting this parameter: + // CHW = axis [-3, -2, -1], input must have rank at least 3. + // HW = axis [-2, -1], input must have rank at least 2. + // C = axis [-3] + // H = axis [-2] + // W = axis [-1] + ReduceAxis axis = 3; + + } + + + + + + +CropLayerParams +________________________________________________________________________________ + +A layer that crops the spatial dimensions of an input. +If two inputs are provided, the shape of the second input is used as the reference shape. + +.. code:: + + y = CropLayer(x1) or y = CropLayer(x1,x2) + +Requires 1 or 2 inputs and produces 1 output. + +Input + 1 or 2 tensors, each with rank at least 3, both inputs must have equal rank. + Example: + - 1 input case: A blob with shape ``[C, H_in, W_in]``. + - 2 input case: 1st blob with shape ``[C, H_in, W_in]``, 2nd blob with shape ``[C, H_out, W_out]``. + + For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. + +Output + Same rank as the inputs. + e.g.: A blob with shape ``[C, H_out, W_out]``. + +If one input is used, output is computed as follows: + +.. code:: + + y = x1[:, topCropAmount:H_in - bottomCropAmount, leftCropAmount:W_in - rightCropAmount] + + topCropAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize + bottomCropAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize + leftCropAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize + rightCropAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize + + H_out = H_in - topCropAmount - bottomCropAmount + W_out = W_in - leftCropAmount - rightCropAmount + +If two inputs are used, output is computed as follows: + +.. code:: + + y = x1[:, offset[0]:offset[0] + H_out, offset[1]:offset[1] + W_out] + + +.. 
code-block:: proto + + message CropLayerParams { + + BorderAmounts cropAmounts = 1; + + repeated uint64 offset = 5; + + } + + + + + + +AverageLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise average of the inputs. +This layer has limited broadcasting support. For general broadcasting see AddBroadcastableLayer. + +.. code:: + + y = AverageLayer(x1,x2,...) + +Requires multiple inputs and produces 1 output. + +Input + In general, there are no rank constraints. + However, only certain set of shapes are broadcastable. For example: + [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W] +Output + A blob with the same shape as each input. + + +.. code-block:: proto + + message AverageLayerParams { + + } + + + + + + +MaxLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise maximum over the inputs. + +.. code:: + + y = MaxLayer(x1,x2,...) + +Requires multiple inputs and produces 1 output. + +Input + In general, there are no rank constraints. + However, only certain set of shapes are broadcastable. For example: + [B, C, 1, 1], [B, C, H, W] +Output + A blob with the same shape as each input. + + +.. code-block:: proto + + message MaxLayerParams { + + } + + + + + + +MinLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise minimum over the inputs. + +.. code:: + + y = MinLayer(x1,x2,...) + +Requires multiple inputs and produces 1 output. + +Input + In general, there are no rank constraints. + However, only certain set of shapes are broadcastable. For example: + [B, C, 1, 1], [B, C, H, W] +Output + A blob with the same shape as each input. + + +.. code-block:: proto + + message MinLayerParams { + + } + + + + + + +DotProductLayerParams +________________________________________________________________________________ + +A layer that computes the dot product of two vectors. + +.. code:: + + y = DotProductLayer(x1,x2) + +Requires 2 inputs and produces 1 output. + +Input + Two blobs with rank at least 3, such that the last two dimensions must be 1. + e.g.: blobs with shape ``[B, C, 1, 1]``. + For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. + +Output + Same rank as the input. + e.g. for rank 4 inputs, output shape: [B, 1, 1, 1] + + +.. code-block:: proto + + message DotProductLayerParams { + + bool cosineSimilarity = 1; + + } + + + + + + +MeanVarianceNormalizeLayerParams +________________________________________________________________________________ + +A layer that performs mean variance normalization, along axis = -3. + +.. code:: + + y = MeanVarianceNormalizeLayer(x) + +Requires 1 input and produces 1 output. + +Input + A blob with rank greater than equal to 3. + Example: Rank 4 blob represents [Batch, channels, height, width] + For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. + +Output + A blob with the same shape as the input. + +If ``acrossChannels == true`` +normalization is performed on flattened input, i.e. the input is reshaped to (Batch,C), where "Batch" contains +all dimensions from 0 to -4 (inclusive), and C contains dimensions -1, -2, -3. + +If ``acrossChannels == false`` +normalization is performed within a channel, +across spatial dimensions (i.e. last two dimensions). + + +.. 
+
+    message MeanVarianceNormalizeLayerParams {
+
+        bool acrossChannels = 1;
+
+        bool normalizeVariance = 2;
+
+        float epsilon = 3;
+
+    }
+
+
+
+
+
+
+SequenceRepeatLayerParams
+________________________________________________________________________________
+
+A layer that repeats a sequence or the dimension sitting at axis = -5.
+
+.. code::
+
+     y = SequenceRepeatLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Input
+    A blob with rank at least 5.
+    e.g.: shape ``[Seq, B, C, H, W]``
+Output
+    A blob with the same rank as the input.
+    e.g.: for input shape ``[Seq, B, C, H, W]``, output shape is ``[nRepetitions * Seq, B, C, H, W]``.
+
+
+.. code-block:: proto
+
+    message SequenceRepeatLayerParams {
+
+        uint64 nRepetitions = 1;
+
+    }
+
+
+
+
+
+
+SimpleRecurrentLayerParams
+________________________________________________________________________________
+
+A simple recurrent layer.
+
+.. code::
+
+     y_t = SimpleRecurrentLayer(x_t, y_{t-1})
+
+Input
+    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
+    This represents a sequence of vectors of size ``inputVectorSize``.
+Output
+    Same rank as the input.
+    Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
+
+- Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
+- Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
+
+This layer is described by the following equation:
+
+.. math::
+    \boldsymbol{y_t} = f(\mathrm{clip}(W \boldsymbol{x_t} + \
+                                       R \boldsymbol{y_{t-1}} + b))
+
+- ``W`` is a 2-dimensional weight matrix
+  (``[outputVectorSize, inputVectorSize]``, row-major)
+- ``R`` is a 2-dimensional recursion matrix
+  (``[outputVectorSize, outputVectorSize]``, row-major)
+- ``b`` is a 1-dimensional bias vector (``[outputVectorSize]``)
+- ``f()`` is an activation
+- ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
+
+
+.. code-block:: proto
+
+    message SimpleRecurrentLayerParams {
+
+        uint64 inputVectorSize = 1;
+        uint64 outputVectorSize = 2;
+
+        ActivationParams activation = 10;
+
+        // If false, output is just the result after final state update.
+        // If true, output is a sequence, containing outputs at all time steps.
+        bool sequenceOutput = 15;
+
+        bool hasBiasVector = 20;
+
+        WeightParams weightMatrix = 30;
+        WeightParams recursionMatrix = 31;
+        WeightParams biasVector = 32;
+
+        // If true, then the node processes the input sequence from right to left
+        bool reverseInput = 100;
+
+    }
+
+
+
+
+
+
+GRULayerParams
+________________________________________________________________________________
+
+Gated-Recurrent Unit (GRU) Layer
+
+.. code::
+
+     y_t = GRULayer(x_t, y_{t-1})
+
+Input
+    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
+    This represents a sequence of vectors of size ``inputVectorSize``.
+Output
+    Same rank as the input.
+    Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
+
+- Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
+- Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
+
+This layer is described by the following equations:
+
+Update Gate
+    .. math::
+        \boldsymbol{z_t} = \
+            f(\mathrm{clip}(W_z \boldsymbol{x_t} + \
+                            R_z \boldsymbol{y_{t-1}} + b_z))
+
+Reset Gate
+    .. math::
+        \boldsymbol{r_t} = \
+            f(\mathrm{clip}(W_r \boldsymbol{x_t} + \
+                            R_r \boldsymbol{y_{t-1}} + b_r))
+
+Cell Memory State
+    .. math::
+        \boldsymbol{c_t} = \
+            \boldsymbol{y_{t-1}} \odot \boldsymbol{r_t}
+
+Output Gate
+    .. math::
+        \boldsymbol{o_t} = \
+            g(\mathrm{clip}(W_o \boldsymbol{x_t} + \
+                            R_o \boldsymbol{c_t} + b_o))
+
+Output
+    .. math::
+        \boldsymbol{y_t} = \
+            (1 - \boldsymbol{z_t}) \odot \boldsymbol{o_t} + \
+            \boldsymbol{z_t} \odot \boldsymbol{y_{t-1}}
+
+- ``W_z``, ``W_r``, ``W_o`` are 2-dimensional input weight matrices
+  (``[outputVectorSize, inputVectorSize]``, row-major)
+- ``R_z``, ``R_r``, ``R_o`` are 2-dimensional recursion matrices
+  (``[outputVectorSize, outputVectorSize]``, row-major)
+- ``b_z``, ``b_r``, ``b_o`` are 1-dimensional bias vectors
+  (``[outputVectorSize]``)
+- ``f()``, ``g()`` are activations
+- ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
+- ``⊙`` denotes the elementwise product of matrices
+
+
+.. code-block:: proto
+
+    message GRULayerParams {
+
+        uint64 inputVectorSize = 1;
+        uint64 outputVectorSize = 2;
+
+        repeated ActivationParams activations = 10;
+
+        bool sequenceOutput = 15;
+
+        bool hasBiasVectors = 20;
+
+        WeightParams updateGateWeightMatrix = 30;
+        WeightParams resetGateWeightMatrix = 31;
+        WeightParams outputGateWeightMatrix = 32;
+
+        WeightParams updateGateRecursionMatrix = 50;
+        WeightParams resetGateRecursionMatrix = 51;
+        WeightParams outputGateRecursionMatrix = 52;
+
+        WeightParams updateGateBiasVector = 70;
+        WeightParams resetGateBiasVector = 71;
+        WeightParams outputGateBiasVector = 72;
+
+        bool reverseInput = 100;
+
+    }
+
+
+
+
+
+
+LSTMParams
+________________________________________________________________________________
+
+Long short-term memory (LSTM) parameters.
+
+This is described by the following equations:
+
+Input Gate
+    .. math::
+        \boldsymbol{i_t} = \
+            f(\mathrm{clip}(W_i \boldsymbol{x_t} + \
+                            R_i \boldsymbol{y_{t-1}} + \
+                            p_i \odot c_{t-1} + b_i))
+
+Forget Gate
+    .. math::
+        \boldsymbol{f_t} = \
+            f(\mathrm{clip}(W_f \boldsymbol{x_t} + \
+                            R_f \boldsymbol{y_{t-1}} + \
+                            p_f \odot c_{t-1} + b_f))
+
+Block Input
+    .. math::
+        \boldsymbol{z_t} = \
+            g(\mathrm{clip}(W_z \boldsymbol{x_t} + \
+                            R_z \boldsymbol{y_{t-1}} + b_z))
+
+Cell Memory State
+    .. math::
+        \boldsymbol{c_t} = \
+            \boldsymbol{c_{t-1}} \odot \boldsymbol{f_t} + \
+            \boldsymbol{i_t} \odot \boldsymbol{z_t}
+
+Output Gate
+    .. math::
+        \boldsymbol{o_t} = \
+            f(\mathrm{clip}(W_o \boldsymbol{x_t} + \
+                            R_o \boldsymbol{y_{t-1}} + \
+                            p_o \odot c_t + b_o))
+
+Output
+    .. math::
+        \boldsymbol{y_t} = \
+            h(\boldsymbol{c_t}) \odot \boldsymbol{o_t}
+
+- ``W_i``, ``W_f``, ``W_z``, ``W_o`` are 2-dimensional input weight matrices
+  (``[outputVectorSize, inputVectorSize]``, row-major)
+- ``R_i``, ``R_f``, ``R_z``, ``R_o`` are 2-dimensional recursion matrices
+  (``[outputVectorSize, outputVectorSize]``, row-major)
+- ``b_i``, ``b_f``, ``b_z``, ``b_o`` are 1-dimensional bias vectors
+  (``[outputVectorSize]``)
+- ``p_i``, ``p_f``, ``p_o`` are 1-dimensional peephole vectors
+  (``[outputVectorSize]``)
+- ``f()``, ``g()``, ``h()`` are activations
+- ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
+- ``⊙`` denotes the elementwise product of matrices
+
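+A single LSTM time step under these equations can be sketched in NumPy as
+follows (illustrative only; ``sigmoid`` for f and ``tanh`` for g and h are
+common choices, but the activations are configurable):
+
+.. code-block:: python
+
+     import numpy as np
+
+     def lstm_step(x, y_prev, c_prev, W, R, b, p):
+         """W, R, b, p are dicts keyed by gate: i, f, z, o (no peephole for z)."""
+         clip = lambda v: np.clip(v, -50.0, 50.0)
+         sig = lambda v: 1.0 / (1.0 + np.exp(-v))
+         i = sig(clip(W['i'] @ x + R['i'] @ y_prev + p['i'] * c_prev + b['i']))
+         f = sig(clip(W['f'] @ x + R['f'] @ y_prev + p['f'] * c_prev + b['f']))
+         z = np.tanh(clip(W['z'] @ x + R['z'] @ y_prev + b['z']))
+         c = c_prev * f + i * z
+         o = sig(clip(W['o'] @ x + R['o'] @ y_prev + p['o'] * c + b['o']))
+         return np.tanh(c) * o, c  # (y_t, c_t)
+
+
+.. code-block:: proto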
+
+    message LSTMParams {
+
+        bool sequenceOutput = 10;
+
+        bool hasBiasVectors = 20;
+
+        bool forgetBias = 30;
+
+        bool hasPeepholeVectors = 40;
+
+        bool coupledInputAndForgetGate = 50;
+
+        float cellClipThreshold = 60;
+
+    }
+
+
+
+
+
+
+LSTMWeightParams
+________________________________________________________________________________
+
+Weights for long short-term memory (LSTM) layers
+
+
+.. code-block:: proto
+
+    message LSTMWeightParams {
+
+        WeightParams inputGateWeightMatrix = 1;
+        WeightParams forgetGateWeightMatrix = 2;
+        WeightParams blockInputWeightMatrix = 3;
+        WeightParams outputGateWeightMatrix = 4;
+
+        WeightParams inputGateRecursionMatrix = 20;
+        WeightParams forgetGateRecursionMatrix = 21;
+        WeightParams blockInputRecursionMatrix = 22;
+        WeightParams outputGateRecursionMatrix = 23;
+
+        //biases:
+        WeightParams inputGateBiasVector = 40;
+        WeightParams forgetGateBiasVector = 41;
+        WeightParams blockInputBiasVector = 42;
+        WeightParams outputGateBiasVector = 43;
+
+        //peepholes:
+        WeightParams inputGatePeepholeVector = 60;
+        WeightParams forgetGatePeepholeVector = 61;
+        WeightParams outputGatePeepholeVector = 62;
+
+    }
+
+
+
+
+
+
+UniDirectionalLSTMLayerParams
+________________________________________________________________________________
+
+A unidirectional long short-term memory (LSTM) layer.
+
+.. code::
+
+     (y_t, c_t) = UniDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1})
+
+Input
+    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
+    This represents a sequence of vectors of size ``inputVectorSize``.
+Output
+    Same rank as the input.
+    Represents a vector of size ``outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
+
+- Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
+- Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
+
+
+.. code-block:: proto
+
+    message UniDirectionalLSTMLayerParams {
+
+        uint64 inputVectorSize = 1;
+        uint64 outputVectorSize = 2;
+
+        repeated ActivationParams activations = 10;
+
+        LSTMParams params = 15;
+
+        LSTMWeightParams weightParams = 20;
+
+        bool reverseInput = 100;
+
+    }
+
+
+
+
+
+
+BiDirectionalLSTMLayerParams
+________________________________________________________________________________
+
+Bidirectional long short-term memory (LSTM) layer
+
+.. code::
+
+     (y_t, c_t, y_t_reverse, c_t_reverse) = BiDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1}, y_{t-1}_reverse, c_{t-1}_reverse)
+
+Input
+    A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
+    This represents a sequence of vectors of size ``inputVectorSize``.
+Output
+    Same rank as the input.
+    Represents a vector of size ``2 * outputVectorSize``. It is either the final output or a sequence of outputs at all time steps.
+
+- Output Shape: ``[1, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == false``
+- Output Shape: ``[Seq, Batch, 2 * outputVectorSize, 1, 1]`` , if ``sequenceOutput == true``
+
+
+The first LSTM operates on the input sequence in the forward direction.
+The second LSTM operates on the input sequence in the reverse direction.
+
+Example: given the input sequence ``[x_1, x_2, x_3]``,
+where ``x_i`` are vectors at time index ``i``:
+
+The forward LSTM output is ``[yf_1, yf_2, yf_3]``,
+
+where ``yf_i`` are vectors of size ``outputVectorSize``:
+
+- ``yf_1`` is the output at the end of sequence {``x_1``}
+- ``yf_2`` is the output at the end of sequence {``x_1``, ``x_2``}
+- ``yf_3`` is the output at the end of sequence {``x_1``, ``x_2``, ``x_3``}
+
+The backward LSTM output: ``[yb_1, yb_2, yb_3]``,
+
+where ``yb_i`` are vectors of size ``outputVectorSize``:
+
+- ``yb_1`` is the output at the end of sequence {``x_3``}
+- ``yb_2`` is the output at the end of sequence {``x_3``, ``x_2``}
+- ``yb_3`` is the output at the end of sequence {``x_3``, ``x_2``, ``x_1``}
+
+Output of the bi-dir layer:
+
+- if ``sequenceOutput = True`` : { ``[yf_1, yb_3]``, ``[yf_2, yb_2]``, ``[yf_3, yb_1]`` }
+- if ``sequenceOutput = False`` : { ``[yf_3, yb_3]`` }
+
+
+.. code-block:: proto
+
+    message BiDirectionalLSTMLayerParams {
+
+        uint64 inputVectorSize = 1;
+        uint64 outputVectorSize = 2;
+
+        repeated ActivationParams activationsForwardLSTM = 10;
+        repeated ActivationParams activationsBackwardLSTM = 11;
+
+        LSTMParams params = 15;
+
+        repeated LSTMWeightParams weightParams = 20;
+
+    }
+
+
+
+
+
+
+CustomLayerParams
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message CustomLayerParams {
+
+        message CustomLayerParamValue {
+            oneof value {
+                double doubleValue = 10;
+                string stringValue = 20;
+                int32 intValue = 30;
+                int64 longValue = 40;
+                bool boolValue = 50;
+            }
+        }
+
+        string className = 10; // The name of the class (conforming to MLCustomLayer) corresponding to this layer
+        repeated WeightParams weights = 20; // Any weights -- these are serialized in binary format and memmapped at runtime
+        map<string, CustomLayerParamValue> parameters = 30; // these may be handled as strings, so this should not be large
+        string description = 40; // An (optional) description of the layer provided by the model creator. This information is displayed when viewing the model, but does not affect the model's execution on device.
+
+    }
+
+
+
+
+
+
+CustomLayerParams.CustomLayerParamValue
+--------------------------------------------------------------------------------
+
+
+
+
+.. code-block:: proto
+
+    message CustomLayerParamValue {
+        oneof value {
+            double doubleValue = 10;
+            string stringValue = 20;
+            int32 intValue = 30;
+            int64 longValue = 40;
+            bool boolValue = 50;
+        }
+    }
+
+
+
+
+
+
+CustomLayerParams.ParametersEntry
+--------------------------------------------------------------------------------
+
+
+
+
+.. code-block:: proto
+
+    message CustomLayerParamValue {
+        oneof value {
+            double doubleValue = 10;
+            string stringValue = 20;
+            int32 intValue = 30;
+            int64 longValue = 40;
+            bool boolValue = 50;
+        }
+    }
+
+
+
+
+
+
+TransposeLayerParams
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message TransposeLayerParams {
+
+        repeated uint64 axes = 1;
+
+    }
+
+
+
+
+
+
+BatchedMatMulLayerParams
+________________________________________________________________________________
+
+A layer that computes the matrix multiplication of two tensors with numpy-like broadcasting
+where the matrices reside in the last two indices of the tensor.
+
+.. code::
+
+     y = BatchedMatMul(a,b)
+
+Requires 1 or 2 inputs and produces 1 output.
+
+The first tensor, "a", must be provided as an input. The second tensor can either be an input or provided as a weight matrix parameter.
+
+Input
+    - a: First N-Dimensional tensor
+    - b: Second N-Dimensional tensor (either a rank-N input or a matrix, i.e. N=2, provided as a layer parameter)
+
+Output
+    A tensor containing the matrix product of the two tensors.
+    When there are two inputs: rank is max(2, rank(a), rank(b))
+    When there is one input: rank is same as that of the input.
+
+This operation behaves as follows:
+
+    When there are two inputs:
+        - If N >= 2 for both tensors, it is treated as a batch of matrices residing in the last two indices.
+          All the indices, except for the last two, are broadcasted using conventional rules.
+        - If the first tensor is 1-D, it is converted to a 2-D tensor by prepending a 1 to its shape. E.g. (D) -> (1,D)
+        - If the second tensor is 1-D, it is converted to a 2-D tensor by appending a 1 to its shape. E.g. (D) -> (D,1)
+
+    When there is one input:
+        - The weight matrix corresponds to a matrix, of shape (X1, X2). Values of X1, X2 must be provided as layer parameters.
+        - The input, "a", is reshaped into a matrix by combining all the leading dimensions, except the last, into a batch dimension. e.g.:
+            - if "a" is rank 1 (X1,) --> (1, X1). Output shape will be (X2,)
+            - if "a" is rank 2 (B1, X1) --> no need to reshape. Output shape will be (B1, X2)
+            - if "a" is rank 3 (B1, B2, X1) --> (B1 * B2, X1). Output shape will be (B1, B2, X2)
+            - etc.
+
+
+.. code-block:: proto
+
+    message BatchedMatMulLayerParams {
+
+        bool transposeA = 1;
+        bool transposeB = 2;
+
+
+        uint64 weightMatrixFirstDimension = 5;
+        uint64 weightMatrixSecondDimension = 6;
+
+        bool hasBias = 7;
+
+        WeightParams weights = 8;
+        WeightParams bias = 9;
+
+        bool int8DynamicQuantize = 10;
+
+    }
+
+
+
+
+
+
+ConcatNDLayerParams
+________________________________________________________________________________
+
+A layer that concatenates a list of tensors along a specified axis.
+
+.. code::
+
+     y = ConcatNDLayer(x1,x2,....)
+
+Requires at least 2 inputs and produces 1 output.
+
+Input
+    A sequence of N-dimensional tensors. The rank of the input tensors must match and all dimensions except 'axis' must be equal.
+Output
+    An N-Dimensional tensor with the same rank as the inputs.
+
+
+.. code-block:: proto
+
+    message ConcatNDLayerParams {
+
+        int64 axis = 1;
+
+    }
+
+
+
+
+
+
+SoftmaxNDLayerParams
+________________________________________________________________________________
+
+A layer that performs softmax normalization along a specified axis.
+
+.. code::
+
+     y = SoftmaxNDLayer(x)
+
+Requires 1 input and produces 1 output.
+
+Output shape is same as the input.
+
+
+.. code-block:: proto
+
+    message SoftmaxNDLayerParams {
+
+        int64 axis = 1;
+
+    }
+
+
+
+
+
+
+ReverseLayerParams
+________________________________________________________________________________
+
+A layer that reverses specific dimensions of the input tensor.
+It is similar in functionality to the numpy.flip method.
+
+Requires 1 input and produces 1 output.
+Output shape is same as the input.
+
+
+.. code-block:: proto
+
+    message ReverseLayerParams {
+
+        repeated bool reverseDim = 1;
+
+    }
+
+
+
+
+
+
+ReverseSeqLayerParams
+________________________________________________________________________________
+
+A layer that reverses variable length slices.
+
+Requires 2 inputs and produces 1 output.
+
+The 2 inputs, in order, are denoted by "data" and "seq_lengths".
+"seq_lengths" must be a rank 1 tensor, i.e. seq_lengths.shape = (B,),
+which contains the length of the sequence to be reversed, for each element of the batch.
+Dimension "batchAxis" in "data" must be equal to B, i.e.,
+data.shape[batchAxis] = B.
+
+According to the batch axis, input "data" is first divided into a batch of B inputs,
+each of which is flipped along the dimension "sequenceAxis", by the amount specified in
+"seq_lengths", the second input.
+
+e.g.:
+
+.. code::
+
+     data [shape = (2,4)]:
+     [0 1 2 3]
+     [4 5 6 7]
+
+     seq_lengths [shape = (2,)]:
+     [3, 0]
+
+     batchAxis = 0
+     sequenceAxis = 1
+
+     output [shape = (2,4)]:
+     [2 1 0 3]
+     [4 5 6 7]
+
+.. code::
+
+     data [shape = (2,3,2)]:
+     [0 1]
+     [2 3]
+     [4 5]   (slice = 0)
+     [6 7]
+     [8 9]
+     [10 11] (slice = 1)
+
+     seq_lengths [shape = (2,)]:
+     [2, 3]
+
+     batchAxis = 0
+     sequenceAxis = 1
+
+     output [shape = (2,3,2)]:
+     [2 3]
+     [0 1]
+     [4 5]   (slice = 0)
+     [10 11]
+     [8 9]
+     [6 7]   (slice = 1)
+
+Output shape is same as the input.
+
+
+.. code-block:: proto
+
+    message ReverseSeqLayerParams {
+
+        int64 batchAxis = 1; // batch axis has to be strictly less than seq_axis
+        int64 sequenceAxis = 2;
+
+    }
+
+
+
+
+
+
+LoadConstantNDLayerParams
+________________________________________________________________________________
+
+A layer that loads data as a parameter and provides it as an output.
+
+.. code::
+
+     y = LoadConstantNDLayer()
+
+Requires no input and produces 1 output.
+
+Output: A tensor with shape as provided in the parameter "shape"
+
+
+.. code-block:: proto
+
+    message LoadConstantNDLayerParams {
+
+        repeated uint64 shape = 1;
+        WeightParams data = 2;
+
+    }
+
+
+
+
+
+
+FillLikeLayerParams
+________________________________________________________________________________
+
+A layer that generates an output tensor with a constant value.
+Input is only used to determine the shape of the output.
+This layer is used to allocate a tensor with a dynamic shape (that of the input) and constant value.
+
+Requires 1 input and produces 1 output.
+
+.. code::
+
+     y = FillLikeLayer(x)
+
+Input
+    An N-Dimensional tensor, whose values are ignored. Only the shape is used to
+    infer the shape of the output.
+
+Output
+    An N-Dimensional tensor with the same shape as the input tensor.
+
+
+.. code-block:: proto
+
+    message FillLikeLayerParams {
+
+        float value = 1;
+
+    }
+
+
+
+
+
+
+FillStaticLayerParams
+________________________________________________________________________________
+
+A layer that generates an output tensor with a constant value.
+This layer is used to allocate a tensor with a static shape and constant value.
+
+Requires no input and produces 1 output.
+
+.. code::
+
+     y = FillStaticLayer()
+
+Output
+    An N-Dimensional tensor of shape "targetShape".
+
+
+.. code-block:: proto
+
+    message FillStaticLayerParams {
+
+        float value = 1;
+        repeated uint64 targetShape = 2;
+
+    }
+
+
+
+
+
+
+FillDynamicLayerParams
+________________________________________________________________________________
+
+A layer that generates an output tensor with a constant value.
+This layer is used to allocate a tensor with a dynamic shape (as specified by the input) and constant value.
+
+Requires 1 input and produces 1 output.
+
+.. code::
+
+     y = FillDynamicLayer(x)
+
+Input
+    A rank 1 tensor specifying the shape of the output
+
+Output
+    An N-Dimensional tensor with the shape specified by the values in the input tensor.
+
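+In NumPy terms, the effect is similar to the following sketch (illustrative
+only, not part of coremltools):
+
+.. code-block:: python
+
+     import numpy as np
+
+     shape = np.array([2, 3])        # rank 1 input holding the output shape
+     y = np.full(tuple(shape), 7.0)  # 'value' = 7.0
+     print(y.shape)                  # (2, 3)
+
+
+.. 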
code-block:: proto + + message FillDynamicLayerParams { + + float value = 1; + + } + + + + + + +WhereBroadcastableLayerParams +________________________________________________________________________________ + +A layer that returns the elements either from tensor x or tensor y, +depending on the value in the condition tensor. +It is similar in functionality to the numpy.where method with 3 inputs. + +Requires 3 inputs and produces 1 output. +Inputs, in order, are the condition tensor, x and y. + +for each vector index (i,...,j): + output[i,...,j] = x[i,...,j] if condition[i,...,j] = True + y[i,...,j] if condition[i,...,j] = False + +All the 3 inputs are first broadcasted to a common shape. +(the shapes must be broadcastable) + +output.rank = max(input[0].rank, input[1].rank, input[2].rank) + + +.. code-block:: proto + + message WhereBroadcastableLayerParams { + + } + + + + + + +SinLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric sine function. + + +.. code:: + + y = SinLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message SinLayerParams { + + } + + + + + + +CosLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric cosine function. + + +.. code:: + + y = CosLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message CosLayerParams { + + } + + + + + + +TanLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric tangent function. + + +.. code:: + + y = TanLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message TanLayerParams { + + } + + + + + + +AsinLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric arcsine function. + + +.. code:: + + y = AsinLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message AsinLayerParams { + + } + + + + + + +AcosLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric arccosine function. + + +.. code:: + + y = AcosLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message AcosLayerParams { + + } + + + + + + +AtanLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric arctangent function. + + +.. code:: + + y = AtanLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message AtanLayerParams { + + } + + + + + + +SinhLayerParams +________________________________________________________________________________ + +A layer that computes elementwise trigonometric hyperbolic sine function. + + +.. code:: + + y = SinhLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. 
code-block:: proto + + message SinhLayerParams { + + } + + + + + + +CoshLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise hyperbolic cosine function. + + +.. code:: + + y = CoshLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message CoshLayerParams { + + } + + + + + + +TanhLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise hyperbolic tangent function. + + +.. code:: + + y = TanhLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message TanhLayerParams { + + } + + + + + + +AsinhLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise hyperbolic arcsine function. + + +.. code:: + + y = AsinhLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message AsinhLayerParams { + + } + + + + + + +AcoshLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise hyperbolic arccosine function. + + +.. code:: + + y = AcoshLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message AcoshLayerParams { + + } + + + + + + +AtanhLayerParams +________________________________________________________________________________ + +A layer that computes the elementwise hyperbolic arctangent function. + + +.. code:: + + y = AtanhLayer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message AtanhLayerParams { + + } + + + + + + +PowBroadcastableLayerParams +________________________________________________________________________________ + +A layer that raises each element in the first tensor to the power of the +corresponding element in the second tensor. +Supports conventional numpy-like broadcasting. + +.. code:: + + y = PowBroadcastableLayer(x1, x2) + +Requires 2 inputs and produces 1 output. + +Input + - First N-Dimensional tensor + - Second N-Dimensional tensor + +Output + An N-Dimensional tensor with the broadcast shape. + + +.. code-block:: proto + + message PowBroadcastableLayerParams { + + } + + + + + + +Exp2LayerParams +________________________________________________________________________________ + +A layer that computes the base-2 exponential of all elements in the input tensor. + + +.. code:: + + y = Exp2Layer(x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message Exp2LayerParams { + + } + + + + + + +WhereNonZeroLayerParams +________________________________________________________________________________ + +A layer that returns a tensor containing the indices of all non-zero +elements of the input tensor. +It is similar in functionality to the numpy.where method with 1 input. + +Requires 1 input and produces 1 output. +Output is of rank 2, of shape (N,R), +where N is the number of non-zero elements in the input and R is the rank of the input.
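+
+For intuition, this matches ``numpy.argwhere`` (an illustrative sketch, not the layer API):
+
+.. code-block:: python
+
+    import numpy as np
+
+    x = np.array([[1, 2, 1],
+                  [0, 2, 2],
+                  [2, 1, 0]])
+    idx = np.argwhere(x != 0)   # shape (N, R) = (7, 2), one multi-index per row
+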
+ +Output contains the indices represented in multi-index form. + +e.g.: +input {shape = (4,)}: +[0 1 0 2] +output {shape = (2,1)}: +[1] +[3] + + +input {shape = (3, 3)}: +[1 2 1] +[0 2 2] +[2 1 0] +output {shape = (7,2)}: +[0 0] +[0 1] +[0 2] +[1 1] +[1 2] +[2 0] +[2 1] + + +.. code-block:: proto + + message WhereNonZeroLayerParams { + + } + + + + + + +MatrixBandPartLayerParams +________________________________________________________________________________ + +A layer that copies a tensor, setting everything outside a central band in +each innermost matrix to zero. + +Requires 1 input and produces 1 output. + +In terms of the parameters num_lower ("numLower") and num_upper ("numUpper"): +band(m, n) = (num_lower < 0 || (m-n) <= num_lower) && (num_upper < 0 || (n-m) <= num_upper). +output[i, j, k, ..., m, n] = band(m, n) * input[i, j, k, ..., m, n] + + +Output shape is same as the input shape. +Rank of the input must be at least 2. +For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch. + + +.. code-block:: proto + + message MatrixBandPartLayerParams { + + int64 numLower = 1; + int64 numUpper = 2; + + } + + + + + + +UpperTriangularLayerParams +________________________________________________________________________________ + +A layer that copies a tensor, setting everything outside the upper triangular part to zero. + +Requires 1 input and produces 1 output. + +Output shape is same as the input shape. +Rank of the input must be at least 2. +For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch. + + +.. code-block:: proto + + message UpperTriangularLayerParams { + + int64 k = 1; // Diagonal below which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above + + } + + + + + + +LowerTriangularLayerParams +________________________________________________________________________________ + +A layer that copies a tensor, setting everything outside the lower triangular part to zero. + +Requires 1 input and produces 1 output. + +Output shape is same as the input shape. +Rank of the input must be at least 2. +For rank higher than 2, the last 2 dimensions are treated as the matrix, while the rest are treated as batch. + + +.. code-block:: proto + + message LowerTriangularLayerParams { + + int64 k = 1; // Diagonal above which to zero elements. k = 0 (the default) is the main diagonal, k < 0 is below it and k > 0 is above + + } + + + + + + +BroadcastToLikeLayerParams +________________________________________________________________________________ + +A layer that broadcasts a tensor to a new shape. + +Requires 2 inputs and produces 1 output. + +First input is broadcast to produce the output, while the second input is only +used to determine the shape of the output. Values of the second input are not used. + +Output is a tensor with the same shape as the second input. + + +.. code-block:: proto + + message BroadcastToLikeLayerParams { + + } + + + + + + +BroadcastToStaticLayerParams +________________________________________________________________________________ + +A layer that broadcasts a tensor to a new shape. + +Requires 1 input and produces 1 output. + +Output tensor is the broadcasted version of the input and has shape as specified in the +parameter "targetShape". + + +..
code-block:: proto + + message BroadcastToStaticLayerParams { + + repeated uint64 targetShape = 1; + + } + + + + + + +BroadcastToDynamicLayerParams +________________________________________________________________________________ + +A layer that broadcasts a tensor to a new shape. + +Requires 2 inputs and produces 1 output. + +First input is the one that is broadcasted to produce the output. +Second input is a rank 1 tensor specifying the shape of the output. +Output tensor has shape as specified by the values in the 2nd input tensor. + + +.. code-block:: proto + + message BroadcastToDynamicLayerParams { + + } + + + + + + +AddBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise addition operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message AddBroadcastableLayerParams { + + } + + + + + + +MaxBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise maximum operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message MaxBroadcastableLayerParams { + + } + + + + + + +MinBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise minimum operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message MinBroadcastableLayerParams { + + } + + + + + + +ModBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise modular operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message ModBroadcastableLayerParams { + + } + + + + + + +FloorDivBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise floor division operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message FloorDivBroadcastableLayerParams { + + } + + + + + + +SubtractBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise subtract operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message SubtractBroadcastableLayerParams { + + } + + + + + + +MultiplyBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise multiply operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message MultiplyBroadcastableLayerParams { + + } + + + + + + +DivideBroadcastableLayerParams +________________________________________________________________________________ + +A layer that performs element-wise division operation with broadcast support. + +Requires 2 inputs and produces 1 output. + + +.. code-block:: proto + + message DivideBroadcastableLayerParams { + + } + + + + + + +GatherLayerParams +________________________________________________________________________________ + +Gather layer that gathers elements from the first input, along a specified axis, +at indices specified in the second input. +It is similar in functionality to the numpy.take method. 
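+
+For intuition, a minimal numpy sketch of the same semantics (illustrative only, not the layer API):
+
+.. code-block:: python
+
+    import numpy as np
+
+    data = np.array([[0, 1, 2],
+                     [3, 4, 5]])            # shape (2, 3)
+    indices = np.array([1, 0, 1])           # shape (3,)
+    out = np.take(data, indices, axis=0)    # shape (3, 3): rows 1, 0, 1 of data
+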
+ +Requires 2 inputs and produces 1 output. + +Given two inputs, 'data' and 'indices', gather the slices of 'data' +and store into output. +e.g. +for i in [0, length(indices) - 1] + output[i] = data[indices[i]] (1-D case, axis=0) + +if axis = 0: +for each vector index (i,...,j) + output[i,...,j,:,..,:] = data[indices[i,...,j],:,..,:] + +output.rank = (data.rank - 1) + indices.rank + +Negative indices and negative axis are supported. + +e.g: + +data shape = (2, 3) +indices shape = (6, 8) +axis = 0 +output shape = (6, 8) + (3,) = (6, 8, 3) + +data shape = (2, 3, 5) +indices shape = (6, 8) +axis = 1 +output shape = (2,) + (6, 8) + (5,) = (2, 6, 8, 5) + + +.. code-block:: proto + + message GatherLayerParams { + + int64 axis = 1; + + } + + + + + + +ScatterLayerParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message ScatterLayerParams { + + int64 axis = 1; + ScatterMode mode = 2; + + } + + + + + + +GatherNDLayerParams +________________________________________________________________________________ + +A layer that gathers elements from the first input, 'params', at the multi-indices specified +by the second input, 'indices'. + +Requires 2 inputs and produces 1 output. + +'params' = input[0], 'indices' = input[1] + +'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is viewed as a collection of +indices of (I_0 * I_1 * ... * I_(K-1)) points in the I_K dimensional space. For instance, the multi-index of the first point +is indices[0,0,...,0,:]. + +Here is how the output is constructed: + +for i = 0,1,...,(I_0-1) + ... + for j = 0,1,....,(I_(K-1)-1) + output[i,....,j,:,:,..,:] = params[indices[i,...,j,:], :,:,..,:] + +Hence, output shape is [I_0, I_1,...,I(K-1)] + params.shape[I_K:] + +output.rank = indices.rank - 1 + params.rank - indices.shape[-1] + +e.g: + +input[0] shape = (4, 2, 3, 4) +input[1] shape = (6, 2) +output shape = (6,) + (3, 4) = (6, 3, 4) + +input[0] shape = (3, 3, 3, 4, 7) +input[1] shape = (3, 5) +output shape = (3,) + () = (3,) + +input[0] shape = (5, 3, 2, 5) +input[1] shape = (2, 7, 3, 2) +output shape = (2, 7, 3) + (2, 5) = (2, 7, 3, 2, 5) + + +.. code-block:: proto + + message GatherNDLayerParams { + + } + + + + + + +ScatterNDLayerParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message ScatterNDLayerParams { + + ScatterMode mode = 1; + + } + + + + + + +GatherAlongAxisLayerParams +________________________________________________________________________________ + +Gather layer that gathers elements from the first input, along a specified axis, +at indices specified in the second input. +It is similar in functionality to the numpy.take_along_axis method. + +Requires 2 inputs and produces 1 output. + +Given two inputs, 'data' and 'indices', gather the slices of 'data' +and store into output. + +Both inputs and output have the same rank. +Output shape is same as the shape of 'indices' +Shapes of 'indices' and 'data' match, except at the 'axis' dimension. + +This operation performs the following operation for axis=0: +for each vector index (i,j,....,k) + output[i,j,....,k] = data[index[i,j,....,k],j,....,k] + +Negative indices and negative axis are supported. + +e.g: + +data shape = (4, 4, 7) +indices shape = (4, 5, 7) +axis = 1 +output shape = (4, 5, 7) + + +.. 
code-block:: proto + + message GatherAlongAxisLayerParams { + + int64 axis = 1; + + } + + + + + + +ScatterAlongAxisLayerParams +________________________________________________________________________________ + +A layer that scatters data into a new tensor, according to indices from the input, +along the given axis. +This is the inverse operation of GatherAlongAxis. +It is similar in functionality to the numpy.put_along_axis method. + +Requires 3 inputs and produces 1 output. +The 3 inputs, in order, are denoted as "container", "indices", "updates". + +All inputs and output have the same rank. +Output shape is same as the shape of 'container'. +Shapes of 'indices' and 'updates' match, which is the same as the shape of 'container' except at the 'axis' dimension. + +Negative indices and negative axis are supported. + +This operation performs the following operation for axis=0: +output = container +for each vector index (i,j,....,k) + output[index[i,j,....,k],j,....,k] = updates[i,j,....,k] + +e.g.: + +container shape = (2, 5, 6) +indices shape = (2, 2, 6) +updates shape = (2, 2, 6) +axis = -2 +output shape = (2, 5, 6) + + +.. code-block:: proto + + message ScatterAlongAxisLayerParams { + + int64 axis = 1; + ScatterMode mode = 2; + + } + + + + + + +StackLayerParams +________________________________________________________________________________ + +A layer that stacks the input tensors along the given axis. +It is similar in functionality to the numpy.stack method. + +Requires at least 2 inputs and produces 1 output. +All inputs must have the same shape. +Rank of the output is 1 greater than the rank of the inputs. + +Negative indexing is supported for the "axis" parameter. + +e.g.: + +input shape = (2, 4, 2) +number of inputs = 5 +axis = 3 +output shape = (2, 4, 2, 5) + +input shape = (2, 4, 2) +number of inputs = 5 +axis = -2 +output shape = (2, 4, 5, 2) + + +.. code-block:: proto + + message StackLayerParams { + + int64 axis = 1; + + } + + + + + + +RankPreservingReshapeLayerParams +________________________________________________________________________________ + +A layer that reshapes a tensor without altering the rank of the input. +Order of the data is left unchanged. +In "targetShape", a value of 0 copies the corresponding input dimension, and a value of -1 is inferred from the remaining dimensions. + +Requires 1 input and produces 1 output. + +e.g: + +input shape = (20,10) +targetShape = (5,-1) +output shape = (5,40) + +input shape = (20,10,5) +targetShape = (0,2,25) +output shape = (20,2,25) + +input shape = (10,3,5) +targetShape = (25,0,-1) +output shape = (25,3,2) + + +.. code-block:: proto + + message RankPreservingReshapeLayerParams { + + repeated int64 targetShape = 1; + + } + + + + + + +ConstantPaddingLayerParams +________________________________________________________________________________ + +Constant padding layer. +Pad the input array with a constant value, either along a single given axis or along a set of axes. + +Requires 1 or 2 inputs and produces 1 output. +The amount of padding can be either set as a parameter ("padAmounts") or provided as a second input. + +Output rank is same as the rank of the first input.
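+
+For intuition, the default mode (padToGivenOutputSizeMode = False, described below) behaves like ``numpy.pad`` with constant values, with "padAmounts" holding a (before, after) pair per dimension (illustrative sketch only):
+
+.. code-block:: python
+
+    import numpy as np
+
+    x = np.zeros((20, 10))
+    # padAmounts = [0, 1, 4, 0]  ->  ((0, 1), (4, 0))
+    y = np.pad(x, ((0, 1), (4, 0)), mode="constant", constant_values=0.0)
+    assert y.shape == (21, 14)
+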
+ +when "padToGivenOutputSizeMode" is False: + +output_shape[i] = input_shape[i] + padAmounts[2*i] + padAmounts[2*i+1], i=0,...,rank-1 + +Examples: + +input shape = (20,10) +padAmounts = [0,1,4,0] +output shape = (21,14) + +input shape = (20,10,5) +padAmounts = [0,0,3,4,0,9] +output shape = (20,17,14) + + +when "padToGivenOutputSizeMode" is True + +output_shape[i] = max(input_shape[i], max(padAmounts[2*i] + padAmounts[2*i+1])), i=0,...,rank-1 + +input shape = (20,10) +padAmounts = [0,21,14,0] +output shape = (21,14) + +input shape = (20,10,5) +padAmounts = [0,0,17,0,0,14] +output shape = (20,17,14) + + +.. code-block:: proto + + message ConstantPaddingLayerParams { + float value = 1; + + repeated uint64 padAmounts = 2; + + bool padToGivenOutputSizeMode = 3; + } + + + + + + +RandomNormalLikeLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the normal distribution. + +Requires 1 input and produces 1 output. + +Parameters + seed: seed used for the normal distribution. + mean: mean of the normal distribution. + stdDev: standard deviation of the normal distribution. + +Input + An N-Dimensional tensor, whose values are ignored. Only the shape is used to + infer the shape of the output. + +Output + An N-Dimensional tensor with the same shape as the input tensor. + + +.. code-block:: proto + + message RandomNormalLikeLayerParams { + + int64 seed = 1; + float mean = 2; + float stdDev = 3; + + } + + + + + + +RandomNormalStaticLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the normal distribution. + +Requires no input and produces 1 output. + +Parameters + seed: seed used for the normal distribution. + mean: mean of the normal distribution. + stdDev: standard deviation of the normal distribution. + outputShape: shape of the output tensor. + +Output + An N-Dimensional tensor of shape "outputShape". + + +.. code-block:: proto + + message RandomNormalStaticLayerParams { + + int64 seed = 1; + float mean = 2; + float stdDev = 3; + repeated uint64 outputShape = 4; + + } + + + + + + +RandomNormalDynamicLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the normal distribution. + +Requires 1 input and produces 1 output. + +Parameters: + seed: seed used for the normal distribution. + mean: mean of the normal distribution. + stdDev: standard deviation of the normal distribution. + +Input + A rank 1 tensor specifying the shape of the output + +Output + An N-Dimensional tensor with the shape specified by the values in the input tensor. + + +.. code-block:: proto + + message RandomNormalDynamicLayerParams { + + int64 seed = 1; + float mean = 2; + float stdDev = 3; + + } + + + + + + +RandomUniformLikeLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the uniform distribution. + +Requires 1 input and produces 1 output. + +Parameters + seed: seed used for the uniform distribution. + minVal: lower bound on the range of random values for the uniform distribution. + maxVal: upper bound on the range of random values for the uniform distribution. + +Input + An N-Dimensional tensor, whose values are ignored. Only the shape is used to + infer the shape of the output. 
+ +Output + An N-Dimensional tensor with the same shape as the input tensor. + + +.. code-block:: proto + + message RandomUniformLikeLayerParams { + + int64 seed = 1; + float minVal = 2; + float maxVal = 3; + + } + + + + + + +RandomUniformStaticLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the uniform distribution. + +Requires no input and produces 1 output. + +Parameters + seed: seed used for the uniform distribution. + minVal: lower bound on the range of random values for the uniform distribution. + maxVal: upper bound on the range of random values for the uniform distribution. + outputShape: shape of the output tensor. + +Output + An N-Dimensional tensor of shape "outputShape". + + +.. code-block:: proto + + message RandomUniformStaticLayerParams { + + int64 seed = 1; + float minVal = 2; + float maxVal = 3; + repeated uint64 outputShape = 4; + + } + + + + + + +RandomUniformDynamicLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the uniform distribution. + +Requires 1 input and produces 1 output. + +Parameters: + seed: seed used for the uniform distribution. + minVal: lower bound on the range of random values for the uniform distribution. + maxVal: upper bound on the range of random values for the uniform distribution. + +Input + A rank 1 tensor specifying the shape of the output + +Output + An N-Dimensional tensor with the shape specified by the values in the input tensor. + + +.. code-block:: proto + + message RandomUniformDynamicLayerParams { + + int64 seed = 1; + float minVal = 2; + float maxVal = 3; + + } + + + + + + +RandomBernoulliLikeLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the Bernoulli distribution. + +Requires 1 input and produces 1 output. + +Parameters + seed: seed used for the Bernoulli distribution. + prob: probability of a 1 event. + +Input + An N-Dimensional tensor, whose values are ignored. Only the shape is used to + infer the shape of the output. + +Output + An N-Dimensional tensor with the same shape as the input tensor. + + +.. code-block:: proto + + message RandomBernoulliLikeLayerParams { + + int64 seed = 1; + float prob = 2; + + } + + + + + + +RandomBernoulliStaticLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the Bernoulli distribution. + +Requires no input and produces 1 output. + +Parameters + seed: seed used for the Bernoulli distribution. + prob: probability of a 1 event. + outputShape: shape of the output tensor. + +Output + An N-Dimensional tensor of shape "outputShape". + + +.. code-block:: proto + + message RandomBernoulliStaticLayerParams { + + int64 seed = 1; + float prob = 2; + repeated uint64 outputShape = 3; + + } + + + + + + +RandomBernoulliDynamicLayerParams +________________________________________________________________________________ + +A layer that returns a tensor filled with values from the Bernoulli distribution. + +Requires 1 input and produces 1 output. + +Parameters: + seed: seed used for the Bernoulli distribution. + prob: probability of a 1 event. + +Input + A rank 1 tensor specifying the shape of the output + +Output + An N-Dimensional tensor with the shape specified by the values in the input tensor. + + +.. 
code-block:: proto + + message RandomBernoulliDynamicLayerParams { + + int64 seed = 1; + float prob = 2; + + } + + + + + + +CategoricalDistributionLayerParams +________________________________________________________________________________ + +A layer that returns a tensor of the specified shape filled with values from the categorical distribution. + +Requires 1 input and produces 1 output. + +Parameter: + seed: seed used for the categorical distribution. + numSamples: number of samples to draw. + isLogits: true if the inputs are logits, false if the inputs are probabilities. + eps: default value is 1e-10. + temperature: default value is 1.0. + +Input tensor shape = [D_1, D_2, ... , D_(R-1), D_R] (Rank = R) +Then the shape of the output is [D_1, D_2, ... , D_(R-1), numSamples] (Rank = R) + + +.. code-block:: proto + + message CategoricalDistributionLayerParams { + + int64 seed = 1; + int64 numSamples = 2; + bool isLogits = 3; + float eps = 4; + float temperature = 5; + } + + + + + + +ReduceL1LayerParams +________________________________________________________________________________ + +A layer that performs reduction with L1 normalization operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceL1LayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceL2LayerParams +________________________________________________________________________________ + +A layer that performs reduction with L2 normalization operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceL2LayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceMaxLayerParams +________________________________________________________________________________ + +A layer that performs reduction with max operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceMaxLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceMinLayerParams +________________________________________________________________________________ + +A layer that performs reduction with min operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. 
code-block:: proto + + message ReduceMinLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceSumLayerParams +________________________________________________________________________________ + +A layer that performs reduction with sum operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceSumLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceProdLayerParams +________________________________________________________________________________ + +A layer that performs reduction with prod operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceProdLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceMeanLayerParams +________________________________________________________________________________ + +A layer that performs reduction with mean operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceMeanLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceLogSumLayerParams +________________________________________________________________________________ + +A layer that performs reduction with logSum operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceLogSumLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceSumSquareLayerParams +________________________________________________________________________________ + +A layer that performs reduction with sumSquare (sum of the squares) operation. + +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceSumSquareLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ReduceLogSumExpLayerParams +________________________________________________________________________________ + +A layer that performs reduction with logSumExp operation.
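+
+For intuition, the "axes" / "keepDims" / "reduceAll" semantics shared by all the Reduce* layers mirror numpy reductions; an illustrative sketch for logSumExp (not the layer API):
+
+.. code-block:: python
+
+    import numpy as np
+
+    x = np.random.rand(2, 3, 4)
+    # axes = [-1], keepDims = True
+    y = np.log(np.sum(np.exp(x), axis=-1, keepdims=True))   # shape (2, 3, 1)
+    # reduceAll = True ignores "axes" and reduces over everything
+    z = np.log(np.sum(np.exp(x)))                           # scalar
+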
+ +Negative indexing is supported. +Requires 1 input and produces 1 output. + +Parameters: + axes: dimensions along which to perform reduction + keepDims: if True, keep the reduced dimensions (value will be 1), otherwise, reduced dimensions are squeezed + reduceAll: ignore the "axes" parameter, perform reduction along all axes + + +.. code-block:: proto + + message ReduceLogSumExpLayerParams { + + repeated int64 axes = 1; + bool keepDims = 2; + bool reduceAll = 3; + + } + + + + + + +ExpandDimsLayerParams +________________________________________________________________________________ + +A layer that increases the rank of the input tensor by adding unit dimensions. + +Requires 1 input and produces 1 output. + +e.g.: + +input shape = (10,5) +axes = (0,1) +output shape = (1,1,10,5) + +input shape = (10,5) +axes = (0,2) +output shape = (1,10,1,5) + +input shape = (10,5) +axes = (-2,-1) +output shape = (10,5,1,1) + + +.. code-block:: proto + + message ExpandDimsLayerParams { + + repeated int64 axes = 1; + + } + + + + + + +FlattenTo2DLayerParams +________________________________________________________________________________ + +A layer that flattens the input tensor into a 2-dimensional matrix. + +Requires 1 input and produces 1 output. +Output tensor is always rank 2. + +First dimension of output is the product of all the dimensions in input[:axis] ("axis" is exclusive) +Second dimension of output is the product of all the dimensions in input[axis:] ("axis" is inclusive) + +e.g.: +input shape: (3,) +axis: -1 +output shape: (1, 3) + +input shape: (3,) +axis: 1 +output shape: (3, 1) + +input shape: (4, 3) +axis: -1 +output shape: (4, 3) + +input shape: (5, 2) +axis: 0 +output shape: (1, 10) + +input shape: (5, 5, 3) +axis: -2 +output shape: (5, 15) + +input shape: (2, 3, 2) +axis: -1 +output shape: (6, 2) + + +.. code-block:: proto + + message FlattenTo2DLayerParams { + + int64 axis = 1; + + } + + + + + + +ReshapeStaticLayerParams +________________________________________________________________________________ + +A layer that reshapes a tensor. + +Requires 1 input and produces 1 output. + +Output tensor is the reshaped version of the input and has shape as specified in the +parameter "targetShape". + + +.. code-block:: proto + + message ReshapeStaticLayerParams { + + repeated int64 targetShape = 1; + + } + + + + + + +ReshapeLikeLayerParams +________________________________________________________________________________ + +A layer that reshapes a tensor. + +Requires 2 inputs and produces 1 output. + +First input is reshaped to produce the output, while the second input is only +used to determine the shape of the output. Values of the second input are not used. + +Output is a tensor with the same shape as the second input. + + +.. code-block:: proto + + message ReshapeLikeLayerParams { + + } + + + + + + +ReshapeDynamicLayerParams +________________________________________________________________________________ + +A layer that reshapes a tensor. + +Requires 2 inputs and produces 1 output. + +First input is the one that is reshaped to produce the output. +Second input is a rank 1 tensor specifying the shape of the output. +Output tensor has shape as specified by the values in the 2nd input tensor. + + +.. code-block:: proto + + message ReshapeDynamicLayerParams { + + } + + + + + + +SqueezeLayerParams +________________________________________________________________________________ + +A layer that decreases the rank of the input tensor by removing unit dimensions. 
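+
+This mirrors ``numpy.squeeze`` with an explicit tuple of axes (illustrative sketch only):
+
+.. code-block:: python
+
+    import numpy as np
+
+    x = np.zeros((1, 1, 10, 5))
+    y = np.squeeze(x, axis=(0, 1))   # shape (10, 5)
+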
+ +Requires 1 input and produces 1 output. + +Output rank is the input rank minus the number of squeezed dimensions, if input rank is more than 1. +If input rank is 1, output rank is also 1. + +e.g.: + +input shape = (1,1,10,5) +axes = (0,1) +output shape = (10,5) + +input shape = (1,10,5,1) +axes = (0,3) +output shape = (10,5) + +input shape = (10,5,1,1) +axes = (-2,-1) +output shape = (10,5) + +input shape = (1,) +axes = (0) +output shape = (1,) + + +.. code-block:: proto + + message SqueezeLayerParams { + + repeated int64 axes = 1; + bool squeezeAll = 2; // if true squeeze all dimensions that are 1. + + } + + + + + + +TopKLayerParams +________________________________________________________________________________ + +A layer that returns top K (or bottom K) values and the corresponding indices +of the input along a given axis. + +Requires 1 or 2 inputs and produces 2 outputs. + +The second input is the value of K, and is optional. +If there is only one input, the value of K specified in the layer parameter is used. + +Both outputs have the same rank as the first input. +Second input must correspond to a scalar tensor. + +e.g.: + +first input's shape = (45, 34, 10, 5) +axis = 1 +output shape, for both outputs = (45, K, 10, 5) + + +.. code-block:: proto + + message TopKLayerParams { + + int64 axis = 1; + uint64 K = 2; + bool useBottomK = 3; + + } + + + + + + +ArgMaxLayerParams +________________________________________________________________________________ + +A layer that returns the indices of the maximum value along a specified axis in a tensor. + +Requires 1 input and produces 1 output. Negative indexing is supported. + +Output has the same rank as the input if "removeDim" is False (default). +Output has rank one less than the input if "removeDim" is True and input rank is more than 1. + +e.g.: + +input shape = (45, 34, 10, 5) +axis = -2 +output shape = (45, 1, 10, 5), if removeDim = False (default) +output shape = (45, 10, 5), if removeDim = True + +input shape = (5,) +axis = 0 +output shape = (1,), if removeDim = False or True + + +.. code-block:: proto + + message ArgMaxLayerParams { + + int64 axis = 1; + bool removeDim = 2; + + } + + + + + + +ArgMinLayerParams +________________________________________________________________________________ + +A layer that returns the indices of the minimum value along a specified axis in a tensor. + +Requires 1 input and produces 1 output. Negative indexing is supported. + +Output has the same rank as the input if "removeDim" is False (default). +Output has rank one less than the input if "removeDim" is True and input rank is more than 1. + +e.g.: + +input shape = (45, 34, 10, 5) +axis = -2 +output shape = (45, 1, 10, 5), if removeDim = False (default) +output shape = (45, 10, 5), if removeDim = True + +input shape = (5,) +axis = 0 +output shape = (1,), if removeDim = False or True + + +.. code-block:: proto + + message ArgMinLayerParams { + + int64 axis = 1; + bool removeDim = 2; + + } + + + + + + +SplitNDLayerParams +________________________________________________________________________________ + +A layer that splits the input tensor into multiple output tensors, +along the specified axis. + +The layer either uniformly splits the input tensor into ``num_splits`` tensors, or +splits according to the given split sizes in ``split_sizes``. +Supports unequal splits and negative indexing. + +Requires 1 input and produces at least 2 outputs. +Rank of all the outputs is same as that of the input.
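+
+For intuition, this corresponds to ``numpy.split``, which takes cut points rather than sizes; an illustrative sketch of the split_sizes = [3, 2] example given below:
+
+.. code-block:: python
+
+    import numpy as np
+
+    x = np.zeros((5, 3, 4))
+    # splitSizes = [3, 2] along axis 0  ->  a single cut point at index 3
+    a, b = np.split(x, [3], axis=0)   # shapes (3, 3, 4) and (2, 3, 4)
+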
+ +If parameter "splitSizes" is provided, value of the parameter "numSplits" is ignored, since in that case +"numSplits" is automatically inferred to be the length of "splitSizes". + + +e.g.: +input shape: (5, 3, 4) +axis = -3, split_sizes = [3, 2] +output shape: (3, 3, 4) +output shape: (2, 3, 4) + + +.. code-block:: proto + + message SplitNDLayerParams { + + int64 axis = 1; + uint64 numSplits = 2; + repeated uint64 splitSizes = 3; + + } + + + + + + +CeilLayerParams +________________________________________________________________________________ + +A layer that performs element-wise ceil operation on the input tensor that +rounds the value to the smallest integer not less than x. + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message CeilLayerParams { + + } + + + + + + +RoundLayerParams +________________________________________________________________________________ + +A layer that performs element-wise round operation on the input tensor +that rounds the value to the nearest integer. + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message RoundLayerParams { + + } + + + + + + +FloorLayerParams +________________________________________________________________________________ + +A layer that performs element-wise floor operation on the input tensor +that rounds the value to the largest integer not greater than x. + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message FloorLayerParams { + + } + + + + + + +SignLayerParams +________________________________________________________________________________ + +A layer that performs element-wise sign operation (+1 for positive values, +-1 for negative values, 0 for zeros). + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message SignLayerParams { + + } + + + + + + +ClipLayerParams +________________________________________________________________________________ + +A layer that performs element-wise clip operation. Clip the values in the +input tensor to the threshold values [min_value, max_value]. + +Requires 1 input and produces 1 output. + +Parameter minVal: the minimum threshold. +Parameter maxVal: the maximum threshold. + +output = min(max(input, minVal), maxVal) + +Output shape is same as the input. + + +.. code-block:: proto + + message ClipLayerParams { + + float minVal = 1; + float maxVal = 2; + + } + + + + + + +SliceStaticLayerParams +________________________________________________________________________________ + +A layer that extracts a slice of size ``(end - begin) / stride`` +from the given input tensor. +Support negative indexing and negative strides. + +Requires 1 input and produces 1 output. +Output rank is same as the input rank. + +Value of beginIds, beginMasks, endIds, endMasks, strides are required parameters. +Lengths of all the parameters must equal the rank of the input. + +i-th element of "beginIds" is ignored and assumed to be 0 if the i-th element of +"beginMasks" is True + +i-th element of "endIds" is ignored and assumed to be -1 if the i-th element of +"endMasks" is True + +e.g.: +if i-th element of "squeezeMasks" is set to True, only beginIds[i] would be sliced +out, and all other masks and inputs are ignored. + +e.g. 
(without squeezeMasks): +input shape: (5, 5, 5) +beginIds: [1, 2, 3] +beginMasks: [True, False, True] +endIds: [3, -3, 2] +endMasks: [False, True, True] +strides: [2, 2, 2] +SqueezeMasks: [False, False, False] +output shape: (2, 2, 3) +This is equivalent to input[:3:2, 2::2, ::2] + +e.g. (with squeezeMasks): +input shape: (5, 5, 5) +beginIds: [1, 2, 3] +beginMasks: [True, False, True] +endIds: [3, -3, 2] +endMasks: [False, True, True] +strides: [2, 2, 2] +SqueezeMasks: [False, True, False] +output shape: (2, 3) +This is equivalent to input[:3:2, 2, ::2] + + +.. code-block:: proto + + message SliceStaticLayerParams { + + repeated int64 beginIds = 1; + repeated bool beginMasks = 2; + repeated int64 endIds = 3; + repeated bool endMasks = 4; + repeated int64 strides = 5; + repeated bool squeezeMasks = 6; + + + } + + + + + + +SliceDynamicLayerParams +________________________________________________________________________________ + +A layer that extracts a slice of size ``(end - begin) / stride`` +from the given input tensor. +Support negative indexing and negative strides. +See "SliceStaticLayerParams" for the description and an example of the functionality of the layer. + +Requires 2 to 7 inputs and produces 1 output. +Rank of the output is same as the rank of the first input unless squeezeMask is set. + +Value of beginIds, beginMasks, endIds, endMasks, strides can be passed in either +as dynamic inputs or as static parameters. +Lengths of all the parameters or inputs from 2-6 must equal the rank of the first input. + +The 2nd input represents the "beginIds". +The 3rd input, if present, corresponds to "endIds". In this case the value of the "endIds" parameter is ignored. +The 4th input, if present, corresponds to "strides". In this case the value of the "strides" parameter is ignored. +The 5th input, if present, corresponds to "beginMasks". In this case the value of the "beginMasks" parameter is ignored. +The 6th input, if present, corresponds to "endMasks". In this case the value of the "endMasks" parameter is ignored. +The 7th input, if present, corresponds to "squeezeMasks". In this case the value of the "squeezeMasks" parameter is ignored. + + +.. code-block:: proto + + message SliceDynamicLayerParams { + + repeated bool beginMasks = 2; + repeated int64 endIds = 3; + repeated bool endMasks = 4; + repeated int64 strides = 5; + repeated bool squeezeMasks = 6; + + } + + + + + + +TileLayerParams +________________________________________________________________________________ + +A layer that constructs a tensor by repeating the input tensor multiple +number of times. + +Requires 1 or 2 inputs and produces 1 output. +Output rank is same as the input rank. + +If two inputs are provided, second input is used as "reps" +and "reps" parameter is ignored. + +If only one input is provided, +length of the "reps" parameter must be at least 1 and +not greater than the rank of the input. +If it is less than the input rank, it is made equal to the input rank by prepending 1's to it. + +e.g.: + +input shape = (2, 4, 2) +reps = (1, 2, 6) +output shape = (2, 8, 12) + +input shape = (2, 4, 2) +reps = (6) +reps after prepending ones = (1, 1, 6) +output shape = (2, 4, 12) + +input shape = (2, 4, 2) +second input = [1, 2, 6] -> shape: (3,) +reps = N/A [Ignored] +output shape = (2, 8, 12) + + +.. 
code-block:: proto + + message TileLayerParams { + + repeated uint64 reps = 1; + + } + + + + + + +GetShapeLayerParams +________________________________________________________________________________ + +A layer that returns the shape of an input tensor. + +Requires 1 input and produces 1 output. + +Input: a tensor. +Output: a vector of length R, where R is the rank of the input tensor +Output is always a rank 1 tensor. + + +.. code-block:: proto + + message GetShapeLayerParams { + + } + + + + + + +ErfLayerParams +________________________________________________________________________________ + +A layer that computes the Gauss error function, +which is defined as: + +.. math:: + f(x) = \dfrac{1}{\sqrt{\pi}}\int_{-x}^{x}{e^{-t^2}dt} + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message ErfLayerParams { + + } + + + + + + +GeluLayerParams +________________________________________________________________________________ + +A layer that evaluates the Gaussian Error Linear Unit (GELU) activation. +Following equations are used to compute the activation based on the value of the "mode" parameter: + +mode == 'EXACT': +.. math:: + f(x) = 0.5x\left ( 1+\rm{erf}\left ( \frac{x}{\sqrt{2}} \right ) \right ) + +mode == 'TANH_APPROXIMATION': +.. math:: + f(x) = 0.5x\left ( 1+\rm{tanh}\left ( \sqrt{2/\pi}\left ( x + 0.044715x^3 \right ) \right ) \right ) + +mode == 'SIGMOID_APPROXIMATION': +.. math:: + f(x) = x*\rm{sigmoid}(1.702x) + +Requires 1 input and produces 1 output. +Output shape is same as the input. + + +.. code-block:: proto + + message GeluLayerParams { + + enum GeluMode { + + EXACT = 0; + TANH_APPROXIMATION = 1; + SIGMOID_APPROXIMATION = 2; + + } + + GeluMode mode = 1; + + } + + + + + + +RangeStaticLayerParams +________________________________________________________________________________ + +RangeStatic layer that returns a tensor that contains evenly spaced values. +It is similar in functionality to the numpy.arange method. + +Requires no input and produces 1 output. +Output is a rank 1 tensor. + + +.. code-block:: proto + + message RangeStaticLayerParams { + + float endValue = 1; + float startValue = 2; + float stepSizeValue = 3; + + } + + + + + + +RangeDynamicLayerParams +________________________________________________________________________________ + +A layer that returns a tensor that contains evenly spaced values. +Its functionality is similar to the numpy.arange method. + +Requires at least 1 input, up to a maximum of 3 inputs. +Produces 1 output, which is a rank 1 tensor. + +Each input must be a scalar, or rank 1 and shape (1,). + +The first input represents the "endValue". +The second input, if present, corresponds to "startValue". In this case the value of the "startValue" parameter is ignored. +The third input, if present, corresponds to "stepSizeValue". In this case the value of the "stepSizeValue" parameter is ignored. + + +.. code-block:: proto + + message RangeDynamicLayerParams { + + float startValue = 2; + float stepSizeValue = 3; + + } + + + + + + +SlidingWindowsLayerParams +________________________________________________________________________________ + +A layer that returns a tensor containing all windows of size ``windowSize`` +separated by ``step`` along the dimension ``axis``. + +.. code:: + + y = SlidingWindows(x) + +Requires 1 input and produces 1 output. + +Input + An N-Dimensional tensor. + +Output + An (N+1)-Dimensional tensor. 
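+
+For intuition, with axis = 0 and a rank 1 input this resembles ``numpy.lib.stride_tricks.sliding_window_view`` subsampled by ``step`` (illustrative sketch only; the general shape rules follow below):
+
+.. code-block:: python
+
+    import numpy as np
+    from numpy.lib.stride_tricks import sliding_window_view
+
+    x = np.arange(10)                          # L = 10
+    w, step = 4, 2                             # W = 4
+    out = sliding_window_view(x, w)[::step]    # shape (M, W) = (4, 4), M = (L - W) // step + 1
+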
+ +This operation behaves as follows: + - if axis = 0 & input is rank 1 (L,). Output shape will be (M, W). + - if axis = 1 & input is rank 3 (B1, L, C1). Output shape will be (B1, M, W, C1) + - if axis = 2 & input is rank 5 (B1, B2, L, C1, C2) --> (B1 * B2, L, C1 * C2) --> (B1 * B2, M, W, C1 * C2). Output shape will be (B1, B2, M, W, C1, C2) + - etc. +where + - L, C, B refer to input length, feature dimension length & batch size respectively + - W is the window size. + - M is the number of windows/slices calculated as M = (L - W) / step + 1 + + +.. code-block:: proto + + message SlidingWindowsLayerParams { + + int64 axis = 1; + uint64 windowSize = 2; + uint64 step = 3; + + } + + + + + + +LayerNormalizationLayerParams +________________________________________________________________________________ + +A layer that applies layer normalization over the input tensor. + +Requires 1 input and produces 1 output. + +output = gamma * (input - computed_mean) / (sqrt(computed_variance + eps)) + beta + +Parameters + normalizedShape: subset of the input shape, along which layer norm is performed; the rest of the input shape is treated as the batch dimensions. The mean and variance are computed for the input, over the last few dimensions as specified by the normalizedShape parameter. + gamma: must have shape = "normalizedShape" + beta: must have shape = "normalizedShape" + eps: small constant to avoid division by 0 + +Output shape is same as the input. + +e.g.: +input shape = (10,5) +normalized shape = (5,) or (10,5) + +input shape = (10,5,6,7) +normalized shape = (7,) or (6,7) or (5,6,7) or (10,5,6,7) + + +.. code-block:: proto + + message LayerNormalizationLayerParams { + + repeated int64 normalizedShape = 1; + float eps = 2; + WeightParams gamma = 3; + WeightParams beta = 4; + + } + + + + + + +NonMaximumSuppressionLayerParams +________________________________________________________________________________ + +Non maximum suppression (NMS) layer. +Applies the non maximum suppression algorithm to input bounding box coordinates. +The effect of this layer is similar to the functionality of the "NonMaximumSuppression" +model type (for details please see NonMaximumSuppression.proto) with a couple of differences. +First, this is a layer in a neural network model, whereas that is a different model type. Second, +this layer supports a batch of bounding boxes. + +The NMS layer requires at least 2 inputs, and up to a maximum of 5 inputs. It produces 4 outputs. +Following is the description of inputs and outputs: + +input 1, shape (B,N,4): coordinates of N boxes, for a batch size B. +input 2, shape (B,N,C): class scores for each box. C can be 1 when there is only 1 score per box, i.e., no class specific score. + +input 3, optional, shape (1,): IoU threshold. When present, it overwrites the value provided in layer parameter "iouThreshold". +input 4, optional, shape (1,): Score threshold. When present, it overwrites the value provided in layer parameter "scoreThreshold". +input 5, optional, shape (1,): Maximum number of boxes. When present, it overwrites the value provided in layer parameter "maxBoxes". + +output 1, shape (B,maxBoxes,4): box coordinates, corresponding to the surviving boxes. +output 2, shape (B,maxBoxes,C): box scores, corresponding to the surviving boxes. +output 3, shape (B,maxBoxes): indices of the surviving boxes. Hence it will have values in the range [0,N-1], except for padding. +output 4, shape (B,): number of boxes selected after the NMS algorithm, for each batch.
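+
+For reference, the IoU criterion between two boxes given as (x_center, y_center, width, height), the coordinate convention described below, can be sketched as follows (an illustrative helper, not part of the specification):
+
+.. code-block:: python
+
+    def iou(a, b):
+        # a, b: (x_center, y_center, width, height)
+        ax0, ay0, ax1, ay1 = a[0] - a[2] / 2, a[1] - a[3] / 2, a[0] + a[2] / 2, a[1] + a[3] / 2
+        bx0, by0, bx1, by1 = b[0] - b[2] / 2, b[1] - b[3] / 2, b[0] + b[2] / 2, b[1] + b[3] / 2
+        iw = max(0.0, min(ax1, bx1) - max(ax0, bx0))   # intersection width
+        ih = max(0.0, min(ay1, by1) - max(ay0, by0))   # intersection height
+        inter = iw * ih
+        union = a[2] * a[3] + b[2] * b[3] - inter
+        return inter / union if union > 0 else 0.0
+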
+ +When surviving boxes are less than "maxBoxes", the first 3 outputs are padded. +For the first two outputs, the padding is done using values 0, whereas for the third output the +padding value used is -1, since the output values represent indices. + +If no box survives, that is, all the scores are below the "scoreThreshold", +then for that batch, number of boxes (value of the fourth output) will be 1. The first 3 outputs will +correspond to the box with the highest score. This is to avoid generating an "empty" output. + +The four values that describe the box dimensions are (in order): + + - x (center location of the box along the horizontal axis) + - y (center location of the box along the vertical axis) + - width (size of box along the horizontal axis) + - height (size of box on along the vertical axis) + +In each batch, +the N scores for N boxes, used for suppression, are generated by taking the max of the matrix (N,C) +along the columns. +If "perClassSuppression" flag is false, suppression happens across all classes. +If "perClassSuppression" flag is true, each box is assigned to the class with the highest +score and then the suppression happens separately for boxes within the same class. + +Note that the 4th output can be used to dynamically slice the first 3 outputs, in case +the padded outputs are not required. + + +.. code-block:: proto + + message NonMaximumSuppressionLayerParams { + float iouThreshold = 1; + + float scoreThreshold = 2; + + uint64 maxBoxes = 3; + + bool perClassSuppression = 4; + } + + + + + + +ClampedReLULayerParams +________________________________________________________________________________ + +A layer that performs element-wise clamped ReLU operation. + +Requires 1 input and produces 1 output. + +This function has the following formula: + +.. math:: + f(x) = \begin{cases} + \text{min}(\text{beta},x) \;\; \text{if} \;\; x \geq 0\\ + \text{min}(\text{beta} ,\text{alpha}\cdot x) \;\; \text{if} \;\; x<0 + \end{cases} + +Output shape is same as the input. + +Available (iOS >= 14, macOS >= 10.16, watchOS >= 7) + + +.. code-block:: proto + + message ClampedReLULayerParams { + + float alpha = 1; + float beta = 2; + + } + + + + + + +ArgSortLayerParams +________________________________________________________________________________ + +A layer that returns the indices that would sort the input tensor, along a specified axis. + +Requires 1 input and produces 1 output. + +Output has the same rank and shape as the input. + +Value of "axis" must be positive and less than the rank of the input. + +e.g.: + +input shape = (5,) +axis = 0 +input values = [3.1, 5.4, 32.9, 3.2, 77.0] +output shape = (5,) +output values = [0, 3, 1, 2, 4], descending = False +output values = [4, 2, 1, 3, 0], descending = True + +input shape = (2,3) +axis = 1 +input values = [[3, 5, 32], [3, 77, 6]] +output shape = (2,3) +output values = [[0, 1, 2], [0, 2, 1]], descending = False +output values = [[2, 1, 0], [1, 2, 0]], descending = True + + +.. code-block:: proto + + message ArgSortLayerParams { + + int64 axis = 1; + bool descending = 2; + + } + + + + + + +SliceBySizeLayerParams +________________________________________________________________________________ + +A layer that does slice operation by providing size to be extracted +from the given input tensor. + +Requires 2 inputs and produces 1 output. +Rank of the output is same as the rank of the first input. + +The 1st input represents the tensor to be sliced. +The 2nd input represents the beginning index to be sliced from. 
+ +Example: +Input 1: x (x.shape = (2, 3, 4)) +Input 2: begin +size: 2 +axis: 1 + +Output: x[:, begin:begin+2, :] + + +.. code-block:: proto + + message SliceBySizeLayerParams { + + int64 size = 2; + int64 axis = 3; + + } + + + + + + +NeuralNetworkClassifier +________________________________________________________________________________ + +A neural network specialized as a classifier. + + +.. code-block:: proto + + message NeuralNetworkClassifier { + + repeated NeuralNetworkLayer layers = 1; + repeated NeuralNetworkPreprocessing preprocessing = 2; + + // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs + NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; + + // use this enum value to determine the input tensor shapes to the neural network, for image inputs + NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; + + NetworkUpdateParameters updateParams = 10; + + // The set of labels for every possible class. + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } + + // The name of the output blob containing the probability of each class. + // In other words, the score vector. Must be a 1-D tensor with the same + // number and order of elements as ClassLabels. + string labelProbabilityLayerName = 200; + } + + + + + + +OneHotLayerParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message OneHotLayerParams { + + uint64 oneHotVectorSize = 1; + int64 axis = 2; + float onValue = 3; + float offValue = 4; + } + + + + + + +CumSumLayerParams +________________________________________________________________________________ + + + + +.. code-block:: proto + + message CumSumLayerParams { + + int64 axis = 1; + + bool excludeFinalSum = 2; + + bool reverse = 3; + } + + + + + + +NeuralNetworkRegressor +________________________________________________________________________________ + +A neural network specialized as a regressor. + + +.. code-block:: proto + + message NeuralNetworkRegressor { + + repeated NeuralNetworkLayer layers = 1; + repeated NeuralNetworkPreprocessing preprocessing = 2; + + // use this enum value to determine the input tensor shapes to the neural network, for multiarray inputs + NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5; + + // use this enum value to determine the input tensor shapes to the neural network, for image inputs + NeuralNetworkImageShapeMapping imageInputShapeMapping = 6; + + NetworkUpdateParameters updateParams = 10; + + } + + + + + + +NetworkUpdateParameters +________________________________________________________________________________ + +Details on how the network will be updated + + +.. code-block:: proto + + message NetworkUpdateParameters { + + repeated LossLayer lossLayers = 1; + Optimizer optimizer = 2; + Int64Parameter epochs = 3; + + BoolParameter shuffle = 10; + + Int64Parameter seed = 20; + } + + + + + + +LossLayer +________________________________________________________________________________ + +Loss layer - categorical cross entropy and mean squared error are the only supported loss functions currently + + +.. 
code-block:: proto
+
+    message LossLayer {
+
+        string name = 1;
+        oneof LossLayerType {
+
+            CategoricalCrossEntropyLossLayer categoricalCrossEntropyLossLayer = 10;
+            MeanSquaredErrorLossLayer meanSquaredErrorLossLayer = 11;
+
+        }
+
+    }
+
+
+
+
+
+CategoricalCrossEntropyLossLayer
+________________________________________________________________________________
+
+Categorical cross entropy loss layer.
+Categorical cross entropy is used for single-label categorization
+(only one category is applicable to each data point).
+
+The input is a vector of length N representing the distribution over N categories.
+It must be the output of a softmax.
+
+The target is a single value representing the true category or class label.
+If the target is the predictedFeatureName of a neural network classifier,
+it will be inverse-mapped to the corresponding categorical index for you.
+
+.. math::
+    \text{Loss}_{CCE}(\text{input}, \text{target}) =
+        -\sum_{i=1}^{N} (\text{target} == i) \log(\text{input}[i]) =
+        -\log(\text{input}[\text{target}])
+
+
+.. code-block:: proto
+
+    message CategoricalCrossEntropyLossLayer {
+
+        string input = 1;
+        string target = 2;
+
+    }
+
+
+
+
+
+MeanSquaredErrorLossLayer
+________________________________________________________________________________
+
+Mean squared error loss layer,
+specifying the input and target.
+
+
+.. code-block:: proto
+
+    message MeanSquaredErrorLossLayer {
+
+        string input = 1;
+        string target = 2;
+
+    }
+
+
+
+
+
+Optimizer
+________________________________________________________________________________
+
+Optimizer. Stochastic gradient descent and Adam are currently the only supported optimizers.
+
+
+.. code-block:: proto
+
+    message Optimizer {
+
+        oneof OptimizerType {
+
+            SGDOptimizer sgdOptimizer = 10;
+            AdamOptimizer adamOptimizer = 11;
+
+        }
+
+    }
+
+
+
+
+
+SGDOptimizer
+________________________________________________________________________________
+
+Stochastic gradient descent optimizer,
+specifying a configurable learning rate, mini-batch size, and momentum.
+
+
+.. code-block:: proto
+
+    message SGDOptimizer {
+
+        DoubleParameter learningRate = 1;
+        Int64Parameter miniBatchSize = 2;
+        DoubleParameter momentum = 3;
+
+    }
+
+
+
+
+
+AdamOptimizer
+________________________________________________________________________________
+
+Adam optimizer,
+specifying a configurable learning rate, mini-batch size, betas, and eps.
+
+
+.. code-block:: proto
+
+    message AdamOptimizer {
+
+        DoubleParameter learningRate = 1;
+        Int64Parameter miniBatchSize = 2;
+        DoubleParameter beta1 = 3;
+        DoubleParameter beta2 = 4;
+        DoubleParameter eps = 5;
+
+    }
+
+
+
+
+
+
+
+
+
+BoxCoordinatesMode.Coordinates
+--------------------------------------------------------------------------------
+
+
+
+.. code-block:: proto
+
+    enum Coordinates {
+
+        CORNERS_HEIGHT_FIRST = 0;
+
+        CORNERS_WIDTH_FIRST = 1;
+
+        CENTER_SIZE_HEIGHT_FIRST = 2;
+
+        CENTER_SIZE_WIDTH_FIRST = 3;
+
+    }
+
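+
+As a rough illustration of the difference between the corner and center-size
+encodings above, here is a hedged NumPy sketch (the helper name and the
+choice of the width-first orderings are illustrative only):
+
+.. code-block:: python
+
+    import numpy as np
+
+    def center_size_to_corners(boxes):
+        # boxes: (N, 4) in CENTER_SIZE_WIDTH_FIRST order [xc, yc, w, h]
+        xc, yc, w, h = boxes.T
+        # returns CORNERS_WIDTH_FIRST order [xmin, ymin, xmax, ymax]
+        return np.stack([xc - w / 2, yc - h / 2, xc + w / 2, yc + h / 2], axis=1)
+
+    print(center_size_to_corners(np.array([[0.5, 0.5, 0.2, 0.4]])))
+
+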
Convolution3DLayerParams.PaddingType
+--------------------------------------------------------------------------------
+
+The type of padding.
+All padding types pad the input shape with zeros.
+CUSTOM padding will add the custom padding values specified below to their respective
+dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the
+input's depth dimension and `customPaddingBack` number of zeros will be added to the other
+side of the input's depth dimension.
+VALID padding adds no padding to any dimension. In this case, the last convolution along
+each dimension will be dropped if the input dimension and the kernel size, stride, and
+dilation do not match.
+SAME padding adds enough padding to each dimension such that the output of the convolution
+has size ``Ceiling(inputShape / stride)``. Padding is added evenly to both sides of each
+dimension unless the total padding to add is odd, in which case it is added to the
+back/bottom/right side of the respective dimension. For example, if the total padding needed
+in the depth dimension is 3, 1 zero will be added to the front side of the depth dimension
+and 2 zeros will be added to the back side.
+
+.. code-block:: proto
+
+    enum PaddingType {
+        CUSTOM = 0;
+        VALID = 1;
+        SAME = 2;
+    }
+
+
+
+FlattenLayerParams.FlattenOrder
+--------------------------------------------------------------------------------
+
+
+
+.. code-block:: proto
+
+    enum FlattenOrder {
+
+        CHANNEL_FIRST = 0;
+        CHANNEL_LAST = 1;
+
+    }
+
+
+
+GeluLayerParams.GeluMode
+--------------------------------------------------------------------------------
+
+
+
+.. code-block:: proto
+
+    enum GeluMode {
+
+        EXACT = 0;
+        TANH_APPROXIMATION = 1;
+        SIGMOID_APPROXIMATION = 2;
+
+    }
+
+
+
+GlobalPooling3DLayerParams.GlobalPoolingType3D
+--------------------------------------------------------------------------------
+
+
+
+.. code-block:: proto
+
+    enum GlobalPoolingType3D {
+        MAX = 0;
+        AVERAGE = 1;
+    }
+
+
+
+NeuralNetworkImageShapeMapping
+________________________________________________________________________________
+
+
+
+.. code-block:: proto
+
+    enum NeuralNetworkImageShapeMapping {
+
+
+        RANK5_IMAGE_MAPPING = 0;
+
+        RANK4_IMAGE_MAPPING = 1;
+
+    }
+
+
+
+NeuralNetworkMultiArrayShapeMapping
+________________________________________________________________________________
+
+
+
+.. code-block:: proto
+
+    enum NeuralNetworkMultiArrayShapeMapping {
+
+
+        RANK5_ARRAY_MAPPING = 0;
+
+        EXACT_ARRAY_MAPPING = 1;
+
+    }
+
+
+
+Pooling3DLayerParams.Pooling3DPaddingType
+--------------------------------------------------------------------------------
+
+The type of padding.
+All padding types pad the input shape with zeros.
+CUSTOM padding will add the custom padding values specified below to their respective
+dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the
+input's depth dimension and `customPaddingBack` number of zeros will be added to the other
+side of the input's depth dimension.
+VALID padding adds no padding to any dimension. In this case, the last pool along
+each dimension will be dropped if the input dimension and the kernel size and stride do not match.
+SAME padding adds enough padding to each dimension such that the output
+has the same spatial dimensions as the input. Padding is added evenly to both
+sides of each dimension unless the total padding to add is odd, in which case the extra padding
+is added to the back/bottom/right side of the respective dimension. For example, if the
+total horizontal padding is 3, then there will be 1 padding value on the left and 2 on the right.
+
+.. code-block:: proto
+
+    enum Pooling3DPaddingType {
+        CUSTOM = 0;
+        VALID = 1;
+        SAME = 2;
+    }
+
+
+
+Pooling3DLayerParams.PoolingType3D
+--------------------------------------------------------------------------------
+
+
+
+.. code-block:: proto
+
+    enum PoolingType3D {
+        MAX = 0;
+        AVERAGE = 1;
+    }
+
+
+
+PoolingLayerParams.PoolingType
+--------------------------------------------------------------------------------
+
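+The pooling type determines how each pooling window is reduced to a single
+value. A hedged NumPy sketch over one window (the values are illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+
+    window = np.array([1.0, -2.0, 3.0, 0.0])   # values in one pooling window
+
+    max_pool = np.max(window)                  # MAX
+    avg_pool = np.mean(window)                 # AVERAGE
+    l2_pool = np.sqrt(np.sum(window ** 2))     # L2
+
+.. 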
code-block:: proto + + enum PoolingType { + + MAX = 0; + AVERAGE = 1; + L2 = 2; + + } + + + +ReduceLayerParams.ReduceAxis +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum ReduceAxis { + + CHW = 0; + HW = 1; + C = 2; + H = 3; + W = 4; + + } + + + +ReduceLayerParams.ReduceOperation +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum ReduceOperation { + + SUM = 0; + AVG = 1; + PROD = 2; + LOGSUM = 3; + SUMSQUARE = 4; + L1 = 5; + L2 = 6; + MAX = 7; + MIN = 8; + ARGMAX = 9; + + } + + + +ReorganizeDataLayerParams.ReorganizationType +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum ReorganizationType { + + SPACE_TO_DEPTH = 0; + DEPTH_TO_SPACE = 1; + PIXEL_SHUFFLE = 2; + + } + + + +ReshapeLayerParams.ReshapeOrder +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum ReshapeOrder { + + CHANNEL_FIRST = 0; + CHANNEL_LAST = 1; + + } + + + +SamePadding.SamePaddingMode +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum SamePaddingMode { + + BOTTOM_RIGHT_HEAVY = 0; + TOP_LEFT_HEAVY = 1; + + } + + + +SamplingMode.Method +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum Method { + + STRICT_ALIGN_ENDPOINTS_MODE = 0; + + ALIGN_ENDPOINTS_MODE = 1; + + UPSAMPLE_MODE = 2; + + ROI_ALIGN_MODE = 3; + + } + + + +ScatterMode +________________________________________________________________________________ + + + +.. code-block:: proto + + enum ScatterMode { + + SCATTER_UPDATE = 0; + SCATTER_ADD = 1; + SCATTER_SUB = 2; + SCATTER_MUL = 3; + SCATTER_DIV = 4; + SCATTER_MAX = 5; + SCATTER_MIN = 6; + + } + + + +SliceLayerParams.SliceAxis +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum SliceAxis { + + CHANNEL_AXIS = 0; + HEIGHT_AXIS = 1; + WIDTH_AXIS = 2; + + } + + + +UnaryFunctionLayerParams.Operation +-------------------------------------------------------------------------------- + +A unary operator. + +The following functions are supported: + +``SQRT`` + .. math:: f(x) = \sqrt{x} + +``RSQRT`` + .. math:: f(x) = \dfrac{1}{\sqrt{x + \epsilon}} + +``INVERSE`` + .. math:: f(x) = \dfrac{1}{x + \epsilon} + +``POWER`` + .. math:: f(x) = x^\alpha + +``EXP`` + .. math:: f(x) = e^x + +``LOG`` + .. math:: f(x) = \log x + +``ABS`` + .. math:: f(x) = |x| + +``THRESHOLD`` + .. math:: f(x) = \text{max}(\alpha, x) + +.. code-block:: proto + + enum Operation { + SQRT = 0; + RSQRT = 1; + INVERSE = 2; + POWER = 3; + EXP = 4; + LOG = 5; + ABS = 6; + THRESHOLD = 7; + } + + + +UpsampleLayerParams.InterpolationMode +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum InterpolationMode { + + NN = 0; + BILINEAR = 1; + + } + + + +UpsampleLayerParams.LinearUpsampleMode +-------------------------------------------------------------------------------- + +LinearUpsampleMode specifies the behavior for linear upsampling. Only valid when Interpolation Mode is BILINEAR. 
+
+If the input grid is [0, Xin-1] (corresponding to an input size of Xin), and if the output size is Xout,
+then the grid points are sampled in the following manner:
+
+DEFAULT:
+    spacing = (Xin - Xin / Xout) / (Xout - 1)
+    grid_point[i] = min(Xin - 1, max(0, i * spacing)), for i = 0, 1, 2, ..., Xout - 1
+
+ALIGN_CORNERS_TRUE:
+    spacing = (Xin - 1) / (Xout - 1)
+    grid_point[i] = min(Xin - 1, max(0, i * spacing)), for i = 0, 1, 2, ..., Xout - 1
+
+ALIGN_CORNERS_FALSE:
+    spacing = Xin / Xout
+    grid_point[i] = min(Xin - 1, max(0, i * spacing + 0.5 * spacing - 0.5)), for i = 0, 1, 2, ..., Xout - 1
+
+.. code-block:: proto
+
+    enum LinearUpsampleMode {
+
+        DEFAULT = 0;
+        ALIGN_CORNERS_TRUE = 1;
+        ALIGN_CORNERS_FALSE = 2;
+
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/NonMaximumSuppression.rst b/mlmodel/docs/Format/NonMaximumSuppression.rst
new file mode 100644
index 000000000..5fa6e3db0
--- /dev/null
+++ b/mlmodel/docs/Format/NonMaximumSuppression.rst
@@ -0,0 +1,58 @@
+NonMaximumSuppression
+________________________________________________________________________________
+
+
+
+
+.. code-block:: proto
+
+    message NonMaximumSuppression {
+        // Suppression methods:
+        message PickTop {
+            bool perClass = 1;
+        }
+
+        oneof SuppressionMethod {
+            PickTop pickTop = 1;
+        }
+
+        oneof ClassLabels {
+            StringVector stringClassLabels = 100;
+            Int64Vector int64ClassLabels = 101;
+        }
+
+        double iouThreshold = 110;
+
+        double confidenceThreshold = 111;
+
+        string confidenceInputFeatureName = 200;
+
+        string coordinatesInputFeatureName = 201;
+
+        string iouThresholdInputFeatureName = 202;
+
+        string confidenceThresholdInputFeatureName = 203;
+
+        string confidenceOutputFeatureName = 210;
+
+        string coordinatesOutputFeatureName = 211;
+    }
+
+
+
+
+
+NonMaximumSuppression.PickTop
+--------------------------------------------------------------------------------
+
+
+
+
+.. code-block:: proto
+
+    message PickTop {
+        bool perClass = 1;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/Normalizer.rst b/mlmodel/docs/Format/Normalizer.rst
new file mode 100644
index 000000000..91b631d3b
--- /dev/null
+++ b/mlmodel/docs/Format/Normalizer.rst
@@ -0,0 +1,52 @@
+Normalizer
+________________________________________________________________________________
+
+A normalization preprocessor.
+
+
+.. code-block:: proto
+
+    message Normalizer {
+        enum NormType {
+            LMax = 0;
+            L1 = 1;
+            L2 = 2;
+        }
+
+        NormType normType = 1;
+    }
+
+
+
+
+
+
+
+
+
+Normalizer.NormType
+--------------------------------------------------------------------------------
+
+There are three normalization modes,
+which have the corresponding formulas:
+
+Max
+    .. math::
+        z = \max_i |x_i|
+
+L1
+    .. math::
+        z = ||x||_1 = \sum_{i=1}^{n} |x_i|
+
+L2
+    .. math::
+        z = ||x||_2 = \sqrt{\sum_{i=1}^{n} x_i^2}
+
+.. code-block:: proto
+
+    enum NormType {
+        LMax = 0;
+        L1 = 1;
+        L2 = 2;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/OneHotEncoder.rst b/mlmodel/docs/Format/OneHotEncoder.rst
new file mode 100644
index 000000000..30782e340
--- /dev/null
+++ b/mlmodel/docs/Format/OneHotEncoder.rst
@@ -0,0 +1,50 @@
+OneHotEncoder
+________________________________________________________________________________
+
+Transforms a categorical feature into an array. The array will be all
+zeros except for a single entry of one.
+
+Each categorical value maps to an index; this mapping is given by
+either the ``stringCategories`` parameter or the ``int64Categories``
+parameter.
+
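+A hedged Python sketch of this behavior (the category list and handling
+mode below are illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+
+    categories = ["cat", "dog", "bird"]            # e.g. stringCategories
+    index = {c: i for i, c in enumerate(categories)}
+
+    def one_hot(value, ignore_unknown=False):
+        out = np.zeros(len(categories))
+        if value in index:
+            out[index[value]] = 1.0
+        elif not ignore_unknown:
+            # HandleUnknown == ErrorOnUnknown
+            raise ValueError("unknown category: " + str(value))
+        # HandleUnknown == IgnoreUnknown -> all zeros
+        return out
+
+    print(one_hot("dog"))   # [0. 1. 0.]
+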
+.. code-block:: proto
+
+    message OneHotEncoder {
+        enum HandleUnknown {
+            ErrorOnUnknown = 0;
+            IgnoreUnknown = 1;   // Output will be all zeros for unknown values.
+        }
+
+        oneof CategoryType {
+            StringVector stringCategories = 1;
+            Int64Vector int64Categories = 2;
+        }
+
+        // Output can be a dictionary with only one entry, instead of an array.
+        bool outputSparse = 10;
+
+        HandleUnknown handleUnknown = 11;
+    }
+
+
+
+
+
+
+
+
+
+OneHotEncoder.HandleUnknown
+--------------------------------------------------------------------------------
+
+
+
+.. code-block:: proto
+
+    enum HandleUnknown {
+        ErrorOnUnknown = 0;
+        IgnoreUnknown = 1;   // Output will be all zeros for unknown values.
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/Parameters.rst b/mlmodel/docs/Format/Parameters.rst
new file mode 100644
index 000000000..1de7359f5
--- /dev/null
+++ b/mlmodel/docs/Format/Parameters.rst
@@ -0,0 +1,75 @@
+Int64Parameter
+________________________________________________________________________________
+
+Int64 parameter,
+consisting of a default int64 value and an allowed range or set of values.
+The value is unbounded if AllowedValues is not set.
+
+
+.. code-block:: proto
+
+    message Int64Parameter {
+        int64 defaultValue = 1;
+        oneof AllowedValues {
+            Int64Range range = 10;
+            Int64Set set = 11;
+        }
+    }
+
+
+
+
+
+DoubleParameter
+________________________________________________________________________________
+
+Double parameter,
+consisting of a default double value and an allowed range of values.
+The value is unbounded if AllowedValues is not set.
+
+
+.. code-block:: proto
+
+    message DoubleParameter {
+        double defaultValue = 1;
+        oneof AllowedValues {
+            DoubleRange range = 10;
+        }
+    }
+
+
+
+
+
+StringParameter
+________________________________________________________________________________
+
+String parameter.
+A default string value must be provided.
+
+
+.. code-block:: proto
+
+    message StringParameter {
+        string defaultValue = 1;
+    }
+
+
+
+
+
+BoolParameter
+________________________________________________________________________________
+
+Bool parameter.
+A default bool value must be provided.
+
+
+.. code-block:: proto
+
+    message BoolParameter {
+        bool defaultValue = 1;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/SVM.rst b/mlmodel/docs/Format/SVM.rst
new file mode 100644
index 000000000..1cb6e7369
--- /dev/null
+++ b/mlmodel/docs/Format/SVM.rst
@@ -0,0 +1,276 @@
+LinearKernel
+________________________________________________________________________________
+
+A linear kernel.
+
+This function has the following formula:
+
+.. math::
+    K(\boldsymbol{x}, \boldsymbol{x'}) = \boldsymbol{x}^T \boldsymbol{x'}
+
+
+.. code-block:: proto
+
+    message LinearKernel {
+    }
+
+
+
+
+
+RBFKernel
+________________________________________________________________________________
+
+A Gaussian radial basis function (RBF) kernel.
+
+This function has the following formula:
+
+.. math::
+    K(\boldsymbol{x}, \boldsymbol{x'}) = \
+        \exp(-\gamma || \boldsymbol{x} - \boldsymbol{x'} ||^2 )
+
+
+.. code-block:: proto
+
+    message RBFKernel {
+        double gamma = 1;
+    }
+
+
+
+
+
+PolyKernel
+________________________________________________________________________________
+
+A polynomial kernel.
+
+This function has the following formula:
+
+.. math::
+    K(\boldsymbol{x}, \boldsymbol{x'}) = \
+        (\gamma \boldsymbol{x}^T \boldsymbol{x'} + c)^{degree}
+
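+A hedged NumPy sketch of the kernel functions defined in this file
+(``gamma``, ``c``, and ``degree`` are illustrative values):
+
+.. code-block:: python
+
+    import numpy as np
+
+    x, y = np.array([1.0, 2.0]), np.array([0.5, -1.0])
+    gamma, c, degree = 0.5, 1.0, 3
+
+    linear = x @ y                                  # LinearKernel
+    rbf = np.exp(-gamma * np.sum((x - y) ** 2))     # RBFKernel
+    poly = (gamma * (x @ y) + c) ** degree          # PolyKernel
+    sigmoid = np.tanh(gamma * (x @ y) + c)          # SigmoidKernel (below)
+
+
+.. 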
code-block:: proto + + message PolyKernel { + int32 degree = 1; + double c = 2; + double gamma = 3; + } + + + + + + +SigmoidKernel +________________________________________________________________________________ + +A sigmoid kernel. + +This function has the following formula: + +.. math:: + K(\boldsymbol{x}, \boldsymbol{x'}) = \ + \tanh(\gamma \boldsymbol{x}^T \boldsymbol{x'} + c) + + +.. code-block:: proto + + message SigmoidKernel { + double gamma = 1; + double c = 2; + } + + + + + + +Kernel +________________________________________________________________________________ + +A kernel. + + +.. code-block:: proto + + message Kernel { + oneof kernel { + LinearKernel linearKernel = 1; + RBFKernel rbfKernel = 2; + PolyKernel polyKernel = 3; + SigmoidKernel sigmoidKernel = 4; + } + } + + + + + + +SparseNode +________________________________________________________________________________ + +A sparse node. + + +.. code-block:: proto + + message SparseNode { + int32 index = 1; // 1-based indexes, like libsvm + double value = 2; + } + + + + + + +SparseVector +________________________________________________________________________________ + +A sparse vector. + + +.. code-block:: proto + + message SparseVector { + repeated SparseNode nodes = 1; + } + + + + + + +SparseSupportVectors +________________________________________________________________________________ + +One or more sparse support vectors. + + +.. code-block:: proto + + message SparseSupportVectors { + repeated SparseVector vectors = 1; + } + + + + + + +DenseVector +________________________________________________________________________________ + +A dense vector. + + +.. code-block:: proto + + message DenseVector { + repeated double values = 1; + } + + + + + + +DenseSupportVectors +________________________________________________________________________________ + +One or more dense support vectors. + + +.. code-block:: proto + + message DenseSupportVectors { + repeated DenseVector vectors = 1; + } + + + + + + +Coefficients +________________________________________________________________________________ + +One or more coefficients. + + +.. code-block:: proto + + message Coefficients { + repeated double alpha = 1; + } + + + + + + +SupportVectorRegressor +________________________________________________________________________________ + +A support vector regressor. + + +.. code-block:: proto + + message SupportVectorRegressor { + Kernel kernel = 1; + + // Support vectors, either sparse or dense format + oneof supportVectors { + SparseSupportVectors sparseSupportVectors = 2; + DenseSupportVectors denseSupportVectors = 3; + } + + // Coefficients, one for each support vector + Coefficients coefficients = 4; + + double rho = 5; + } + + + + + + +SupportVectorClassifier +________________________________________________________________________________ + +A support vector classifier + + +.. 
code-block:: proto
+
+    message SupportVectorClassifier {
+        Kernel kernel = 1;
+
+        repeated int32 numberOfSupportVectorsPerClass = 2;
+
+        oneof supportVectors {
+            SparseSupportVectors sparseSupportVectors = 3;
+            DenseSupportVectors denseSupportVectors = 4;
+        }
+
+        repeated Coefficients coefficients = 5;
+
+        repeated double rho = 6;
+
+        repeated double probA = 7;
+        repeated double probB = 8;
+
+        oneof ClassLabels {
+            StringVector stringClassLabels = 100;
+            Int64Vector int64ClassLabels = 101;
+        }
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/Scaler.rst b/mlmodel/docs/Format/Scaler.rst
new file mode 100644
index 000000000..feb977f3e
--- /dev/null
+++ b/mlmodel/docs/Format/Scaler.rst
@@ -0,0 +1,29 @@
+Scaler
+________________________________________________________________________________
+
+A scaling operation.
+
+This function has the following formula:
+
+.. math::
+    f(x) = scaleValue \cdot (x + shiftValue)
+
+If the ``scaleValue`` is not given, the default value 1 is used.
+If the ``shiftValue`` is not given, the default value 0 is used.
+
+If ``scaleValue`` and ``shiftValue`` are each a single value
+and the input is an array, then the scale and shift are applied
+to each element of the array.
+
+If the input is an integer, then it is converted to a double to
+perform the scaling operation. If the output type is an integer,
+then it is cast to an integer. If that cast is lossy, then an
+error is generated.
+
+
+.. code-block:: proto
+
+    message Scaler {
+        repeated double shiftValue = 1;
+        repeated double scaleValue = 2;
+    }
\ No newline at end of file
diff --git a/mlmodel/docs/Format/SoundAnalysisPreprocessing.rst b/mlmodel/docs/Format/SoundAnalysisPreprocessing.rst
new file mode 100644
index 000000000..e1b555c98
--- /dev/null
+++ b/mlmodel/docs/Format/SoundAnalysisPreprocessing.rst
@@ -0,0 +1,70 @@
+SoundAnalysisPreprocessing
+________________________________________________________________________________
+
+A model which takes audio signal samples as input and outputs an array of
+preprocessed samples according to the specified preprocessing types.
+
+
+.. code-block:: proto
+
+    message SoundAnalysisPreprocessing {
+
+        // Specific preprocessing types for sound analysis
+
+        Vggish preprocesses input audio samples so that they can
+        be fed to the Vggish feature extractor.
+        c.f. https://arxiv.org/pdf/1609.09430.pdf
+
+        The preprocessing takes as input single-channel (monophonic) audio samples
+        975 milliseconds long, sampled at 16KHz, i.e., a 1D multiarray of 15600 samples,
+        and produces preprocessed samples in a multiarray of shape [1, 96, 64]:
+
+        (1) Framing: split the input audio samples into overlapping frames, where each
+        frame is 25 milliseconds long and hops forward by 10 milliseconds.
+        Any partial frames at the end are dropped.
+
+        (2) Hann window: apply a periodic Hann window with a window_length of
+        25 milliseconds, which translates to 400 samples at a 16KHz sampling rate:
+
+        w(n) = 0.5 - 0.5 * cos(2*pi*n/window_length_samples),
+        where 0 <= n <= window_length_samples - 1 and window_length_samples = 400
+
+        Then, the Hann window is applied to each frame as below:
+
+        windowed_frame(n) = frame(n) * w(n),
+        where 0 <= n <= window_length_samples - 1 and window_length_samples = 400
+
+        (3) Power spectrum: calculate the short-time Fourier transform magnitude, with
+        an FFT length of 512.
+
+        (4) Log Mel filter bank: calculate a log-magnitude mel-frequency
+        spectrogram with a minimum frequency of 125Hz and a maximum frequency of 7500Hz;
+        the number of mel bins is 64, log_offset is 0.01, and the number of spectrum bins
+        is 64.
+ + message Vggish { + // no specific parameter + } + + // Vision feature print type + oneof SoundAnalysisPreprocessingType { + Vggish vggish = 20; + } + + } + + + + + + +SoundAnalysisPreprocessing.Vggish +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message Vggish { + // no specific parameter + } \ No newline at end of file diff --git a/mlmodel/docs/Format/TextClassifier.rst b/mlmodel/docs/Format/TextClassifier.rst new file mode 100644 index 000000000..97ed2232f --- /dev/null +++ b/mlmodel/docs/Format/TextClassifier.rst @@ -0,0 +1,22 @@ +TextClassifier +________________________________________________________________________________ + +A model which takes a single input string and outputs a +label for the input. + + +.. code-block:: proto + + message TextClassifier { + + uint32 revision = 1; + + string language = 10; + + bytes modelParameterData = 100; + + oneof ClassLabels { + StringVector stringClassLabels = 200; + } + + } \ No newline at end of file diff --git a/mlmodel/docs/Format/TreeEnsemble.rst b/mlmodel/docs/Format/TreeEnsemble.rst new file mode 100644 index 000000000..424253ff3 --- /dev/null +++ b/mlmodel/docs/Format/TreeEnsemble.rst @@ -0,0 +1,224 @@ +Each tree is a collection of nodes, +each of which is identified by a unique identifier. + +Each node is either a branch or a leaf node. +A branch node evaluates a value according to a behavior; +if true, the node identified by ``true_child_node_id`` is evaluated next, +if false, the node identified by ``false_child_node_id`` is evaluated next. +A leaf node adds the evaluation value to the base prediction value +to get the final prediction. + +A tree must have exactly one root node, +which has no parent node. +A tree must not terminate on a branch node. +All leaf nodes must be accessible +by evaluating one or more branch nodes in sequence, +starting from the root node. + + + +TreeEnsembleParameters +________________________________________________________________________________ + +Tree ensemble parameters. + + +.. code-block:: proto + + message TreeEnsembleParameters { + message TreeNode { + uint64 treeId = 1; + uint64 nodeId = 2; + + enum TreeNodeBehavior { + BranchOnValueLessThanEqual = 0; + BranchOnValueLessThan = 1; + BranchOnValueGreaterThanEqual = 2; + BranchOnValueGreaterThan = 3; + BranchOnValueEqual = 4; + BranchOnValueNotEqual = 5; + LeafNode = 6; + } + + TreeNodeBehavior nodeBehavior = 3; + + uint64 branchFeatureIndex = 10; + double branchFeatureValue = 11; + uint64 trueChildNodeId = 12; + uint64 falseChildNodeId = 13; + bool missingValueTracksTrueChild = 14; + + message EvaluationInfo { + uint64 evaluationIndex = 1; + double evaluationValue = 2; + } + + repeated EvaluationInfo evaluationInfo = 20; + + double relativeHitRate = 30; + } + + repeated TreeNode nodes = 1; + + uint64 numPredictionDimensions = 2; + + repeated double basePredictionValue = 3; + } + + + + + + +TreeEnsembleParameters.TreeNode +-------------------------------------------------------------------------------- + + + + +.. 
code-block:: proto + + message TreeNode { + uint64 treeId = 1; + uint64 nodeId = 2; + + enum TreeNodeBehavior { + BranchOnValueLessThanEqual = 0; + BranchOnValueLessThan = 1; + BranchOnValueGreaterThanEqual = 2; + BranchOnValueGreaterThan = 3; + BranchOnValueEqual = 4; + BranchOnValueNotEqual = 5; + LeafNode = 6; + } + + TreeNodeBehavior nodeBehavior = 3; + + uint64 branchFeatureIndex = 10; + double branchFeatureValue = 11; + uint64 trueChildNodeId = 12; + uint64 falseChildNodeId = 13; + bool missingValueTracksTrueChild = 14; + + message EvaluationInfo { + uint64 evaluationIndex = 1; + double evaluationValue = 2; + } + + repeated EvaluationInfo evaluationInfo = 20; + + double relativeHitRate = 30; + } + + + + + + +TreeEnsembleParameters.TreeNode.EvaluationInfo +-------------------------------------------------------------------------------- + +The leaf mode. + +If ``nodeBahavior`` == ``LeafNode``, +then the evaluationValue is added to the base prediction value +in order to get the final prediction. +To support multiclass classification +as well as regression and binary classification, +the evaluation value is encoded here as a sparse vector, +with evaluationIndex being the index of the base vector +that evaluation value is added to. +In the single class case, +it is expected that evaluationIndex is exactly 0. + + +.. code-block:: proto + + message EvaluationInfo { + uint64 evaluationIndex = 1; + double evaluationValue = 2; + } + + + + + + +TreeEnsembleClassifier +________________________________________________________________________________ + +A tree ensemble classifier. + + +.. code-block:: proto + + message TreeEnsembleClassifier { + TreeEnsembleParameters treeEnsemble = 1; + TreeEnsemblePostEvaluationTransform postEvaluationTransform = 2; + + // Required class label mapping + oneof ClassLabels { + StringVector stringClassLabels = 100; + Int64Vector int64ClassLabels = 101; + } + } + + + + + + +TreeEnsembleRegressor +________________________________________________________________________________ + +A tree ensemble regressor. + + +.. code-block:: proto + + message TreeEnsembleRegressor { + TreeEnsembleParameters treeEnsemble = 1; + TreeEnsemblePostEvaluationTransform postEvaluationTransform = 2; + } + + + + + + + + + + +TreeEnsembleParameters.TreeNode.TreeNodeBehavior +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum TreeNodeBehavior { + BranchOnValueLessThanEqual = 0; + BranchOnValueLessThan = 1; + BranchOnValueGreaterThanEqual = 2; + BranchOnValueGreaterThan = 3; + BranchOnValueEqual = 4; + BranchOnValueNotEqual = 5; + LeafNode = 6; + } + + + +TreeEnsemblePostEvaluationTransform +________________________________________________________________________________ + +A tree ensemble post-evaluation transform. + +.. code-block:: proto + + enum TreeEnsemblePostEvaluationTransform { + NoTransform = 0; + Classification_SoftMax = 1; + Regression_Logistic = 2; + Classification_SoftMaxWithZeroClassReference = 3; + } \ No newline at end of file diff --git a/mlmodel/docs/Format/VisionFeaturePrint.rst b/mlmodel/docs/Format/VisionFeaturePrint.rst new file mode 100644 index 000000000..942bb0ccd --- /dev/null +++ b/mlmodel/docs/Format/VisionFeaturePrint.rst @@ -0,0 +1,143 @@ +VisionFeaturePrint +________________________________________________________________________________ + +A model which takes an input image and outputs array(s) of features +according to the specified feature types + + +.. 
code-block:: proto + + message VisionFeaturePrint { + + // Specific vision feature print types + + // Scene extracts features useful for identifying contents of natural images + // in both indoor and outdoor environments + message Scene { + enum SceneVersion { + SCENE_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 12.0+, macOS 10.14+ + // It uses a 299x299 input image and yields a 2048 float feature vector + SCENE_VERSION_1 = 1; + } + + SceneVersion version = 1; + } + + // Object extracts features useful for identifying and localizing + // objects in natural images + message Object { + enum ObjectVersion { + OBJECT_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 14.0+, macOS 10.16+ + // It uses a 299x299 input image and yields two multiarray + // features: one at high resolution of shape (288, 35, 35) + // the other at low resolution of shape (768, 17, 17) + OBJECT_VERSION_1 = 1; + } + + ObjectVersion version = 1; + + repeated string output = 100; + } + + // Vision feature print type + oneof VisionFeaturePrintType { + Scene scene = 20; + Object object = 21; + } + + } + + + + + + +VisionFeaturePrint.Scene +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message Scene { + enum SceneVersion { + SCENE_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 12.0+, macOS 10.14+ + // It uses a 299x299 input image and yields a 2048 float feature vector + SCENE_VERSION_1 = 1; + } + + SceneVersion version = 1; + } + + + + + + +VisionFeaturePrint.Object +-------------------------------------------------------------------------------- + + + + +.. code-block:: proto + + message Object { + enum ObjectVersion { + OBJECT_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 14.0+, macOS 10.16+ + // It uses a 299x299 input image and yields two multiarray + // features: one at high resolution of shape (288, 35, 35) + // the other at low resolution of shape (768, 17, 17) + OBJECT_VERSION_1 = 1; + } + + ObjectVersion version = 1; + + repeated string output = 100; + } + + + + + + + + + + +VisionFeaturePrint.Object.ObjectVersion +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum ObjectVersion { + OBJECT_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 14.0+, macOS 10.16+ + // It uses a 299x299 input image and yields two multiarray + // features: one at high resolution of shape (288, 35, 35) + // the other at low resolution of shape (768, 17, 17) + OBJECT_VERSION_1 = 1; + } + + + +VisionFeaturePrint.Scene.SceneVersion +-------------------------------------------------------------------------------- + + + +.. code-block:: proto + + enum SceneVersion { + SCENE_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 12.0+, macOS 10.14+ + // It uses a 299x299 input image and yields a 2048 float feature vector + SCENE_VERSION_1 = 1; + } \ No newline at end of file diff --git a/mlmodel/docs/Format/WordEmbedding.rst b/mlmodel/docs/Format/WordEmbedding.rst new file mode 100644 index 000000000..a251396f6 --- /dev/null +++ b/mlmodel/docs/Format/WordEmbedding.rst @@ -0,0 +1,17 @@ +WordEmbedding +________________________________________________________________________________ + +A model which maps a set of strings into a finite-dimensional real vector space. + + +.. 
code-block:: proto + + message WordEmbedding { + + uint32 revision = 1; + + string language = 10; + + bytes modelParameterData = 100; + + } \ No newline at end of file diff --git a/mlmodel/docs/Format/WordTagger.rst b/mlmodel/docs/Format/WordTagger.rst new file mode 100644 index 000000000..f1e6982a1 --- /dev/null +++ b/mlmodel/docs/Format/WordTagger.rst @@ -0,0 +1,33 @@ +WordTagger +________________________________________________________________________________ + +A model which takes a single input string and outputs a +sequence of tokens, tags for tokens, along with their +locations and lengths, in the original string. + + +.. code-block:: proto + + message WordTagger { + + uint32 revision = 1; + + string language = 10; + + string tokensOutputFeatureName = 20; + + string tokenTagsOutputFeatureName = 21; + + string tokenLocationsOutputFeatureName = 22; + + string tokenLengthsOutputFeatureName = 23; + + bytes modelParameterData = 100; + + oneof Tags { + StringVector stringTags = 200; + } + + + + } \ No newline at end of file diff --git a/mlmodel/docs/sections/CustomModel.rst b/mlmodel/docs/_sections/CustomModel.rst similarity index 100% rename from mlmodel/docs/sections/CustomModel.rst rename to mlmodel/docs/_sections/CustomModel.rst diff --git a/mlmodel/docs/sections/DataStructuresAndFeatureTypes.rst b/mlmodel/docs/_sections/DataStructuresAndFeatureTypes.rst similarity index 100% rename from mlmodel/docs/sections/DataStructuresAndFeatureTypes.rst rename to mlmodel/docs/_sections/DataStructuresAndFeatureTypes.rst diff --git a/mlmodel/docs/sections/FeatureEngineering.rst b/mlmodel/docs/_sections/FeatureEngineering.rst similarity index 100% rename from mlmodel/docs/sections/FeatureEngineering.rst rename to mlmodel/docs/_sections/FeatureEngineering.rst diff --git a/mlmodel/docs/sections/GLM.rst b/mlmodel/docs/_sections/GLM.rst similarity index 100% rename from mlmodel/docs/sections/GLM.rst rename to mlmodel/docs/_sections/GLM.rst diff --git a/mlmodel/docs/sections/ItemSimilarityRecommender.rst b/mlmodel/docs/_sections/ItemSimilarityRecommender.rst similarity index 100% rename from mlmodel/docs/sections/ItemSimilarityRecommender.rst rename to mlmodel/docs/_sections/ItemSimilarityRecommender.rst diff --git a/mlmodel/docs/sections/LinkedModel.rst b/mlmodel/docs/_sections/LinkedModel.rst similarity index 100% rename from mlmodel/docs/sections/LinkedModel.rst rename to mlmodel/docs/_sections/LinkedModel.rst diff --git a/mlmodel/docs/sections/Model.rst b/mlmodel/docs/_sections/Model.rst similarity index 100% rename from mlmodel/docs/sections/Model.rst rename to mlmodel/docs/_sections/Model.rst diff --git a/mlmodel/docs/sections/NearestNeighbors.rst b/mlmodel/docs/_sections/NearestNeighbors.rst similarity index 100% rename from mlmodel/docs/sections/NearestNeighbors.rst rename to mlmodel/docs/_sections/NearestNeighbors.rst diff --git a/mlmodel/docs/sections/NeuralNetwork.rst b/mlmodel/docs/_sections/NeuralNetwork.rst similarity index 100% rename from mlmodel/docs/sections/NeuralNetwork.rst rename to mlmodel/docs/_sections/NeuralNetwork.rst diff --git a/mlmodel/docs/sections/SVM.rst b/mlmodel/docs/_sections/SVM.rst similarity index 100% rename from mlmodel/docs/sections/SVM.rst rename to mlmodel/docs/_sections/SVM.rst diff --git a/mlmodel/docs/sections/SoundAnalysisPreprocessing.rst b/mlmodel/docs/_sections/SoundAnalysisPreprocessing.rst similarity index 100% rename from mlmodel/docs/sections/SoundAnalysisPreprocessing.rst rename to mlmodel/docs/_sections/SoundAnalysisPreprocessing.rst diff --git 
a/mlmodel/docs/sections/TextClassifier.rst b/mlmodel/docs/_sections/TextClassifier.rst similarity index 100% rename from mlmodel/docs/sections/TextClassifier.rst rename to mlmodel/docs/_sections/TextClassifier.rst diff --git a/mlmodel/docs/sections/TreeEnsembles.rst b/mlmodel/docs/_sections/TreeEnsembles.rst similarity index 100% rename from mlmodel/docs/sections/TreeEnsembles.rst rename to mlmodel/docs/_sections/TreeEnsembles.rst diff --git a/mlmodel/docs/sections/VisionFeaturePrint.rst b/mlmodel/docs/_sections/VisionFeaturePrint.rst similarity index 100% rename from mlmodel/docs/sections/VisionFeaturePrint.rst rename to mlmodel/docs/_sections/VisionFeaturePrint.rst diff --git a/mlmodel/docs/sections/WordTagger.rst b/mlmodel/docs/_sections/WordTagger.rst similarity index 100% rename from mlmodel/docs/sections/WordTagger.rst rename to mlmodel/docs/_sections/WordTagger.rst diff --git a/mlmodel/docs/_themes/stripped/layout.html b/mlmodel/docs/_themes/stripped/layout.html new file mode 100644 index 000000000..185b3aca1 --- /dev/null +++ b/mlmodel/docs/_themes/stripped/layout.html @@ -0,0 +1,7 @@ +
+{%- block content %} + {%- block document %} + {% block body %}{% endblock %} + {%- endblock %} +{%- endblock %} +
\ No newline at end of file diff --git a/mlmodel/docs/_themes/stripped/style.css b/mlmodel/docs/_themes/stripped/style.css new file mode 100644 index 000000000..808b3a6a5 --- /dev/null +++ b/mlmodel/docs/_themes/stripped/style.css @@ -0,0 +1,3 @@ +a.headerlink { + visibility: hidden; +} diff --git a/mlmodel/docs/_themes/stripped/theme.conf b/mlmodel/docs/_themes/stripped/theme.conf new file mode 100644 index 000000000..4f249edd4 --- /dev/null +++ b/mlmodel/docs/_themes/stripped/theme.conf @@ -0,0 +1,5 @@ + +[theme] +inherit = basic +stylesheet = style.css +pygments_style = default diff --git a/mlmodel/docs/conf.py b/mlmodel/docs/conf.py index 067da301e..60a9bc1b4 100644 --- a/mlmodel/docs/conf.py +++ b/mlmodel/docs/conf.py @@ -1,13 +1,20 @@ # -*- coding: utf-8 -*- # -# ML Kit Tools documentation build configuration file +# Core ML documentation build configuration file +import coremltools import sys import os +import re -import sphinx_rtd_theme -from coremltools import __version__ +for m in [ + "converters", + "utils", +]: + module_name = "coremltools." + m + sys.modules[module_name] = eval(module_name) +sys.path.insert(0, os.path.abspath(".")) # -- General configuration ------------------------------------------------ @@ -18,35 +25,47 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.autosectionlabel'] +extensions = [ + "sphinx.ext.autodoc", + "numpydoc", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.inheritance_diagram", + "sphinx.ext.autosummary", + "sphinx_rtd_theme", +] +autosummary_generate = True # Add any paths that contain templates here, relative to this directory. -# templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' - -default_role = 'ref' +master_doc = "index" # General information about the project. -project = u'mlmodel' -copyright = u'2017 - 2019, Apple Inc' -author = u'Apple Inc.' +project = u"coremltools" +copyright = u"2017-2020, Apple Inc" +author = u"Apple Inc." # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # + +import pkg_resources + +version = pkg_resources.require("coremltools")[0].version + # The short X.Y version. -version = __version__ +version = version # The full version, including alpha/beta/rc tags. -release = __version__ +release = re.split("[a-z]+", version)[0] # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -58,39 +77,34 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['conf.py', 'Thumbs.db', '.DS_Store', 'reference'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +numpydoc_show_class_members = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = False -html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] -html_theme_options = { - 'navigation_depth': 2, -} - -imgmath_image_format = "svg" +html_theme = "stripped" +html_theme_path = [ + "_themes", +] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# -# html_theme_options = {} +html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - +html_static_path = [] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'mlmodeldoc' - +htmlhelp_basename = "coremltoolsdoc" # -- Options for LaTeX output --------------------------------------------- @@ -98,15 +112,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -116,20 +127,20 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'mlmodel.tex', u'mlmodel Documentation', - u'Apple Inc.', 'manual'), + ( + master_doc, + "coremltools.tex", + u"coremltools Documentation", + u"Apple Inc.", + "manual", + ), ] - # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'mlmodel', u'mlmodel Documentation', - [author], 1) -] - +man_pages = [(master_doc, "coremltools", u"coremltools Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -137,7 +148,41 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'mlmodel', u'mlmodel Documentation', - author, 'mlmodel', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "coremltools", + u"coremltools Documentation", + author, + "coremltools", + "One line description of project.", + "Miscellaneous", + ), ] + +# -- Customizations ------------------- + +autodoc_default_flags = ["members"] + + +# 'private-members', +# 'special-members', +# 'show-inheritance'] + + +def autodoc_skip_member(app, what, name, obj, skip, options): + # Always do __init__ + if name == "__init__": + return False + + exclusions = ( + "__weakref__", # special-members + "__doc__", + "__module__", + "__dict__", # undoc-members + ) + exclude = name in exclusions + return skip or exclude + + +def setup(app): + app.connect("autodoc-skip-member", autodoc_skip_member) diff --git a/mlmodel/docs/preprocess.py b/mlmodel/docs/preprocess.py index a0d196e67..17039a24a 100644 --- a/mlmodel/docs/preprocess.py +++ b/mlmodel/docs/preprocess.py @@ -22,7 +22,7 @@ def pairwise(iterable): sections = map(str.strip, re.split(r"", text)) for section, content in pairwise(sections[1:]): if section.endswith(".proto"): - section_name = section[:-len(".proto")] + section_name = section[: -len(".proto")] file_name = "./_sources/reference/{0}.rst".format(section_name) with open(file_name, "w") as f: f.truncate() diff --git a/mlmodel/docs/readme_session.py b/mlmodel/docs/readme_session.py new file mode 100644 index 000000000..3bea541b0 --- /dev/null +++ b/mlmodel/docs/readme_session.py @@ -0,0 +1,320 @@ +import json +import os +from requests.auth import HTTPBasicAuth +import requests + +_readme_api_url = "https://dash.readme.io/api/v1/" + + +class ReadMeSession: + # Passed for every API call + __headers = {"Accept": "application/json", "content-type": "application/json"} + + # Map: version info> + __versions = None + + def __init__(self, auth_token, api_version=None): + self.auth_token = auth_token + self.__refresh_versions() + if api_version: + self.set_api_version(api_version) + + # Set the version used for GET requests + def set_api_version(self, version): + self.__verify_version_exists(version) + self.api_version = version + self.__headers["x-readme-version"] = "v" + version + if "categories" not in self.get_version(): + self.__refresh_categories() + + # Call the readme API. api_func should be a requests-based function. + def __api_call(self, api_func, endpoint, print_info, data=None): + print(print_info + "... ", end="") + response = api_func( + _readme_api_url + endpoint, + headers=self.__headers, + auth=HTTPBasicAuth(self.auth_token, ""), + data=data, + ) + if response.status_code not in [200, 201, 204]: + print("Error (code " + str(response.status_code) + "): " + response.text) + return None + else: + print() + return None if api_func == requests.delete else json.loads(response.text) + + # API GET call. + # If paginated, gather and concat the output for each page in the endpoint. 
+    def __api_GET(self, endpoint, print_info=None, paginated=False):
+        if not print_info:
+            print_info = "API::GET(" + endpoint + ")"
+        if paginated:
+            i = 1
+            out = []
+            while True:
+                response = self.__api_call(
+                    requests.get,
+                    endpoint + "?page=" + str(i),
+                    print_info + " (page " + str(i) + ")",
+                )
+                if response is None:
+                    return None
+                if len(response) == 0:
+                    return out
+                out += response
+                i += 1
+        else:
+            return self.__api_call(requests.get, endpoint, print_info)
+
+    # API POST call.
+    # Data should be passed in as a map. The map will be converted to a string.
+    def __api_POST(self, endpoint, data, print_info=None):
+        if not print_info:
+            print_info = "API::POST(" + endpoint + ")"
+
+        # Convert data to str
+        data_str = ""
+        for x, y in data.items():
+            data_str += '"' + x + '":"' + y + '",'
+        data_str = ("{" + data_str[:-1] + "}").encode("utf-8")
+        data = data_str
+
+        return self.__api_call(requests.post, endpoint, print_info, data)
+
+    # API DELETE call.
+    def __api_DELETE(self, endpoint, print_info):
+        if not print_info:
+            print_info = "API::DELETE(" + endpoint + ")"
+        return self.__api_call(requests.delete, endpoint, print_info)
+
+    # Populates version_to_info as a map: "version" -> "version info"
+    def __refresh_versions(self):
+        response = self.__api_GET("version", print_info="Fetching versions")
+        if response:
+            self.__versions = {}
+            for version in response:
+                self.get_versions()[version["version"]] = version
+
+    # Verify a version exists
+    def __verify_version_exists(self, version):
+        if version not in self.get_versions():
+            raise ValueError("Version " + version + " does not exist.")
+
+    # Get all version info
+    def get_versions(self):
+        return self.__versions
+
+    # Get a version info
+    def get_version(self):
+        versions = self.get_versions()
+        return versions[self.api_version] if self.api_version in versions else None
+
+    # Populates categories as a map: "category title" -> "category ID"
+    def __refresh_categories(self):
+        version_info = self.get_version()
+        version_info["categories"] = {}
+        categories = version_info["categories"]
+        response = self.__api_GET(
+            "categories",
+            paginated=True,
+            print_info="Fetching categories for version " + self.api_version,
+        )
+        if response is not None:
+            for category in response:
+                if category[
+                    "reference"
+                ]:  # Only get categories that are in the API reference
+                    if category["title"] in categories:
+                        print(
+                            "Warning: There are two categories with the name "
+                            + category["title"]
+                            + " for version "
+                            + self.api_version
+                            + ". Which category this title refers"
+                            + " to will be unpredictable."
+ ) + categories[category["title"]] = category + self.__refresh_category_files(category["title"]) + + # Populate as a map: map> + def __refresh_category_files(self, category): + self.__verify_category_exists(category) + category_files = self.__api_GET( + "categories/" + self.get_category(category)["slug"] + "/docs", + print_info="Fetching docs in " + category, + ) + # Populate as a map: map> + category = self.get_category(category) + category["files"] = {} + for file in category_files: + category["files"][file["title"]] = file + + # Get all category info + def get_categories(self): + return self.get_version()["categories"] + + # Get a category info + def get_category(self, category): + categories = self.get_categories() + return categories[category] if category in categories else None + + # Get a categories' file list + def get_category_files(self, category): + self.__verify_category_exists(category) + return self.get_category(category)["files"] + + # Verify a category exists + def __verify_category_exists(self, category): + if not self.get_category(category): + raise ValueError( + "Category " + + category + + " does not exist for version " + + self.api_version + + "." + ) + + # Create a version with default settings. + def create_version( + self, version, from_version=None, is_stable=False, is_beta=False, is_hidden=True + ): + if version in self.get_versions(): + raise ValueError( + "Version " + version + " already exists! Cannot create it." + ) + + # If no source version, pick the latest one + if not from_version: + max_version = 0 + for ver in self.get_versions(): + ver = float(ver) + if ver > max_version: + max_version = ver + from_version = str(max_version) + + data = { + "version": "v" + version, + "is_stable": is_stable, + "is_beta": is_beta, + "is_hidden": is_hidden, + "from": from_version, + } + self.get_versions()[version] = self.__api_POST( + "version", data, "Creating version " + version + ) + + # Update a version + def update_version(self, version, is_stable=None, is_beta=None, is_hidden=None): + self.__verify_version_exists(version) + data = { + "version": "v" + version, + "is_stable": is_stable + if is_stable is not None + else self.get_versions()[version]["is_stable"], + "is_beta": is_beta + if is_beta is not None + else self.get_versions()[version]["is_beta"], + "is_hidden": is_hidden + if is_hidden is not None + else self.get_versions()[version]["is_hidden"], + } + version = self.__api_POST("version", data, "Creating version " + version) + for k, v in version.items(): + self.get_versions()[version][k] = v + + # Empty a category + def empty_category(self, category): + self.__verify_category_exists(category) + print("Emptying category " + category) + for title, data in self.get_category_files(category).items(): + self.__api_DELETE( + "docs/" + data["slug"], + print_info=" Removing file " + category + "/" + title, + ) + self.get_category(category)["files"] = {} + + # Delete files in the given category with the given title + def delete_file_with_title(self, title, category): + self.__verify_category_exists(category) + + # Search for a file with the same title. + files = self.get_category_files(category) + if title in files: + self.__api_DELETE( + "docs/" + files[title]["slug"], + print_info="Removing duplicate file " + category + "/" + title, + ) + files.pop(title) + + # Uploads all files in the folder at path to ReadMe. + # Can also upload individual files at path. 
+    def upload(self, path, category, recursive=False):
+        self.__verify_category_exists(category)
+
+        if os.path.isdir(path):
+            if recursive:
+                # get all subdirs in path and recursively transfer all files in each subdir
+                subdirpath = path
+                onlydirs = [
+                    f
+                    for f in os.listdir(subdirpath)
+                    if os.path.isdir(os.path.join(subdirpath, f))
+                ]
+                for dir in onlydirs:
+                    self.upload(os.path.join(path, dir), category, recursive)
+
+            # get all filenames in the current dir
+            files = sorted(
+                [
+                    os.path.join(path, f)
+                    for f in os.listdir(path)
+                    if os.path.isfile(os.path.join(path, f))
+                ]
+            )
+
+            # iterate through all filenames and import the html files
+            for currfilename in files:
+                self.upload(currfilename, category, recursive)
+        elif not os.path.isfile(path):
+            raise ValueError("Unable to find file at path: " + path)
+
+        currfilename = path
+        if currfilename.find(".html") != -1:
+            # open and read file
+            file = open(currfilename, "r")
+            filecontents = file.read()
+            file.close()
+            # Escape the contents so they survive the naive JSON string
+            # building done in __api_POST: backslashes first, then newlines.
+            filecontents = filecontents.replace("\\", "\\\\")
+            filecontents = filecontents.replace("\n", "\\n")
+            filecontents = filecontents.replace("¶", "")
+            filecontents = filecontents.replace('"', "'")
+            filecontents = (
+                '[block:html]\\n{\\n \\"html\\": \\"'
+                + filecontents
+                + '\\"\\n}\\n[/block]'
+            )
+
+            firstheadline = os.path.basename(currfilename)[:-5]
+            # extract first heading and use as page title
+            # soup = BeautifulSoup(filecontents, 'html.parser')
+            # for headlines in soup.find_all("h1"):
+            #     firstheadline = headlines.text.strip()
+            #     break
+
+            # Delete files with an identical title
+            self.delete_file_with_title(firstheadline, category)
+
+            # Set up the HTML payload for the ReadMe API
+            data = {
+                "hidden": "false",
+                "title": firstheadline,
+                "type": "basic",
+                "body": filecontents,
+                "category": self.get_category(category)["_id"],
+            }
+
+            # Create the new page
+            out = self.__api_POST(
+                "docs", data, "Uploading " + currfilename + " to category " + category
+            )
+            self.get_category_files(category)[firstheadline] = out
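+
+
+# Hypothetical usage sketch (not part of the module above): the token,
+# version, and path values below are placeholders.
+if __name__ == "__main__":
+    sess = ReadMeSession(auth_token="YOUR_README_TOKEN", api_version="4.0")
+    sess.empty_category("API Reference")
+    sess.upload("docs/_build/html/reference", "API Reference", recursive=True)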
diff --git a/mlmodel/docs/upload_docs.py b/mlmodel/docs/upload_docs.py
new file mode 100644
index 000000000..22a7e5292
--- /dev/null
+++ b/mlmodel/docs/upload_docs.py
@@ -0,0 +1,90 @@
+import argparse
+import readme_session
+import functools
+import coremltools
+import pathlib
+import os
+import re
+
+parser = argparse.ArgumentParser(description="Upload docs to ReadMe.")
+parser.add_argument(
+    "--version",
+    type=str,
+    help="Version to upload.",
+    default=re.split("[a-z]+", coremltools.version.__version__)[0],
+)
+parser.add_argument(
+    "--from_source_version",
+    type=str,
+    help="Create a version from this version if the current CMLT version does not have docs. "
+    + "Default is the most recent version.",
+    default=None,
+)
+parser.add_argument(
+    "--release_version", action="store_true", help="Release the version to the public."
+)
+parser.add_argument(
+    "--set_version_stable",
+    action="store_true",
+    help="Set this version as the stable (main) version.",
+)
+parser.add_argument("--auth_token", type=str, help="Token for authentication.")
+
+args = parser.parse_args()
+
+
+# Remove "coremltools" from the beginning of all filenames in this path
+def sanitize_names(path):
+    if os.path.isdir(path):
+        # get all subdirs in path and recursively sanitize all files in each subdir
+        subdirpath = path
+        onlydirs = [
+            f
+            for f in os.listdir(subdirpath)
+            if os.path.isdir(os.path.join(subdirpath, f))
+        ]
+        for dir in onlydirs:
+            sanitize_names(os.path.join(path, dir))
+
+    # get all filenames in the current dir
+    files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+
+    # iterate through all filenames and remove the coremltools prefix
+    for file in files:
+        if file.startswith("coremltools"):
+            currpath = os.path.join(path, file)
+            newpath = os.path.join(path, file[file.find(".") + 1 :])
+            os.rename(currpath, newpath)
+
+
+# API Setup
+sess = readme_session.ReadMeSession(args.auth_token)
+
+# Create version
+if args.version not in sess.get_versions():
+    sess.create_version(args.version, args.from_source_version)
+sess.set_api_version(args.version)
+
+# Upload generated folders
+docspath = str(pathlib.Path(__file__).parent.absolute() / "_build" / "html")
+dirs = [
+    os.path.join(docspath, f)
+    for f in os.listdir(docspath)
+    if os.path.isdir(os.path.join(docspath, f))
+]
+for thisdir in dirs:
+    if os.path.basename(thisdir)[0] != "_":
+        sanitize_names(thisdir)
+        print("--------- Processing " + thisdir + " ----------")
+        category = os.path.basename(thisdir)
+        sess.empty_category(category)
+        sess.upload(path=thisdir, category=category, recursive=True)
+        print("-------------------- Done ---------------------\n")
+
+# Release the version or set it to stable
+if args.release_version or args.set_version_stable:
+    sess.update_version(
+        args.version,
+        is_stable=args.set_version_stable or sess.get_version()["is_stable"],
+        is_hidden=not args.release_version or not sess.get_version()["is_hidden"],
+    )
diff --git a/mlmodel/format/FeatureTypes.proto b/mlmodel/format/FeatureTypes.proto
index 7afd10c4f..8711ac7de 100644
--- a/mlmodel/format/FeatureTypes.proto
+++ b/mlmodel/format/FeatureTypes.proto
@@ -103,7 +103,7 @@ message ImageFeatureType {
  */
 message ArrayFeatureType {

-enum ArrayDataType {
+    enum ArrayDataType {
        INVALID_ARRAY_DATA_TYPE = 0;
        FLOAT32 = 65568; // 0x10000 | 32
        DOUBLE = 65600; // 0x10000 | 64
@@ -164,6 +164,13 @@ enum ArrayDataType {
        ShapeRange shapeRange = 31;
    }

+
+    oneof defaultOptionalValue {
+        int32 intDefaultValue = 41;
+        float floatDefaultValue = 51;
+        double doubleDefaultValue = 61;
+    }
+
 }

 /**
diff --git a/mlmodel/format/Model.proto b/mlmodel/format/Model.proto
index a88acc08d..cd8d86990 100644
--- a/mlmodel/format/Model.proto
+++ b/mlmodel/format/Model.proto
@@ -153,14 +153,29 @@ message ModelDescription {
     repeated FeatureDescription input = 1;
     repeated FeatureDescription output = 10;

+    // [Required for regressor and classifier models]: the name
+    // to give to an output feature containing the prediction.
     string predictedFeatureName = 11;
+
+    // [Optional for classifier models]: the name to give to an
+    // output feature containing a dictionary mapping class
+    // labels to their predicted probabilities. If not specified,
+    // the dictionary will not be returned by the model.
string predictedProbabilitiesName = 12; repeated FeatureDescription trainingInput = 50; - + Metadata metadata = 100; } +message SerializedModel { + // Identifier whose content describes the model type of the serialized protocol buffer message. + string identifier = 1; + + // Must be a valid serialized protocol buffer of the above specified type. + bytes model = 2; +} + /** * A Core ML model, * consisting of a specification version, @@ -278,5 +293,9 @@ message Model { CoreMLModels.SoundAnalysisPreprocessing soundAnalysisPreprocessing = 2003; CoreMLModels.Gazetteer gazetteer = 2004; CoreMLModels.WordEmbedding wordEmbedding = 2005; + + // Reserved private messages start at 3000 + // These messages are subject to change with no notice or support. + SerializedModel serializedModel = 3000; } } diff --git a/mlmodel/format/NeuralNetwork.proto b/mlmodel/format/NeuralNetwork.proto index 1c6bc35ee..8ca7146f8 100644 --- a/mlmodel/format/NeuralNetwork.proto +++ b/mlmodel/format/NeuralNetwork.proto @@ -688,6 +688,17 @@ message NeuralNetworkLayer { NonMaximumSuppressionLayerParams NonMaximumSuppression = 1400; + // Following layers are available only after Core ML Specification + // version >= 5 (iOS >= 14, macOS >= 10.16) + OneHotLayerParams oneHot = 1450; + CumSumLayerParams cumSum = 1455; + ClampedReLULayerParams clampedReLU = 1460; + ArgSortLayerParams argSort = 1461; + Pooling3DLayerParams pooling3d = 1465; + GlobalPooling3DLayerParams globalPooling3d = 1466; + SliceBySizeLayerParams sliceBySize = 1470; + Convolution3DLayerParams convolution3d = 1471; + } } @@ -1330,10 +1341,26 @@ message WeightParams { bytes float16Value = 2; /** - * Raw value specification for custom layers and quantized lower precisions. + * Raw value specification for quantized lower precisions. + * + * This field is interpreted as uintN, where N is the number of bits in quantization. + * E.g. if n=8, the field is interpreted as an array of UINT8. + * Use this field for quantized parameters unless specifically noted to use + * int8RawValue. */ bytes rawValue = 30; + /** + * Field to be used if int8DynamicQuantize is set in the parent layer. + * Cannot be set if rawValue is also set. + * The values in this field are interpreted as INT8. + * + * If this field is set, following conditions must hold true: + * * QuantizationType == LinearQuantizationParams, such that + * * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams" + */ + bytes int8RawValue = 31; + /** * Quantization related parameters. */ @@ -1363,6 +1390,11 @@ message LinearQuantizationParams { * Must be an array of 1 element, or an array of C elements, where C * is number of output channels. For recurrent layers it is equal to * the output vector size. + * + * Relationship between quantized weights, unquantized weights, scale and bias: + * + * W_unquantized = W_quantized * scale + bias + * */ repeated float scale = 1; repeated float bias = 2; @@ -1503,6 +1535,187 @@ message ConvolutionLayerParams { } +/** + * A layer that performs a 3-dimensional convolution. + * + * .. code:: + * + * y = Convolution3DLayer(x) + * + * Input + * A blob of rank 5. + * The input blob's shape should be ``[batch, channels, depth, height, width]``. + * + * Fields + * The bias field, if set, should have shape of ``[channelsOut]``. + * + * Output + * A blob of rank 5. + * The output blob's shape is ``[batch, channelsOut, depthOut, heightOut, widthOut]``. + * + * Type of padding can be ``custom``, ``valid``, or ``same``. Padded values are all zeros. 
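+ * As a rough sketch (standard convolution arithmetic, given here for
+ * illustration rather than as normative text), each spatial output
+ * dimension for a given total padding follows:
+ *
+ * .. code::
+ *
+ *     outSize = floor((inSize + totalPadding - dilation * (kernelSize - 1) - 1) / stride) + 1
+ *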
+ * Output spatial dimensions depend on the the type of padding. For details, refer to the + * descriptions of the ``PaddingType`` field of this ``Convolution3DLayerParams`` message. + * + * Example + * For example, given an input of size ``[1, 3, 3, 8, 8]``, a stride of 2 in each dimension, + * a kernel of 3 in each dimension, 2 output channels, and ``same`` padding, this layer will + * compute the total padding applied in the depth, height, and width dimensions to be 2, 1, and 1, + * respectively. The depth padding is even and will be applied equally to both sides of the depth + * dimension. Since the height and width padding values are odd, they'll be applied to the + * bottom/right of the height/width dimensions. Thus, the padding applied to the input will be + * ``[1, 1, 0, 1, 0, 1]`` (front, back, top, bottom, left, right). Finally, the output produced + * will have size ``[1, 2, 2, 4, 4]``. + * + */ +message Convolution3DLayerParams { + + /** + * The number of channels in the output (channelsOut). Must be a positive integer. + */ + int32 outputChannels = 1; + + /** + * The number of channels in the input (channels). Must be a positive integer. + */ + int32 inputChannels = 2; + + /** + * Group convolution, i.e., weight reuse along the channel axis. + * It must evenly divide both the number of input and output channels and be at most the number + * of input channels (a depthwise convolution). + * Input and kernels are divided into g groups and convolution is applied within the groups + * independently. + */ + int32 nGroups = 10; + + /* Depth of the convolution kernel. Must be a positive integer. + */ + int32 kernelDepth = 20; + + /* Height of the convolution kernel. Must be a positive integer. + */ + int32 kernelHeight = 21; + + /* Width of the convolution kernel. Must be a positive integer. + */ + int32 kernelWidth = 22; + + /* Stride along the depth direction. Must be a positive integer. + */ + int32 strideDepth = 31; + + /* Stride along the height direction. Must be a positive integer. + */ + int32 strideHeight = 32; + + /* Stride along the width direction. Must be a positive integer. + */ + int32 strideWidth = 33; + + /* Dilation along the depth direction. Must be a positive integer. + */ + int32 dilationDepth = 40; + + /* Dilation along the height direction. Must be a positive integer. + */ + int32 dilationHeight = 41; + + /* Dilation along the width direction. Must be a positive integer. + */ + int32 dilationWidth = 42; + + /** + * Flag to specify whether a bias is to be added or not. + * If false, then no bias is added. + */ + bool hasBias = 50; + + /** + * Weights associated with this layer. + * Weights have the shape + * if deconvolution == False + * ``[outputChannels, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``, where + * kernelChannels == inputChannels / nGroups + * else if deconvolution == True + * ``[outputChannels / nGroups, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``, where + */ + WeightParams weights = 60; + + /** + * Must be of size ``[outputChannels]``. + */ + WeightParams bias = 61; + + + /** + * The type of padding. + * All padding types pad the input shape with zeros. + * CUSTOM padding will add the custom padding values specified below to their respective + * dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the + * input's depth dimension and `customPaddingBack` number of zeros will be added to the other + * side of the input's depth dimension. + * VALID padding adds no padding to any dimension. 
In this case, the last convolution along + * each dimension will be dropped if the input dimension and the kernel size, stride, and + * dilation do not match. + * SAME padding adds enough padding to each dimension such that the output of the convolution + * has size ``Ceiling(inputShape / stride)``. Padding is added evenly to both sides of each + * dimension unless the total padding to add is odd, in which case it is added to the + * back/bottom/right side of the respective dimension. For example, if the total padding needed + * in the depth dimension is 3, 1 zero will be added to the front side of the depth dimension + * and 2 zeros will be added to the back side. + */ + enum PaddingType { + CUSTOM = 0; + VALID = 1; + SAME = 2; + } + PaddingType paddingType = 70; + + /* Padding before the input in the depth direction. Must be zero or a positive integer. + * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. + */ + int32 customPaddingFront = 80; + + /* Padding after the input in the depth direction. Must be zero or a positive integer. + * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. + */ + int32 customPaddingBack = 81; + + /* Padding before the input in the height direction. Must be zero or a positive integer. + * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. + */ + int32 customPaddingTop = 82; + + /* Padding after the input in the height direction. Must be zero or a positive integer. + * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. + */ + int32 customPaddingBottom = 83; + + /* Padding before the input in the width direction. Must be zero or a positive integer. + * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. + */ + int32 customPaddingLeft = 84; + + /* Padding after the input in the width direction. Must be zero or a positive integer. + * Used when the `PaddingType` is `CustomPadding`, otherwise ignored by other padding types. + */ + int32 customPaddingRight = 85; + + /* Flag to specify if this is Convolution Transpose or not. + */ + bool isDeconvolution = 86; + + /* + * The output shape, which has length 3 ``[D_out, H_out, W_out]``. + * This is used only for deconvolution (``isDeconvolution == true``). + * If not set, the deconvolution output shape is calculated + * based on ``PaddingType``. + */ + repeated uint64 outputShape = 87; + +} + /** * A layer that performs a matrix-vector or matrix-matrix product. * This is equivalent to a fully-connected, or dense layer. @@ -1541,6 +1754,19 @@ message InnerProductLayerParams { WeightParams weights = 20; /// Weight matrix [C_out, C_in]. WeightParams bias = 21; /// Bias vector [C_out]. + /** + * If set, this layer, at runtime, quantizes the floating point input blob to int8 before applying an + * inner product using INT8 weight matrix parameters, as provided in weights->int8RawValue. The + * result is then dequantized. + * Requires: + * * hasBias == false + * * QuantizationType == LinearQuantizationParams, such that + * * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams" + * * numberOfBits == 8 + * * weights->rawValue_size to be empty + */ + bool int8DynamicQuantize = 22; + } /** @@ -1752,6 +1978,162 @@ message PoolingLayerParams { } +/* + * A layer to pool three spatial dimensions + * + * Input + * A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. 
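Stepping back to the `int8DynamicQuantize` field added to `InnerProductLayerParams` above: its runtime behavior can be approximated in NumPy. A toy sketch under the stated constraints (per-tensor linear quantization, a single `scale`, empty quantization `bias`, `numberOfBits == 8`); the symmetric input-quantization scheme and all values here are assumptions for illustration, not the Core ML runtime:

```python
import numpy as np

# INT8 weights as they would arrive via weights->int8RawValue ([C_out, C_in]),
# with one per-tensor scale from LinearQuantizationParams.
w_int8 = np.array([[-128, 0], [64, 127]], dtype=np.int8)
w_scale = np.float32(0.05)

x = np.array([0.3, -1.2], dtype=np.float32)

# Dynamically quantize the input, multiply in the integer domain, dequantize.
x_scale = np.abs(x).max() / 127.0
x_int8 = np.clip(np.round(x / x_scale), -127, 127).astype(np.int8)
y = (w_int8.astype(np.int32) @ x_int8.astype(np.int32)) * (x_scale * w_scale)

# Close to the float reference: (w_int8 * w_scale) @ x
print(y)
```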
+ * + * Output + * Rank is same as the input: A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. + * + * Requires 1 input and produces 1 output. + * + * For example, given an input of shape (1,1,2,3,3): + * +----+----+----+ + * / | 10 | 11 | 12 | + * / +----+----+----+ + * / | 13 | 14 | 15 | + * / +----+----+----+ + * / | 16 | 17 | 18 | + * / +----+----+----+ + * +----+----+----+ / + * | 1 | 2 | 3 | / + * +----+----+----+ / + * | 4 | 5 | 6 | / + * +----+----+----+ / + * | 7 | 8 | 9 | / + * +----+----+----+ + * + * And applying MAX pooling using: + * Kernel: 2x2x2 + * Stride: 1x1x1 + * Valid Padding + * We expect to get an output with shape: (1,1,1,2,2) and value: + * +----+----+ + * | 14 | 15 | + * +----+----+ + * | 17 | 18 | + * +----+----+ + */ +message Pooling3DLayerParams { + + enum PoolingType3D { + MAX = 0; + AVERAGE = 1; + } + + // Whether to use Max or Average + PoolingType3D type = 1; + + // Depth of the pooling region. + int32 kernelDepth = 2; + + // Height of the pooling region. + int32 kernelHeight = 3; + + // Width of the pooling region. + int32 kernelWidth = 4; + + // Stride along the depth direction + int32 strideDepth = 5; + + // Stride along the height direction + int32 strideHeight = 6; + + // Stride along the width direction + int32 strideWidth = 7; + + /** + * The type of padding. + * All padding types pad the input shape with zeros. + * CUSTOM padding will add the custom padding values specified below to their respective + * dimensions, e.g., `customPaddingFront` number of zeros will be added to one side of the + * input's depth dimension and `customPaddingBack` number of zeros will be added to the other + * side of the input's depth dimension. + * VALID padding adds no padding to any dimension. In this case, the last pool along + * each dimension will be dropped if the input dimension and the kernel size, and stride do not match. + * SAME padding adds enough padding to each dimension such that the output + * has the same spatial dimensions as the input. Padding is added evenly to both + * sides of each dimension unless the total padding to add is odd, in which case the extra padding + * is added to the back/bottom/right side of the respective dimension. For example, if the the + * total horizontal padding is 3, then there will be 1 padding on the left, and 2 padding on the right. + */ + enum Pooling3DPaddingType { + CUSTOM = 0; + VALID = 1; + SAME = 2; + } + Pooling3DPaddingType paddingType = 15; + + // Padding before the input in the depth direction. + int32 customPaddingFront = 8; + + // Padding after the input in the depth direction. + int32 customPaddingBack = 9; + + // Padding before the input in the height direction. + int32 customPaddingTop = 10; + + // Padding after the input in the height direction. + int32 customPaddingBottom = 11; + + // Padding before the input in the width direction. + int32 customPaddingLeft = 12; + + // Padding after the input in the width direction. + int32 customPaddingRight = 13; + + // If true, exclude zeros from padding in Average pooling. Meaningless in Max Pooling. + bool countExcludePadding = 14; +} + +/* + * A layer to pool three spatial dimensions down to one value. + * This behaves like a special case of Pooling3DLayerParams in which + * the Kernel is the size of the input and there is no padding. + * + * Input + * A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. 
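+ * Equivalently (an illustrative identity, not normative text): for an input
+ * with spatial dimensions (D, H, W), this layer matches Pooling3DLayerParams
+ * configured with VALID padding, stride 1, and kernel sizes:
+ *
+ * .. code::
+ *
+ *     kernelDepth = D, kernelHeight = H, kernelWidth = W
+ *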
+ * + * Output + * Rank is same as the input: A blob with rank equal to 5, representing [Batch, channels, depth, height, width]. + * Depth, height, and width of the output will always be 1. + * + * Requires 1 input and produces 1 output. + * + * For example, given an input of shape (1,1,2,3,3): + * +----+----+----+ + * / | 10 | 11 | 12 | + * / +----+----+----+ + * / | 13 | 14 | 15 | + * / +----+----+----+ + * / | 16 | 17 | 18 | + * / +----+----+----+ + * +----+----+----+ / + * | 1 | 2 | 3 | / + * +----+----+----+ / + * | 4 | 5 | 6 | / + * +----+----+----+ / + * | 7 | 8 | 9 | / + * +----+----+----+ + * + * And applying MAX global 3d pooling, we expect to get an output with shape: (1,1,1,1,1) and value: + * +----+ + * | 18 | + * +----+ + */ +message GlobalPooling3DLayerParams { + + enum GlobalPoolingType3D { + MAX = 0; + AVERAGE = 1; + } + + // Whether to use Max or Average + GlobalPoolingType3D type = 1; +} + /** * A layer that performs padding along spatial dimensions. * @@ -1762,13 +2144,14 @@ message PoolingLayerParams { * Requires 1 input and produces 1 output. * * Input - * A blob with rank at least 3. - * e.g.: blob with shape ``[C, H_in, W_in]``. - * For ranks greater than 3, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch. + * A blob with rank at least 2. + * e.g.: blob with shape ``[H_in, W_in]``. + * For ranks greater than 2, the leading dimensions, starting from 0 to -4 (inclusive), are all treated as batch + * i.e. Padding is applied on last two dimensions. * * Output * Same rank as the input. - * e.g.: blob with shape ``[C, H_out, W_out]``. + * e.g.: blob with shape ``[H_out, W_out]``. * * Output dimensions are calculated as follows: * @@ -2147,12 +2530,24 @@ message UnaryFunctionLayerParams { message UpsampleLayerParams { /** - * Scaling Factor. + * Scaling Factor. Mutually exclusive with fractionalScalingFactor. * Must be length 2 in order ``[H, W]``. * If not set, default value ``[1, 1]`` is used. */ repeated uint64 scalingFactor = 1; + /** + * Fractional scaling factor. Mutually exclusive with scalingFactor. + * Must be length 2 in order ``[H, W]``. + * If not set, default value ``[1.0, 1.0]`` is used. + */ + repeated float fractionalScalingFactor = 7; + + /* + * Overall mode for interpolating new elements when upsampling. + * NN - Nearest Neighbors - simply pick the nearest true value for interpolated values. + * BILINEAR - Use bilinear interpolation. See LinearUpsamplingMode for behavior. + */ enum InterpolationMode { NN = 0; /// Nearest Neighbour @@ -2162,6 +2557,30 @@ message UpsampleLayerParams { InterpolationMode mode = 5; + /** + * LinearUpsampleMode specifies the behavior for linear upsampling. Only valid when Interpolation Mode is BILINEAR. 
+ * If input grid is [0, Xin-1] (corresponding to an input size of Xin), and if the output size is Xout, + * then the grid points are sampled in the following manner: + * DEFAULT: + * spacing = (Xin-Xin/Xout) / (Xout-1) + * grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0,1,2,….,Xout-1 + * ALIGN_CORNERS_TRUE: + * spacing = (Xin-1) / (Xout-1) + * grid_point[i] = min(Xin-1, max(0, i * spacing)), for i = 0,1,2,….,Xout-1 + * ALIGN_CORNERS_FALSE: + * spacing = Xin / Xout + * grid_point[i] = min(Xin-1, max(0, i * spacing + 0.5 * spacing - 0.5)), for i = 0,1,2,….,Xout-1 + */ + enum LinearUpsampleMode { + + DEFAULT = 0; + ALIGN_CORNERS_TRUE = 1; + ALIGN_CORNERS_FALSE = 2; + + } + + LinearUpsampleMode linearUpsampleMode = 6; + } /** @@ -2577,6 +2996,50 @@ message PermuteLayerParams { * Square of blockSize must divide C. * Reverse of SPACE_TO_DEPTH. Data is moved from the channel dimension to the spatial dimensions. * + * mode == PIXEL_SHUFFLE + * ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W * blockSize]``. + * Square of blockSize must divide C. + * Similar to DEPTH_TO_SPACE, but using the pixel-shuffle semantics for channel order in the output space. + * In both modes, elements along the channel dimension are collapsed into + * blocks in the spatial dimensions. The difference is in the arrangement of + * the input-channels' data in the output space. See below example for more + * detail. + * (Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 10.16) + * + * + * Examples: + * + * Assume input is the following [C = 8, H = 1, W = 2] tensor: + * + * .. code:: + * + * [[[1 2]] [[3 4]] [[5 6]] [[7 8]] [[9 10]] [[11 12]] [[13 14]] [[15 16]]] + * + * If block_size == 2 and mode == DEPTH_TO_SPACE, output will be the following + * [C = 2, H = 2, W = 4] tensor: + * + * .. code:: + * + * [[[ 1 5 2 6] + * [ 9 13 10 14]] + * + * [[ 3 7 4 8] + * [11 15 12 16]]] + * + * For mode == SPACE_TO_DEPTH, the behavior is the same as mode == + * DEPTH_TO_SPACE, but with the input and output swapped. + * + * If block_size == 2 and mode == PIXEL_SHUFFLE, output will be the following + * [C = 2, H = 2, W = 4] tensor: + * + * .. code:: + * + * [[[ 1 3 2 4] + * [ 5 7 6 8]] + * + * [[ 9 11 10 12] + * [13 15 14 16]]] + * */ message ReorganizeDataLayerParams { @@ -2584,6 +3047,7 @@ message ReorganizeDataLayerParams { SPACE_TO_DEPTH = 0; DEPTH_TO_SPACE = 1; + PIXEL_SHUFFLE = 2; } ReorganizationType mode = 1; @@ -3508,6 +3972,20 @@ message BatchedMatMulLayerParams { WeightParams weights = 8; WeightParams bias = 9; /// Bias vector [X2]. Supported only when there is one input. + /** + * If set, this layer, at runtime, quantizes the floating point input blob to int8 before applying the + * matrix multiplication using the INT8 weight parameters provided in weights->int8RawValue. The + * result is then dequantized. + * Requires: + * * number of inputs to be 1 + * * hasBias == false + * * QuantizationType == LinearQuantizationParams, such that + * * size of the "scale" field is 1 and "bias" field is empty in "LinearQuantizationParams" + * * numberOfBits == 8 + * * weights->rawValue_size to be empty + */ + bool int8DynamicQuantize = 10; + } /** @@ -5360,13 +5838,30 @@ message ClipLayerParams { * "endMasks" is True * * e.g.: + * if i-th element of "squeezeMasks" is set to True, only beginIds[i] would be sliced + * out, and all other masks and inputs are ignored. + * + * e.g. 
(without squeezeMasks): * input shape: (5, 5, 5) * beginIds: [1, 2, 3] * beginMasks: [True, False, True] * endIds: [3, -3, 2] * endMasks: [False, True, True] * strides: [2, 2, 2] + * SqueezeMasks: [False, False, False] * output shape: (2, 2, 3) + * This is equivalent to input[:3:2, 2::2, ::2] + * + * e.g. (with squeezeMasks): + * input shape: (5, 5, 5) + * beginIds: [1, 2, 3] + * beginMasks: [True, False, True] + * endIds: [3, -3, 2] + * endMasks: [False, True, True] + * strides: [2, 2, 2] + * SqueezeMasks: [False, True, False] + * output shape: (2, 3) + * This is equivalent to input[:3:2, 2, ::2] * */ message SliceStaticLayerParams { @@ -5376,6 +5871,8 @@ message SliceStaticLayerParams { repeated int64 endIds = 3; repeated bool endMasks = 4; repeated int64 strides = 5; + repeated bool squeezeMasks = 6; + } @@ -5385,8 +5882,8 @@ message SliceStaticLayerParams { * Support negative indexing and negative strides. * See "SliceStaticLayerParams" for the description and an example of the functionality of the layer. * - * Requires 2 to 6 inputs and produces 1 output. - * Rank of the output is same as the rank of the first input. + * Requires 2 to 7 inputs and produces 1 output. + * Rank of the output is same as the rank of the first input unless squeezeMask is set. * * Value of beginIds, beginMasks, endIds, endMasks, strides can be passed in either * as dynamic inputs or as static parameters. @@ -5397,6 +5894,7 @@ message SliceStaticLayerParams { * The 4th input, if present, corresponds to "strides". In this case the value of the "strides" parameter is ignored. * The 5th input, if present, corresponds to "beginMasks". In this case the value of the "beginMasks" parameter is ignored. * The 6th input, if present, corresponds to "endMasks". In this case the value of the "endMasks" parameter is ignored. + * The 7th input, if present, corresponds to "squeezeMasks". In this case the value of the "squeezeMasks" parameter is ignored. * */ message SliceDynamicLayerParams { @@ -5405,6 +5903,7 @@ message SliceDynamicLayerParams { repeated int64 endIds = 3; repeated bool endMasks = 4; repeated int64 strides = 5; + repeated bool squeezeMasks = 6; } @@ -5412,10 +5911,14 @@ message SliceDynamicLayerParams { * A layer that constructs a tensor by repeating the input tensor multiple * number of times. * - * Requires 1 input and produces 1 output. + * Requires 1 or 2 inputs and produces 1 output. * Output rank is same as the input rank. * - * Length of the "reps" parameter must be at least 1 and + * If two inputs are provided, second input is used as "reps" + * and "reps" parameter is ignored. + * + * If only one input is provided, + * length of the "reps" parameter must be at least 1 and * not greater than the rank of the input. * If it is less than the input rank, it is made equal to the input rank by prepending 1's to it. * @@ -5430,6 +5933,11 @@ message SliceDynamicLayerParams { * reps after prepending ones = (1, 1, 6) * output shape = (2, 4, 12) * + * input shape = (2, 4, 2) + * second input = [1, 2, 6] -> shape: (3,) + * reps = N/A [Ignored] + * output shape = (2, 8, 12) + * */ message TileLayerParams { @@ -5671,6 +6179,90 @@ message NonMaximumSuppressionLayerParams { bool perClassSuppression = 4; } +/** + * A layer that performs element-wise clamped ReLU operation. + * + * Requires 1 input and produces 1 output. + * + * This function has the following formula: + * + * .. 
math:: + * f(x) = \begin{cases} + * \text{min}(\text{beta},x) \;\; \text{if} \;\; x \geq 0\\ + * \text{min}(\text{beta} ,\text{alpha}\cdot x) \;\; \text{if} \;\; x<0 + * \end{cases} + * + * Output shape is same as the input. + * + * Available (iOS >= 14, macOS >= 10.16, watchOS >= 7) + */ +message ClampedReLULayerParams { + + float alpha = 1; + float beta = 2; + +} + +/** +* A layer that returns the indices that would sort the input tensor, along a specified axis. +* +* Requires 1 input and produces 1 output. +* +* Output has the same rank and shape as the input. +* +* Value of "axis" must be positive and less than the rank of the input. +* +* e.g.: +* +* input shape = (5,) +* axis = 0 +* input values = [3.1, 5.4, 32.9, 3.2, 77.0] +* output shape = (5,) +* output values = [0, 3, 1, 2, 4], descending = False +* output values = [4, 2, 1, 3, 0], descending = True +* +* input shape = (2,3) +* axis = 1 +* input values = [[3, 5, 32], [3, 77, 6]] +* output shape = (2,3) +* output values = [[0, 1, 2], [0, 2, 1]], descending = False +* output values = [[2, 1, 0], [1, 2, 0]], descending = True +* +*/ +message ArgSortLayerParams { + + int64 axis = 1; /// must be between [0, input_rank - 1] + bool descending = 2; + +} + +/** + * A layer that does slice operation by providing size to be extracted + * from the given input tensor. + * + * Requires 2 inputs and produces 1 output. + * Rank of the output is same as the rank of the first input. + * + * The 1st input represents the tensor to be sliced. + * The 2nd input represents the beginning index to be sliced from. + * + * Example: + * Input 1: x (x.shape = (2, 3, 4)) + * Input 2: begin + * size: 2 + * axis: 1 + * + * Output: x[:, begin:begin+2, :] + * + */ +message SliceBySizeLayerParams { + + int64 size = 2; + int64 axis = 3; + +} + + /// Neural Network Specializations /// ------------------------------ @@ -5690,22 +6282,101 @@ message NeuralNetworkClassifier { NetworkUpdateParameters updateParams = 10; - /** - * Mapping from indexed vector of probabilities to class label - */ + // The set of labels for every possible class. oneof ClassLabels { StringVector stringClassLabels = 100; Int64Vector int64ClassLabels = 101; } - /** - * The name of the output blob which will be used as the predicted - * probabilities of each class label. - */ + // The name of the output blob containing the probability of each class. + // In other words, the score vector. Must be a 1-D tensor with the same + // number and order of elements as ClassLabels. string labelProbabilityLayerName = 200; +} + +/** + * A layer that computes the one hot representation of the input. + * + * Requires 1 or 2 inputs and produces 1 output. + * Rank of the output is one more than the first input. + * If the second input is present, it is used to determine the value of "oneHotVectorSize" and the parameter "oneHotVectorSize" is ignored. + * + * Input values correspond to indices and should typically be in the range [0,"oneHotVectorSize" -1]. If it is outside this range, a vector of all "offValue" will be chosen. + * + * Typically one hot vectors contain 0s everywhere, except 1 at the index that the input corresponds to. + * However, instead of 0, any float value could be generated by using the "offValue" parameter. + * Similarly, instead of 1, any other value can be used by employing the "onValue" parameter. 
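+ * In NumPy-like pseudo-code (illustrative, shown for axis = -1):
+ *
+ * .. code::
+ *
+ *     output[..., k] = onValue if k == input_value else offValue, for k in [0, oneHotVectorSize)
+ *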
+ * + * e.g.: + * input shape: (10,), "oneHotVectorSize" : 32, axis=-1, then output shape will be (10,32) + * input shape: (10,23), "oneHotVectorSize" : 32, axis=1, then output shape will be (10,32,23) + * input shape: (10,), "oneHotVectorSize" : 32, axis=0, then output shape will be (32,10) + * + * input shape: (2,), "oneHotVectorSize" : 4, axis=-1, then output shape will be (2,4) + * say input values = [2, 0], and "onValue" = 5, and "offValue" = -1, then output will be: + * [-1, -1, 5, -1 + * 5, -1, -1, -1] + * + * say input values = [2, -1], and "onValue" = 5, and "offValue" = -1, then output will be: + * [-1, -1, 5, -1 + * -1, -1, -1, -1] + * + * Available (iOS >= 14, macOS >= 10.16, watchOS >= 7) + */ + +message OneHotLayerParams { + + uint64 oneHotVectorSize = 1; /// size of the one hot vector + int64 axis = 2; /// negative indexing is supported. It refers to the axis in the output tensor. + float onValue = 3; + float offValue = 4; } + +/** + * A layer that computes the cumsum values of the input along a given axis. + * + * Requires 1 or 2 inputs and produces 1 output. + * + * Output shape and rank is same as the first input. + * If the second input is present, it is used to determine the value of "axis" and the parameter "axis" is ignored. + * + * e.g.: + * Input shape = (3,), values it has: [4, 6, 7] + * + * Then output values will be: + * + * if "excludeFinalSum" = False and "reverse" = False: + * output values : [4, 10, 17] + * + * if "excludeFinalSum" = True and "reverse" = False: + * output values : [0, 4, 10] + * + * if "excludeFinalSum" = False and "reverse" = True: + * output values : [17, 13, 7] + * + * if "excludeFinalSum" = True and "reverse" = True: + * output values : [13, 7, 0] + * + * + * Available (iOS >= 14, macOS >= 10.16, watchOS >= 7) + */ + + +message CumSumLayerParams { + + int64 axis = 1; /// negative indexing is supported + + /// if true, the first element of the output is 0, and the last element contains the sum of the input up to the penultimate value + /// if false, the first element of the output is same as the input and the last element is the sum of all the input values + /// (this behavior is reversed when "reverse" flag is True) + bool excludeFinalSum = 2; + + bool reverse = 3; /// if true, cumsum is performed in the opposite direction +} + + /** * A neural network specialized as a regressor. 
*/ @@ -5830,4 +6501,4 @@ message AdamOptimizer { DoubleParameter beta2 = 4; DoubleParameter eps = 5; -} \ No newline at end of file +} diff --git a/mlmodel/format/VisionFeaturePrint.proto b/mlmodel/format/VisionFeaturePrint.proto index 5c3805a2f..e195c37ea 100644 --- a/mlmodel/format/VisionFeaturePrint.proto +++ b/mlmodel/format/VisionFeaturePrint.proto @@ -9,7 +9,7 @@ option optimize_for = LITE_RUNTIME; package CoreML.Specification.CoreMLModels; /** -* A model which takes an input image and outputs an array of features +* A model which takes an input image and outputs array(s) of features * according to the specified feature types */ message VisionFeaturePrint { @@ -29,9 +29,35 @@ message VisionFeaturePrint { SceneVersion version = 1; } + // Object extracts features useful for identifying and localizing + // objects in natural images + message Object { + enum ObjectVersion { + OBJECT_VERSION_INVALID = 0; + // VERSION_1 is available on iOS,tvOS 14.0+, macOS 10.16+ + // It uses a 299x299 input image and yields two multiarray + // features: one at high resolution of shape (288, 35, 35) + // the other at low resolution of shape (768, 17, 17) + OBJECT_VERSION_1 = 1; + } + + ObjectVersion version = 1; + + /* + * Stores the names of the output features according to the + * order of them being computed from the neural network, i.e., + * the first element in the output is the earliest being + * computed, while the last is the latest being computed. In + * general, the order reflects the resolution of the feature. + * The earlier it is computed, the higher the feature resolution. + */ + repeated string output = 100; + } + // Vision feature print type oneof VisionFeaturePrintType { Scene scene = 20; + Object object = 21; } } diff --git a/mlmodel/src/Comparison.cpp b/mlmodel/src/Comparison.cpp index 5ed7dd84f..9087fda3a 100644 --- a/mlmodel/src/Comparison.cpp +++ b/mlmodel/src/Comparison.cpp @@ -54,6 +54,9 @@ namespace CoreML { return a.neuralnetworkclassifier() == b.neuralnetworkclassifier(); case Model::kNeuralNetwork: return a.neuralnetwork() == b.neuralnetwork(); + case Model::kSerializedModel: + return a.serializedmodel().identifier() == b.serializedmodel().identifier() && + a.serializedmodel().model() == b.serializedmodel().model(); case Model::kBayesianProbitRegressor: return a.bayesianprobitregressor() == b.bayesianprobitregressor(); case Model::kOneHotEncoder: @@ -983,6 +986,19 @@ namespace CoreML { return false; } break; + case CoreMLModels::VisionFeaturePrint::kObject: + if (a.object().version() != b.object().version()) { + return false; + } + if (a.object().output_size() != b.object().output_size()) { + return false; + } + for (int i = 0; i < a.object().output_size(); i++) { + if (a.object().output(i) != b.object().output(i)) { + return false; + } + } + break; case CoreMLModels::VisionFeaturePrint::VISIONFEATUREPRINTTYPE_NOT_SET: break; } diff --git a/mlmodel/src/DataType.cpp b/mlmodel/src/DataType.cpp index 6b3a27713..c99d62b97 100644 --- a/mlmodel/src/DataType.cpp +++ b/mlmodel/src/DataType.cpp @@ -279,7 +279,7 @@ FeatureType FeatureType::T() { return FeatureType(U); } static std::string dimensionsToString(const std::vector &dims, bool useArrayFormat = false) { std::stringstream ss; - std::string separator = useArrayFormat ? ", " : " x "; + std::string separator = useArrayFormat ? 
", " : " × "; if (useArrayFormat) { ss << "["; @@ -336,7 +336,7 @@ FeatureType FeatureType::T() { return FeatureType(U); } static std::string dimensionRangesToString(const std::vector> &rangePerDimension, bool useArrayFormat = false) { std::stringstream ss; - std::string separator = useArrayFormat ? ", " : " x "; + std::string separator = useArrayFormat ? ", " : " × "; if (useArrayFormat) { ss << "["; diff --git a/mlmodel/src/Format.hpp b/mlmodel/src/Format.hpp index d5d87ff4b..7d901cfe3 100644 --- a/mlmodel/src/Format.hpp +++ b/mlmodel/src/Format.hpp @@ -15,6 +15,7 @@ #include #include "../build/format/Model.pb.h" +#include "../build/format/Model_enums.h" #pragma clang diagnostic pop diff --git a/mlmodel/src/Globals.hpp b/mlmodel/src/Globals.hpp index 1bc9e953a..0cc57c1ab 100644 --- a/mlmodel/src/Globals.hpp +++ b/mlmodel/src/Globals.hpp @@ -27,9 +27,9 @@ namespace CoreML { // - New neural network layers (resizeBilinear, cropResize) // - .. static const int32_t MLMODEL_SPECIFICATION_VERSION_IOS12 = 3; - + // Version 4 supports: - // - New NN layers, non rank 5 tensors + // - New NN layers, non rank 5 tensors // - Updatable models // - Exact shape / general rank mapping for neural networks // - Large expansion of supported neural network layers @@ -43,7 +43,13 @@ namespace CoreML { // - Linked Model static const int32_t MLMODEL_SPECIFICATION_VERSION_IOS13 = 4; - static const int32_t MLMODEL_SPECIFICATION_VERSION_NEWEST = MLMODEL_SPECIFICATION_VERSION_IOS13; + // version 5: + // - New NN layers part of the proto message "NeuralNetworkLayer" + // - Non-Zero default values for optional inputs in case of Neural Networks + // - Float32 input/output for NonmaximumSuppression model + static const int32_t MLMODEL_SPECIFICATION_VERSION_IOS14 = 5; + + static const int32_t MLMODEL_SPECIFICATION_VERSION_NEWEST = MLMODEL_SPECIFICATION_VERSION_IOS14; } diff --git a/mlmodel/src/ItemSimilarityRecommenderCommon.cpp b/mlmodel/src/ItemSimilarityRecommenderCommon.cpp index 39f716e23..8f048fb5f 100644 --- a/mlmodel/src/ItemSimilarityRecommenderCommon.cpp +++ b/mlmodel/src/ItemSimilarityRecommenderCommon.cpp @@ -7,23 +7,21 @@ // #include "ItemSimilarityRecommenderCommon.hpp" -#include "ValidatorUtils-inl.hpp" -#include "Validators.hpp" +#include "Validation/ValidatorUtils-inl.hpp" +#include "Validation/Validators.hpp" #include "Format.hpp" namespace CoreML { namespace Recommender { _ItemSimilarityRecommenderData::_ItemSimilarityRecommenderData(const Specification::ItemSimilarityRecommender& isr) { - // Validate that we have item_ids in the correct 0, 1, ..., n-1 sequence. - std::set index_hits; - + uint64_t max_item = 0; int n_similarities = isr.itemitemsimilarities_size(); for(int i = 0; i < n_similarities; ++i) { const auto& item_sim_info = isr.itemitemsimilarities(i); uint64_t item_id = item_sim_info.itemid(); - index_hits.insert(item_id); + max_item = std::max(max_item, item_id); auto& interaction_list_dest = item_interactions[item_id]; int n_interactions = item_sim_info.similaritemlist_size(); @@ -35,7 +33,7 @@ namespace CoreML { namespace Recommender { interaction_list_dest.push_back({inter_id, score}); - index_hits.insert(inter_id); + max_item = std::max(max_item, inter_id); } // Sort to ensure equality between equivalent models. @@ -46,43 +44,54 @@ namespace CoreML { namespace Recommender { } } - if(index_hits.size() <= *index_hits.rbegin()) { - - // Make sure that all the model actually numbers things correctly. 
- throw std::invalid_argument("Item IDs in the recommender model must be numbered 0, 1, ..., num_items - 1."); - } - - num_items = index_hits.size(); + num_items = 0; // Check out the item similarity if(isr.has_itemint64ids() && isr.itemint64ids().vector_size() != 0) { if(isr.has_itemstringids() && isr.itemstringids().vector_size() != 0) { throw std::invalid_argument("Only integer item ids or string item ids can be specified in the same model."); } - - if(size_t(isr.itemint64ids().vector_size()) != num_items) { - throw std::invalid_argument("Number of integer item ids specified (" - + std::to_string(isr.itemint64ids().vector_size()) - + ") does not equal the number of items given (" - + std::to_string(num_items) + ")"); + + num_items = uint64_t(isr.itemint64ids().vector_size()); + + if(num_items <= max_item) { + throw std::invalid_argument("List of integer item ids specified must be " + "large enough to index all item ids specified. The largest item " + "index is " + std::to_string(max_item) + ", whereas there are " + " only " + std::to_string(num_items) + " item ids given."); } - integer_id_values.resize((size_t)num_items); - for(size_t i = 0; i < num_items; ++i) { - integer_id_values[i] = isr.itemint64ids().vector(int(i)); + + const auto& itemint64idsVector = isr.itemint64ids().vector(); + integer_id_values.reserve(static_cast(num_items)); + std::copy(itemint64idsVector.begin(), itemint64idsVector.end(), std::back_inserter(integer_id_values)); + + if(std::set(integer_id_values.begin(), integer_id_values.end()).size() != num_items) { + throw std::invalid_argument("List of integer item ids specified must be " + "unique; list contains duplicates."); } + } else if(isr.has_itemstringids() && isr.itemstringids().vector_size() != 0) { - if(size_t(isr.itemstringids().vector_size()) != num_items) { - throw std::invalid_argument("Number of string item ids specified (" - + std::to_string(isr.itemstringids().vector_size()) - + ") does not equal the number of items given (" - + std::to_string(num_items) + ")"); - } + + num_items = size_t(isr.itemstringids().vector_size()); + + if(size_t(isr.itemstringids().vector_size()) < max_item) { + throw std::invalid_argument("List of string item ids specified must be " + "large enough to index all item ids specified. 
The largest item " + "index is " + std::to_string(max_item) + ", whereas there are " + " only " + std::to_string(num_items) + " item ids given."); + } - string_id_values.resize((size_t)num_items); - for(size_t i = 0; i < num_items; ++i) { - string_id_values[i] = isr.itemstringids().vector(int(i)); - } + const auto& itemstringidsVector = isr.itemstringids().vector(); + string_id_values.reserve(static_cast(num_items)); + std::copy(itemstringidsVector.begin(), itemstringidsVector.end(), std::back_inserter(string_id_values)); + + if(std::set(string_id_values.begin(), string_id_values.end()).size() != num_items) { + throw std::invalid_argument("List of string item ids specified must be " + "unique; list contains duplicates."); + } + } else { + num_items = max_item + 1; } // Check out the specific parameters diff --git a/mlmodel/src/LayerShapeConstraints.hpp b/mlmodel/src/LayerShapeConstraints.hpp index 982351931..a8c4da5ff 100644 --- a/mlmodel/src/LayerShapeConstraints.hpp +++ b/mlmodel/src/LayerShapeConstraints.hpp @@ -13,7 +13,7 @@ #include #include #include -#include "Validators.hpp" +#include "Validation/Validators.hpp" namespace CoreML { diff --git a/MLModelSpecification.hpp b/mlmodel/src/MLModelSpecification.hpp similarity index 58% rename from MLModelSpecification.hpp rename to mlmodel/src/MLModelSpecification.hpp index cd4380615..70ec6f839 100644 --- a/MLModelSpecification.hpp +++ b/mlmodel/src/MLModelSpecification.hpp @@ -17,28 +17,28 @@ #pragma clang diagnostic ignored "-Wunused-parameter" #pragma clang diagnostic ignored "-Wshorten-64-to-32" #pragma clang diagnostic ignored "-Wshadow" -#pragma clang diagnostic ignored "-Wextended-offsetof" #pragma clang diagnostic ignored "-Wdeprecated-declarations" #pragma clang diagnostic ignored "-Wdeprecated" #pragma clang diagnostic ignored "-Wundef" #pragma clang diagnostic ignored "-Wold-style-cast" #pragma clang diagnostic ignored "-Wweak-vtables" +#pragma clang diagnostic ignored "-Wenum-compare-switch" -#include "mlmodel/src/Model.hpp" -#include "mlmodel/src/Validators.hpp" -#include "mlmodel/src/Utils.hpp" +#include "Model.hpp" +#include "Validation/Validators.hpp" +#include "Utils.hpp" -#include "mlmodel/src/transforms/Pipeline.hpp" -#include "mlmodel/src/transforms/LinearModel.hpp" -#include "mlmodel/src/transforms/TreeEnsemble.hpp" -#include "mlmodel/src/transforms/DictVectorizer.hpp" -#include "mlmodel/src/transforms/NeuralNetwork.hpp" -#include "mlmodel/src/transforms/OneHotEncoder.hpp" -#include "mlmodel/src/transforms/FeatureVectorizer.hpp" -#include "mlmodel/src/transforms/TreeEnsemble.hpp" -#include "mlmodel/src/TreeEnsembleCommon.hpp" +#include "transforms/Pipeline.hpp" +#include "transforms/LinearModel.hpp" +#include "transforms/TreeEnsemble.hpp" +#include "transforms/DictVectorizer.hpp" +#include "transforms/NeuralNetwork.hpp" +#include "transforms/OneHotEncoder.hpp" +#include "transforms/FeatureVectorizer.hpp" +#include "transforms/TreeEnsemble.hpp" +#include "TreeEnsembleCommon.hpp" -#include "mlmodel/build/format/Model.pb.h" +#include "../build/format/Model.pb.h" #pragma clang diagnostic pop diff --git a/mlmodel/src/Model.cpp b/mlmodel/src/Model.cpp index 9b75a02da..448f49ed5 100644 --- a/mlmodel/src/Model.cpp +++ b/mlmodel/src/Model.cpp @@ -113,7 +113,10 @@ namespace CoreML { VALIDATE_MODEL_TYPE(itemSimilarityRecommender); VALIDATE_MODEL_TYPE(soundAnalysisPreprocessing); VALIDATE_MODEL_TYPE(linkedModel); + case MLModelType_serializedModel: + return {}; case MLModelType_NOT_SET: + default: return 
Result(ResultType::INVALID_MODEL_INTERFACE, "Model did not specify a valid model-parameter type."); } } @@ -132,6 +135,7 @@ namespace CoreML { Result r = loadSpecification(*(out.m_spec), in); if (!r.good()) { return r; } // validate on load + r = out.validate(); return r; @@ -301,12 +305,12 @@ namespace CoreML { extern "C" { _MLModelSpecification::_MLModelSpecification() - : cppFormat(new CoreML::Specification::Model()) + : cppFormat(std::make_shared()) { } _MLModelSpecification::_MLModelSpecification(const CoreML::Specification::Model& te) -: cppFormat(new CoreML::Specification::Model(te)) +: cppFormat(std::make_shared(te)) { } @@ -314,21 +318,21 @@ _MLModelSpecification::_MLModelSpecification() cppFormat.reset(new CoreML::Specification::Model(te.getProto())); } -_MLModelMetadataSpecification::_MLModelMetadataSpecification() : cppMetadata(new CoreML::Specification::Metadata()) +_MLModelMetadataSpecification::_MLModelMetadataSpecification() : cppMetadata(std::make_shared()) { } _MLModelMetadataSpecification::_MLModelMetadataSpecification(const CoreML::Specification::Metadata& meta) -: cppMetadata(new CoreML::Specification::Metadata(meta)) +: cppMetadata(std::make_shared(meta)) { } - -_MLModelDescriptionSpecification::_MLModelDescriptionSpecification() : cppInterface(new CoreML::Specification::ModelDescription()) + +_MLModelDescriptionSpecification::_MLModelDescriptionSpecification() : cppInterface(std::make_shared()) { } _MLModelDescriptionSpecification::_MLModelDescriptionSpecification(const CoreML::Specification::ModelDescription& interface) -: cppInterface(new CoreML::Specification::ModelDescription(interface)) +: cppInterface(std::make_shared(interface)) { } diff --git a/mlmodel/src/Model.hpp b/mlmodel/src/Model.hpp index 503337738..0aedda508 100644 --- a/mlmodel/src/Model.hpp +++ b/mlmodel/src/Model.hpp @@ -7,7 +7,7 @@ #include "Globals.hpp" #include "Result.hpp" -#include "Validators.hpp" +#include "Validation/Validators.hpp" #include "../build/format/Model_enums.h" #include "../build/format/Normalizer_enums.h" diff --git a/mlmodel/src/NeuralNetworkBuffer.cpp b/mlmodel/src/NeuralNetworkBuffer.cpp new file mode 100644 index 000000000..02e1f1d3a --- /dev/null +++ b/mlmodel/src/NeuralNetworkBuffer.cpp @@ -0,0 +1,132 @@ +// +// NeuralNetworkBuffer.cpp +// CoreML +// +// Created by Bhushan Sonawane on 11/8/19. +// Copyright © 2019 Apple Inc. All rights reserved. +// + +#include "NeuralNetworkBuffer.hpp" +#include +#include +#include +#include +#include + +namespace NNBuffer { + + /* + * getOpenMode - Returns open model as per the mode provided + */ + static std::ios_base::openmode getOpenMode(bufferMode mode) + { + return (mode == bufferMode::read) + ? (std::fstream::in | std::ios::binary) + : (std::fstream::in | std::fstream::out | std::ios::binary + | (mode == bufferMode::write ? 
std::ios::trunc : std::ios::app)); + + } + + /* + * NeuralNetworkBuffer - NeuralNetworkBuffer + */ + NeuralNetworkBuffer::NeuralNetworkBuffer(const std::string& bufferFilePath, bufferMode mode) + : bufferFilePath(bufferFilePath), + bufferStream(bufferFilePath, getOpenMode(mode)) + { + if (!bufferStream) { + throw std::runtime_error(std::string("Could not open buffer file '" + bufferFilePath + "': ") + std::strerror(errno) + '.'); + } + } + + /* + * NeuralNetworkBuffer - NeuralNetworkBuffer + */ + NeuralNetworkBuffer::~NeuralNetworkBuffer() = default; + + /* + * NeuralNetworkBuffer - addBuffer + * Writes given data into buffer file + * Writes in following order + * [Length of data, data type, data] + * Number of bytes written = Length_Of_Data * Size_Of_Data_Type + */ + template + uint64_t NeuralNetworkBuffer::addBuffer(const std::vector& buffer) { + bufferStream.seekp(0, std::ios::end); + if (!bufferStream.good()) { + throw std::runtime_error(std::string("Could not seek to end of data file: ") + std::strerror(errno) + '.'); + } + + // Get offset + auto offset = bufferStream.tellp(); + + // Write length, size of data type and buffer + int64_t lenOfBuffer = static_cast(buffer.size()); + int64_t sizeOfBlock = sizeof(T); + + bufferStream.write((char*)&lenOfBuffer, sizeof(lenOfBuffer)); + if (bufferStream.fail()) { + throw std::runtime_error(std::string("Could not write length of data file: ") + std::strerror(errno) + '.'); + } + + bufferStream.write((char*)&sizeOfBlock, sizeof(sizeOfBlock)); + if (bufferStream.fail()) { + throw std::runtime_error(std::string("Could not write size of data block: ") + std::strerror(errno) + '.'); + } + + bufferStream.write((char*)&buffer[0], static_cast(sizeOfBlock * lenOfBuffer)); + if (bufferStream.fail()) { + throw std::runtime_error(std::string("Could not write data to data file: ") + std::strerror(errno) + '.'); + } + + return static_cast(offset); + } + + /* + * NeuralNetworkBuffer - getBuffer + * Reads data from given offset + */ + template + void NeuralNetworkBuffer::getBuffer(const uint64_t offset, std::vector& buffer) { + int64_t lenOfBuffer = 0; + int64_t sizeOfBlock = 0; + + bufferStream.seekg(static_cast(offset), std::ios::beg); + if (!bufferStream.good()) { + throw std::runtime_error(std::string("Could not seek to beginning of data file: ") + std::strerror(errno) + '.'); + } + + // Read length of buffer and size of each block + bufferStream.read((char*)&lenOfBuffer, sizeof(lenOfBuffer)); + if (bufferStream.fail()) { + throw std::runtime_error(std::string("Could not read length of data file: ") + std::strerror(errno) + '.'); + } + + bufferStream.read((char*)&sizeOfBlock, sizeof(sizeOfBlock)); + if (bufferStream.fail()) { + throw std::runtime_error(std::string("Could not read size of data block: ") + std::strerror(errno) + '.'); + } + + // TODO: assert if sizeOfBlock != sizeof(T) or resize accordingly. 
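+        // On-disk record layout, as written by addBuffer:
+        // [ int64 length | int64 sizeOfBlock | length * sizeOfBlock bytes of payload ]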
+ // Resize buffer to fit buffer + buffer.resize(static_cast::size_type>(lenOfBuffer)); + + // Read buffer + bufferStream.read((char*)&buffer[0], static_cast(sizeOfBlock * lenOfBuffer)); + if (bufferStream.fail()) { + throw std::runtime_error(std::string("Could not read data from data file: ") + std::strerror(errno) + '.'); + } + } + + // Explicit include templated functions + template uint64_t NeuralNetworkBuffer::addBuffer(const std::vector&); + template uint64_t NeuralNetworkBuffer::addBuffer(const std::vector&); + template uint64_t NeuralNetworkBuffer::addBuffer(const std::vector&); + template uint64_t NeuralNetworkBuffer::addBuffer(const std::vector&); + + template void NeuralNetworkBuffer::getBuffer(const uint64_t, std::vector&); + template void NeuralNetworkBuffer::getBuffer(const uint64_t, std::vector&); + template void NeuralNetworkBuffer::getBuffer(const uint64_t, std::vector&); + template void NeuralNetworkBuffer::getBuffer(const uint64_t, std::vector&); +} diff --git a/mlmodel/src/NeuralNetworkBuffer.hpp b/mlmodel/src/NeuralNetworkBuffer.hpp new file mode 100644 index 000000000..b4244a605 --- /dev/null +++ b/mlmodel/src/NeuralNetworkBuffer.hpp @@ -0,0 +1,51 @@ +// +// NeuralNetworkBuffer.hpp +// CoreML +// +// Created by Bhushan Sonawane on 11/8/19. +// Copyright © 2019 Apple Inc. All rights reserved. +// + +#ifndef NeuralNetworkBuffer_hpp +#define NeuralNetworkBuffer_hpp + +#include +#include +#include + +namespace NNBuffer { + // + // NeuralNetworkBuffer - Network parameter read-write management to file + // Current management policy + // Each parameter is written to binary file in following order. + // [Length of data (size_t), Data type of data (size_t), data (length of data * size of data type)] + // + + enum bufferMode { + write=0, + append, + read + }; + + class NeuralNetworkBuffer { + private: + std::string bufferFilePath; + std::fstream bufferStream; + + public: + // Must be constructed with file path to store parameters + NeuralNetworkBuffer(const std::string&, bufferMode mode=bufferMode::write); + ~NeuralNetworkBuffer(); + + // Stores given buffer and returns offset in buffer file + template + uint64_t addBuffer(const std::vector&); + + // Reads buffer from given offset and stores in vector + // passed by reference. + // Note that, this routine resizes the given vector. 
+ template + void getBuffer(const uint64_t, std::vector&); + }; +} +#endif /* NeuralNetworkBuffer_hpp */ diff --git a/mlmodel/src/Result.cpp b/mlmodel/src/Result.cpp index 7b7db6c9a..dac329721 100644 --- a/mlmodel/src/Result.cpp +++ b/mlmodel/src/Result.cpp @@ -1,5 +1,9 @@ #include "Result.hpp" + +#include "DataType.hpp" #include "Format.hpp" +#include "ResultReason.hpp" +#include "ResultType.hpp" #include @@ -7,19 +11,27 @@ namespace CoreML { static const char* m_prefix = "validator error: "; - Result::Result() : m_type(ResultType::NO_ERROR), m_message("not an error") { } - - Result::Result(ResultType type, const std::string& message) : - m_type(type), m_message(m_prefix + message) { + Result::Result() + : Result(ResultType::NO_ERROR, ResultReason::UNKNOWN, "not an error") + { } + Result::Result(ResultType type, const std::string& message) + : Result(type, ResultReason::UNKNOWN, message) + { } - } + Result::Result(ResultType type, ResultReason reason, const std::string& message) + : m_type(type) + , m_reason(reason) + , m_message(m_prefix + message) { } - bool Result::good() const { +bool Result::good() const { return (m_type == ResultType::NO_ERROR || m_type == ResultType::POTENTIALLY_INVALID_NEURAL_NETWORK_SHAPES); } - const ResultType& Result::type() const { + ResultReason Result::reason() const { + return m_reason; + } +ResultType Result::type() const { return m_type; } @@ -28,8 +40,8 @@ namespace CoreML { } Result Result::typeMismatchError( - FeatureType expected, - FeatureType actual, + const FeatureType& expected, + const FeatureType& actual, const std::string& parameterName) { Result out; @@ -46,7 +58,7 @@ namespace CoreML { Result Result::featureTypeInvariantError( const std::vector& allowed, - FeatureType actual) { + const FeatureType& actual) { Result out; std::stringstream ss; diff --git a/mlmodel/src/Result.hpp b/mlmodel/src/Result.hpp index 1dd8c0e62..32a6378f0 100644 --- a/mlmodel/src/Result.hpp +++ b/mlmodel/src/Result.hpp @@ -1,67 +1,39 @@ #ifndef MLMODEL_RESULT_HPP #define MLMODEL_RESULT_HPP -#include "DataType.hpp" +#include +#include namespace CoreML { -enum class ResultType { - NO_ERROR, - - // Feature type of transform doesn't match target type of a prior - // transform, i.e. one-hot encoding the output of a linear regressor. - TYPE_MISMATCH, - - // Feature type of a transform violates invariant type conditions, - // i.e. passing a dictionary to a linear regressor. 
- FEATURE_TYPE_INVARIANT_VIOLATION, - - // File I/O errors - UNABLE_TO_OPEN_FILE, - FAILED_TO_SERIALIZE, - FAILED_TO_DESERIALIZE, - - // Invalid protobuf file (internally inconsistent) - INVALID_COMPATIBILITY_VERSION, - UNSUPPORTED_COMPATIBILITY_VERSION, - UNSUPPORTED_FEATURE_TYPE_FOR_MODEL_TYPE, - TOO_MANY_FEATURES_FOR_MODEL_TYPE, - INTERFACE_FEATURE_NAME_MISMATCH, - INTERFACE_MODEL_PARAMETER_MISMATCH, - INVALID_MODEL_INTERFACE, - - // Invalid protobuf model parameters - INVALID_MODEL_PARAMETERS, - - // Invalid updatable parameters - INVALID_UPDATABLE_MODEL_PARAMETERS, - INVALID_UPDATABLE_MODEL_CONFIGURATION, - - // NN shaper failure, not necessarily an error - POTENTIALLY_INVALID_NEURAL_NETWORK_SHAPES -}; +class FeatureType; +enum class ResultReason; +enum class ResultType; class Result { private: - ResultType m_type = ResultType::NO_ERROR; + ResultType m_type; + ResultReason m_reason; std::string m_message; public: Result(); bool good() const; Result(ResultType type, const std::string& message); + Result(ResultType type, ResultReason reason, const std::string& message); const std::string& message() const; - const ResultType& type() const; + ResultReason reason() const; + ResultType type() const; static Result typeMismatchError( - FeatureType expected, - FeatureType actual, + const FeatureType& expected, + const FeatureType& actual, const std::string& parameterName); static Result featureTypeInvariantError( const std::vector& allowed, - FeatureType actual); + const FeatureType& actual); bool operator==(const Result& other) const; @@ -69,8 +41,6 @@ class Result { }; } -// TODO -- Should we make this throw a C++ exception instead of result type? - /* * A convenience macro to pass results onto the caller. Useful when a function * both returns a Result and calls other functions that return a Result, and @@ -78,9 +48,9 @@ class Result { */ #define HANDLE_RESULT_AND_RETURN_ON_ERROR(EXPR) \ { \ - Result r = EXPR; \ - if (!r.good()) { \ - return r; \ + Result _result = (EXPR); \ + if (!_result.good()) { \ + return _result; \ } \ } \ diff --git a/mlmodel/src/ResultReason.hpp b/mlmodel/src/ResultReason.hpp new file mode 100644 index 000000000..7b6b676ee --- /dev/null +++ b/mlmodel/src/ResultReason.hpp @@ -0,0 +1,83 @@ +// +// ResultReason.hpp +// CoreML +// +// Created by Jeff Kilpatrick on 12/16/19. +// Copyright © 2019 Apple Inc. All rights reserved. +// + +#pragma once + +namespace CoreML { + +/** + Super specific reasons for non-good Results. 
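+
+ For example (illustrative values), a validator can construct
+
+     Result(ResultType::INVALID_MODEL_INTERFACE,
+            ResultReason::MODEL_MAIN_INPUT_COUNT_MISMATCHED,
+            "main expects 2 inputs, got 1")
+
+ so that callers can branch on reason() instead of parsing the message string.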
+ */ +enum class ResultReason { + UNKNOWN, + + // ----------------------------------------- + // Model validation + MODEL_INPUT_TYPE_INVALID, + MODEL_OUTPUT_TYPE_INVALID, + + // ----------------------------------------- + // Program validation + BLOCK_INPUT_COUNT_MISMATCHED, + BLOCK_OUTPUT_NAME_EMPTY, + BLOCK_OUTPUT_COUNT_MISMATCHED, + BLOCK_OUTPUT_TYPE_MISMATCHED, + BLOCK_OUTPUT_VALUE_UNDEFINED, + BLOCK_PARAM_NAME_EMPTY, + BLOCK_PARAM_NAME_SHADOWS, + + FUNCTION_BLOCK_RETURN_COUNT_MISMATCHED, + FUNCTION_BLOCK_RETURN_TYPE_MISMATCHED, + FUNCTION_NAME_EMPTY, + FUNCTION_PARAM_NAME_EMPTY, + FUNCTION_PARAM_NAME_SHADOWS, + FUNCTION_PARAM_TYPE_NULL, + + MODEL_MAIN_IMAGE_INPUT_SIZE_BAD, + MODEL_MAIN_IMAGE_INPUT_TYPE_BAD, + MODEL_MAIN_IMAGE_OUTPUT_SIZE_BAD, + MODEL_MAIN_IMAGE_OUTPUT_TYPE_BAD, + MODEL_MAIN_INPUT_COUNT_MISMATCHED, + MODEL_MAIN_INPUT_OUTPUT_MISSING, + MODEL_MAIN_INPUT_OUTPUT_TYPE_INVALID, + MODEL_MAIN_INPUT_RANK_MISMATCHED, + MODEL_MAIN_INPUT_SHAPE_MISMATCHED, + MODEL_MAIN_INPUT_TYPE_MISMATCHED, + MODEL_MAIN_OUTPUT_COUNT_MISMATCHED, + MODEL_MAIN_OUTPUT_RANK_MISMATCHED, + MODEL_MAIN_OUTPUT_SHAPE_MISMATCHED, + MODEL_MAIN_OUTPUT_TYPE_MISMATCHED, + + OP_ARG_COUNT_MISMATCH, + OP_ARG_NAME_EMPTY, + OP_ARG_OUTPUT_CIRCULAR_DEFINITION, + OP_ARG_TYPE_MISMATCH, + OP_ARG_VALUE_UNDEFINED, + OP_ATTRIBUTE_NAME_EMPTY, + OP_ATTRIBUTE_VALUE_UNDEFINED, + OP_BLOCK_COUNT_INVALID, + OP_INVALID_IN_CONTEXT, + OP_NAME_EMPTY, + OP_OUTPUT_COUNT_MISMATCHED, + OP_OUTPUT_NAME_EMPTY, + OP_OUTPUT_NAME_SHADOWS, + OP_OUTPUT_TYPE_INVALID, + OP_PARAM_COUNT_MISMATCHED, + OP_PARAM_INVALID, + OP_PARAM_NAME_EMPTY, + OP_REQUIRED_ARG_NOT_FOUND, + + PARAMETER_NAME_EMPTY, + PARAMETER_VALUE_UNDEFINED, + + PROGRAM_MAIN_FUNCTION_MISSING, + PROGRAM_NULL, + PROGRAM_PARSE_THREW, +}; + +} diff --git a/mlmodel/src/ResultType.hpp b/mlmodel/src/ResultType.hpp new file mode 100644 index 000000000..3c4d8f2be --- /dev/null +++ b/mlmodel/src/ResultType.hpp @@ -0,0 +1,49 @@ +// +// ResultType.hpp +// CoreML +// +// Created by Jeff Kilpatrick on 12/16/19. +// Copyright © 2019 Apple Inc. All rights reserved. +// + +#pragma once + +namespace CoreML { + +enum class ResultType { + NO_ERROR, + + // Feature type of transform doesn't match target type of a prior + // transform, i.e. one-hot encoding the output of a linear regressor. + TYPE_MISMATCH, + + // Feature type of a transform violates invariant type conditions, + // i.e. passing a dictionary to a linear regressor. 
+ FEATURE_TYPE_INVARIANT_VIOLATION, + + // File I/O errors + UNABLE_TO_OPEN_FILE, + FAILED_TO_SERIALIZE, + FAILED_TO_DESERIALIZE, + + // Invalid protobuf file (internally inconsistent) + INVALID_COMPATIBILITY_VERSION, + UNSUPPORTED_COMPATIBILITY_VERSION, + UNSUPPORTED_FEATURE_TYPE_FOR_MODEL_TYPE, + TOO_MANY_FEATURES_FOR_MODEL_TYPE, + INTERFACE_FEATURE_NAME_MISMATCH, + INTERFACE_MODEL_PARAMETER_MISMATCH, + INVALID_MODEL_INTERFACE, + + // Invalid protobuf model parameters + INVALID_MODEL_PARAMETERS, + + // Invalid updatable parameters + INVALID_UPDATABLE_MODEL_PARAMETERS, + INVALID_UPDATABLE_MODEL_CONFIGURATION, + + // NN shaper failure, not necessarily an error + POTENTIALLY_INVALID_NEURAL_NETWORK_SHAPES +}; + +} diff --git a/mlmodel/src/TreeEnsembleCommon.cpp b/mlmodel/src/TreeEnsembleCommon.cpp index f799d72e9..f4ca2f38d 100644 --- a/mlmodel/src/TreeEnsembleCommon.cpp +++ b/mlmodel/src/TreeEnsembleCommon.cpp @@ -162,7 +162,7 @@ namespace CoreML { namespace TreeEnsembles { //////////////////////////////////////////////////////////////////////////////// // Get the full list of nodes from the spec. - auto nodes = tes.nodes(); + const auto& nodes = tes.nodes(); /** Stage 1: Translation. * @@ -316,7 +316,7 @@ namespace CoreML { namespace TreeEnsembles { } // Check to make sure that each tree has exactly one root. - { + if (root_nodes.size() != 0) { // Sort by tree ID to detect non-unique elements. std::sort(root_node_ids.begin(), root_node_ids.end()); diff --git a/mlmodel/src/Utils.cpp b/mlmodel/src/Utils.cpp index dc22abe8e..0a7dd0484 100644 --- a/mlmodel/src/Utils.cpp +++ b/mlmodel/src/Utils.cpp @@ -123,6 +123,11 @@ void CoreML::downgradeSpecificationVersion(Specification::Model *pModel) { pModel->set_specificationversion(MLMODEL_SPECIFICATION_VERSION_NEWEST); } + + if (pModel->specificationversion() == MLMODEL_SPECIFICATION_VERSION_IOS14 && !hasIOS14Features(*pModel)) { + pModel->set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); + } + if (pModel->specificationversion() == MLMODEL_SPECIFICATION_VERSION_IOS13 && !hasIOS13Features(*pModel)) { pModel->set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS12); } @@ -192,7 +197,7 @@ bool CoreML::hasWeightOfType(const Specification::NeuralNetworkLayer& layer, case Specification::NeuralNetworkLayer::LayerCase::kInnerProduct: return (isWeightParamOfType(layer.innerproduct().weights(),type) || isWeightParamOfType(layer.innerproduct().bias(), type)); - + case Specification::NeuralNetworkLayer::LayerCase::kBatchedMatmul: return (isWeightParamOfType(layer.batchedmatmul().weights(),type) || isWeightParamOfType(layer.batchedmatmul().bias(), type)); @@ -229,7 +234,7 @@ bool CoreML::hasWeightOfType(const Specification::NeuralNetworkLayer& layer, case Specification::NeuralNetworkLayer::LayerCase::kEmbedding: return (isWeightParamOfType(layer.embedding().weights(), type) || isWeightParamOfType(layer.embedding().bias(), type)); - + case Specification::NeuralNetworkLayer::LayerCase::kEmbeddingND: return (isWeightParamOfType(layer.embeddingnd().weights(), type) || isWeightParamOfType(layer.embeddingnd().bias(), type)); @@ -280,7 +285,7 @@ bool CoreML::hasWeightOfType(const Specification::Model& model, const WeightPara // if the old ones are also filled in with something plausible, then there is nothing // preventing us from running on older versions of Core ML. 
bool CoreML::hasFlexibleShapes(const Specification::Model& model) { - + auto inputs = model.description().input(); for (const auto& input: inputs) { if (input.type().Type_case() == Specification::FeatureType::kMultiArrayType) { @@ -362,7 +367,7 @@ bool CoreML::hasIOS12Features(const Specification::Model& model) { default: return (hasFlexibleShapes(model) || hasCustomModel(model) || hasCategoricalSequences(model) || hasAppleTextClassifier(model) || hasAppleWordTagger(model) || - hasAppleImageFeatureExtractor(model) || hasUnsignedQuantizedWeights(model) || + hasScenePrint(model) || hasUnsignedQuantizedWeights(model) || hasNonmaxSuppression(model) || hasBayesianProbitRegressor(model) || hasIOS12NewNeuralNetworkLayers(model)); } @@ -381,11 +386,11 @@ bool CoreML::hasIOS13Features(const Specification::Model& model) { // - model is of type Gazetteer // - model is of type WordEmbedding // - (... add others here ...) - + if (model.isupdatable()) { return true; } - + bool result = false; switch (model.Type_case()) { case Specification::Model::kPipeline: @@ -431,6 +436,95 @@ bool CoreML::hasIOS13Features(const Specification::Model& model) { return false; } +bool CoreML::hasDefaultValueForOptionalInputs(const Specification::Model& model) { + // Checks if default optional value has been set or not + for (const auto& input: model.description().input()) { + if (input.type().isoptional()){ + switch (input.type().multiarraytype().defaultOptionalValue_case()) { + case CoreML::Specification::ArrayFeatureType::kDoubleDefaultValue: + case CoreML::Specification::ArrayFeatureType::kFloatDefaultValue: + case CoreML::Specification::ArrayFeatureType::kIntDefaultValue: + return true; + default: + break; + } + } + } + return false; +} + +bool CoreML::hasFloat32InputsOrOutputsForNonmaxSuppression(const Specification::Model& model) { + if (!hasNonmaxSuppression(model)) { + // not NMS. 
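+ // (Float32 multiarray inputs/outputs only count as an iOS 14 feature for Non-Maximum Suppression models, so every other model type can return early.)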
+ return false; + } + + auto inputs = model.description().input(); + for (const auto& input: inputs) { + if (input.type().Type_case() == Specification::FeatureType::kMultiArrayType) { + if (input.type().multiarraytype().datatype() == Specification::ArrayFeatureType_ArrayDataType_FLOAT32) { + return true; + } + } + } + + auto outputs = model.description().output(); + for (const auto& output: outputs) { + if (output.type().Type_case() == Specification::FeatureType::kMultiArrayType) { + if (output.type().multiarraytype().datatype() == Specification::ArrayFeatureType_ArrayDataType_FLOAT32) { + return true; + } + } + } + + return false; +} + +bool CoreML::hasIOS14Features(const Specification::Model& model) { + // New IOS14 features: + // - new layers in Neural Network + // - Non-zero values for optional inputs + // - VisionFeaturePrint.Object + // - Float32 input/output for Non-Maximum Suppression + // - Apple Word Tagger using transfer learning (revision == 3) + + bool result = false; + switch (model.Type_case()) { + case Specification::Model::kPipeline: + for (auto &m : model.pipeline().models()) { + result = result || hasIOS14Features(m); + if (result) { + return true; + } + } + break; + case Specification::Model::kPipelineRegressor: + for (auto &m : model.pipelineregressor().pipeline().models()) { + result = result || hasIOS14Features(m); + if (result) { + return true; + } + } + break; + case Specification::Model::kPipelineClassifier: + for (auto &m : model.pipelineclassifier().pipeline().models()) { + result = result || hasIOS14Features(m); + if (result) { + return true; + } + } + break; + case Specification::Model::kSerializedModel: + return true; + case Specification::Model::kWordTagger: + return model.wordtagger().revision() == 3; + default: + return (hasIOS14NeuralNetworkFeatures(model) || hasObjectPrint(model) || hasFloat32InputsOrOutputsForNonmaxSuppression(model)); + } + return false; +} + + bool CoreML::hasCustomModel(const Specification::Model& model) { return (model.Type_case() == Specification::Model::kCustomModel); } @@ -455,6 +549,14 @@ bool CoreML::hasAppleImageFeatureExtractor(const Specification::Model& model) { return (model.Type_case() == Specification::Model::kVisionFeaturePrint); } +bool CoreML::hasScenePrint(const Specification::Model& model) { + return (hasAppleImageFeatureExtractor(model) && model.visionfeatureprint().has_scene()); +} + +bool CoreML::hasObjectPrint(const Specification::Model& model) { + return (hasAppleImageFeatureExtractor(model) && model.visionfeatureprint().has_object()); +} + bool CoreML::hasNonmaxSuppression(const Specification::Model& model) { return (model.Type_case() == Specification::Model::kNonMaximumSuppression); } @@ -503,10 +605,10 @@ bool CoreML::hasCategoricalSequences(const Specification::Model& model) { } bool CoreML::hasIOS12NewNeuralNetworkLayers(const Specification::Model& model) { - + // Return True if the model has the two new NN layers added in iOS 12, which are // resizeBilinear and CropResize - + auto layers = getNNSpec(model); if (layers) { for (int i=0; i< layers->size(); i++){ @@ -556,12 +658,13 @@ bool CoreML::hasModelOrSubModelProperty(const Specification::Model& model, const } bool CoreML::isIOS12NeuralNetworkLayer(const Specification::NeuralNetworkLayer& layer) { - + // Return True if the NN layer is from the set exposed in iOS 12 switch (layer.layer_case()) { case Specification::NeuralNetworkLayer::LayerCase::kConvolution: return (layer.input().size() == 1); case 
Specification::NeuralNetworkLayer::LayerCase::kInnerProduct: + return !layer.innerproduct().int8dynamicquantize(); case Specification::NeuralNetworkLayer::LayerCase::kBatchnorm: case Specification::NeuralNetworkLayer::LayerCase::kActivation: case Specification::NeuralNetworkLayer::LayerCase::kPooling: @@ -574,6 +677,12 @@ bool CoreML::isIOS12NeuralNetworkLayer(const Specification::NeuralNetworkLayer& case Specification::NeuralNetworkLayer::LayerCase::kMultiply: case Specification::NeuralNetworkLayer::LayerCase::kUnary: case Specification::NeuralNetworkLayer::LayerCase::kUpsample: + if (layer.upsample().linearupsamplemode() != Specification::UpsampleLayerParams_LinearUpsampleMode_DEFAULT) { + return false; + } + if (layer.upsample().fractionalscalingfactor_size() > 0) { + return false; + } case Specification::NeuralNetworkLayer::LayerCase::kBias: case Specification::NeuralNetworkLayer::LayerCase::kL2Normalize: case Specification::NeuralNetworkLayer::LayerCase::kReshape: @@ -595,6 +704,9 @@ bool CoreML::isIOS12NeuralNetworkLayer(const Specification::NeuralNetworkLayer& case Specification::NeuralNetworkLayer::LayerCase::kEmbedding: case Specification::NeuralNetworkLayer::LayerCase::kSequenceRepeat: case Specification::NeuralNetworkLayer::LayerCase::kReorganizeData: + if (layer.reorganizedata().mode() == Specification::ReorganizeDataLayerParams::PIXEL_SHUFFLE) { + return false; + } case Specification::NeuralNetworkLayer::LayerCase::kSlice: case Specification::NeuralNetworkLayer::LayerCase::kCustom: case Specification::NeuralNetworkLayer::kResizeBilinear: @@ -607,16 +719,15 @@ bool CoreML::isIOS12NeuralNetworkLayer(const Specification::NeuralNetworkLayer& bool CoreML::hasIOS13NeuralNetworkFeatures(const Specification::Model& model) { - + /* check if any of the messages in NeuralNetwork.proto, that were added in iOS version 13, are being used. If they are, return True, otherwise return False. - + In particular, check for the presence of the following messages: 1. any new layer type, which was not in iOS 12. 2. 
if the value of enums "NeuralNetworkMultiArrayShapeMapping" or "NeuralNetworkImageShapeMapping" is non 0 */ - - // - if the value of enums "NeuralNetworkMultiArrayShapeMapping" or "NeuralNetworkImageShapeMapping" is non 0 + switch (model.Type_case()) { case Specification::Model::TypeCase::kNeuralNetwork: if (model.neuralnetwork().arrayinputshapemapping() != Specification::NeuralNetworkMultiArrayShapeMapping::RANK5_ARRAY_MAPPING) { @@ -642,7 +753,7 @@ bool CoreML::hasIOS13NeuralNetworkFeatures(const Specification::Model& model) { default: break; } - + // check for new layers: by checking if it's NOT one of the layers supported in iOS 12 auto layers = getNNSpec(model); if (layers) { @@ -653,7 +764,60 @@ bool CoreML::hasIOS13NeuralNetworkFeatures(const Specification::Model& model) { } } } - + return false; } +bool CoreML::hasIOS14NeuralNetworkFeatures(const Specification::Model& model) { + + // Return True if the model has the new Neural network features added in + // iOS 14 + + if (hasDefaultValueForOptionalInputs(model)) { + return true; + } + + auto layers = getNNSpec(model); + if (layers) { + for (int i=0; i < layers->size(); i++){ + const Specification::NeuralNetworkLayer& layer = (*layers)[i]; + switch (layer.layer_case()) { + case Specification::NeuralNetworkLayer::kCumSum: + case Specification::NeuralNetworkLayer::kOneHot: + case Specification::NeuralNetworkLayer::kClampedReLU: + case Specification::NeuralNetworkLayer::kArgSort: + case Specification::NeuralNetworkLayer::kPooling3D: + case Specification::NeuralNetworkLayer::kGlobalPooling3D: + case Specification::NeuralNetworkLayer::kSliceBySize: + case Specification::NeuralNetworkLayer::kConvolution3D: + return true; + case Specification::NeuralNetworkLayer::kSliceDynamic: + if (layer.input().size() == 7) { + return true; + } else if (layer.slicedynamic().squeezemasks_size()) { + return true; + } + case Specification::NeuralNetworkLayer::kUpsample: + if (layer.upsample().linearupsamplemode() != Specification::UpsampleLayerParams_LinearUpsampleMode_DEFAULT) { + return true; + } + if (layer.upsample().fractionalscalingfactor_size() > 0) { + return true; + } + case Specification::NeuralNetworkLayer::kReorganizeData: + if (layer.reorganizedata().mode() == Specification::ReorganizeDataLayerParams::PIXEL_SHUFFLE) { + return true; + } + case Specification::NeuralNetworkLayer::kInnerProduct: + if (layer.innerproduct().int8dynamicquantize()) + return true; + case Specification::NeuralNetworkLayer::kBatchedMatmul: + if (layer.batchedmatmul().int8dynamicquantize()) + return true; + default: + continue; + } + } + } + return false; +} diff --git a/mlmodel/src/Utils.hpp b/mlmodel/src/Utils.hpp index 3547f65e5..0ec949e07 100644 --- a/mlmodel/src/Utils.hpp +++ b/mlmodel/src/Utils.hpp @@ -95,6 +95,7 @@ namespace CoreML { bool hasIOS11_2Features(const Specification::Model& model); bool hasIOS12Features(const Specification::Model& model); bool hasIOS13Features(const Specification::Model& model); + bool hasIOS14Features(const Specification::Model& model); typedef std::pair<std::string, std::string> StringPair; // Returns a vector of pairs of strings, one pair per custom layer instance @@ -113,6 +114,8 @@ namespace CoreML { bool hasAppleGazetteer(const Specification::Model& model); bool hasAppleWordEmbedding(const Specification::Model& model); bool hasAppleImageFeatureExtractor(const Specification::Model& model); + bool hasScenePrint(const Specification::Model& model); + bool hasObjectPrint(const Specification::Model& model); bool hasCategoricalSequences(const Specification::Model& model);
bool hasNonmaxSuppression(const Specification::Model& model); bool hasBayesianProbitRegressor(const Specification::Model& model); @@ -121,6 +124,9 @@ namespace CoreML { bool hasIOS12NewNeuralNetworkLayers(const Specification::Model& model); bool isIOS12NeuralNetworkLayer(const Specification::NeuralNetworkLayer& layer); bool hasIOS13NeuralNetworkFeatures(const Specification::Model& model); + bool hasIOS14NeuralNetworkFeatures(const Specification::Model& model); + bool hasDefaultValueForOptionalInputs(const Specification::Model& model); + bool hasFloat32InputsOrOutputsForNonmaxSuppression(const Specification::Model& model); bool hasModelOrSubModelProperty(const Specification::Model& model, const std::function<bool(const Specification::Model&)> &boolFunc); diff --git a/mlmodel/src/ArrayFeatureExtractorValidator.cpp b/mlmodel/src/Validation/ArrayFeatureExtractorValidator.cpp similarity index 100% rename from mlmodel/src/ArrayFeatureExtractorValidator.cpp rename to mlmodel/src/Validation/ArrayFeatureExtractorValidator.cpp diff --git a/mlmodel/src/BayesianProbitRegressionValidator.cpp b/mlmodel/src/Validation/BayesianProbitRegressionValidator.cpp similarity index 100% rename from mlmodel/src/BayesianProbitRegressionValidator.cpp rename to mlmodel/src/Validation/BayesianProbitRegressionValidator.cpp diff --git a/mlmodel/src/CategoricalMappingValidator.cpp b/mlmodel/src/Validation/CategoricalMappingValidator.cpp similarity index 100% rename from mlmodel/src/CategoricalMappingValidator.cpp rename to mlmodel/src/Validation/CategoricalMappingValidator.cpp diff --git a/mlmodel/src/CustomModelValidator.cpp b/mlmodel/src/Validation/CustomModelValidator.cpp similarity index 100% rename from mlmodel/src/CustomModelValidator.cpp rename to mlmodel/src/Validation/CustomModelValidator.cpp diff --git a/mlmodel/src/DictVectorizerValidator.cpp b/mlmodel/src/Validation/DictVectorizerValidator.cpp similarity index 100% rename from mlmodel/src/DictVectorizerValidator.cpp rename to mlmodel/src/Validation/DictVectorizerValidator.cpp diff --git a/mlmodel/src/FeatureVectorizerValidator.cpp b/mlmodel/src/Validation/FeatureVectorizerValidator.cpp similarity index 100% rename from mlmodel/src/FeatureVectorizerValidator.cpp rename to mlmodel/src/Validation/FeatureVectorizerValidator.cpp diff --git a/mlmodel/src/GazetteerValidator.cpp b/mlmodel/src/Validation/GazetteerValidator.cpp similarity index 100% rename from mlmodel/src/GazetteerValidator.cpp rename to mlmodel/src/Validation/GazetteerValidator.cpp diff --git a/mlmodel/src/IdentityValidator.cpp b/mlmodel/src/Validation/IdentityValidator.cpp similarity index 100% rename from mlmodel/src/IdentityValidator.cpp rename to mlmodel/src/Validation/IdentityValidator.cpp diff --git a/mlmodel/src/ImputerValidator.cpp b/mlmodel/src/Validation/ImputerValidator.cpp similarity index 100% rename from mlmodel/src/ImputerValidator.cpp rename to mlmodel/src/Validation/ImputerValidator.cpp diff --git a/mlmodel/src/InterfaceValidators.cpp b/mlmodel/src/Validation/InterfaceValidators.cpp similarity index 79% rename from mlmodel/src/InterfaceValidators.cpp rename to mlmodel/src/Validation/InterfaceValidators.cpp index c1f601b19..64d8c65f9 100644 --- a/mlmodel/src/InterfaceValidators.cpp +++ b/mlmodel/src/Validation/InterfaceValidators.cpp @@ -163,6 +163,33 @@ namespace CoreML { "Description of multiarray feature '" + desc.name() + "' has an invalid or unspecified dataType.
" "It must be specified as DOUBLE, FLOAT32 or INT32"); } + + switch (type.multiarraytype().defaultOptionalValue_case()) { + case CoreML::Specification::ArrayFeatureType::kDoubleDefaultValue: + if (type.multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_DOUBLE){ + return Result(ResultType::INVALID_MODEL_INTERFACE, + "Description of multiarray feature '" + desc.name() + "' has a mismatch" + " between dataType and the type of default optional value."); + } + break; + case CoreML::Specification::ArrayFeatureType::kFloatDefaultValue: + if (type.multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_FLOAT32){ + return Result(ResultType::INVALID_MODEL_INTERFACE, + "Description of multiarray feature '" + desc.name() + "' has a mismatch" + " between dataType and the type of default optional value."); + } + break; + case CoreML::Specification::ArrayFeatureType::kIntDefaultValue: + if (type.multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_INT32){ + return Result(ResultType::INVALID_MODEL_INTERFACE, + "Description of multiarray feature '" + desc.name() + "' has a mismatch" + " between dataType and the type of default optional value."); + } + break; + default: + break; + } + break; } @@ -211,7 +238,7 @@ namespace CoreML { if (!foundDefault) { return Result(ResultType::INVALID_MODEL_INTERFACE, "Description of image feature '" + desc.name() + "' has a default size of " + - std::to_string(defaultWidth) + " x " + std::to_string(defaultHeight) + + std::to_string(defaultWidth) + " × " + std::to_string(defaultHeight) + " which is not within the allowed enumerated sizes specified."); } @@ -386,6 +413,44 @@ namespace CoreML { return result; } + Result validateClassifierFeatureDescriptions(const Specification::ModelDescription& interface, + bool expected_class_is_int64) { + + const auto& predictedFeatureName = interface.predictedfeaturename(); + const auto& probOutputName = interface.predictedprobabilitiesname(); + + if (predictedFeatureName.empty()) { + return Result(ResultType::INVALID_MODEL_INTERFACE, + "Specification is missing classifier predictedFeatureName"); + } else { + auto expected_class = (expected_class_is_int64 + ? Specification::FeatureType::TypeCase::kInt64Type + : Specification::FeatureType::TypeCase::kStringType); + + auto result = validateDescriptionsContainFeatureWithNameAndType(interface.output(), + predictedFeatureName, + {expected_class}); + if (!result.good()) { + return result; + } + } + + if (!probOutputName.empty()) { + // TODO @znation: validate array length below + // and value type (must be double? different for different classifiers?) + // TODO Probability outputs are always dictionaries! + auto result = validateDescriptionsContainFeatureWithNameAndType(interface.output(), + probOutputName, + {Specification::FeatureType::TypeCase::kMultiArrayType, // TODO ARRAY TYPE IS INVALID, REMOVE + Specification::FeatureType::TypeCase::kDictionaryType}); + if (!result.good()) { + return result; + } + } + + return Result(); + } + /* * Validate optional inputs/outputs. * For most models, optional is not allowed (all inputs/outputs required).
@@ -412,7 +477,40 @@ namespace CoreML { inline Result validateOptionalTree(const Specification::ModelDescription& interface) { return validateOptionalOutputs(interface); } - + + inline Result validateDefaultOptionalValues(const Specification::Model& format) { + // Validate default optional values for NN models: + // - an optional input in a spec 5 (iOS 14) model may carry a default value + // - error out if a default value is set but the model is not a neural network + const Specification::ModelDescription& description = format.description(); + + for (const auto& input : description.input()) { + if (input.type().isoptional()) { + switch (input.type().multiarraytype().defaultOptionalValue_case()) { + case CoreML::Specification::ArrayFeatureType::kDoubleDefaultValue: + case CoreML::Specification::ArrayFeatureType::kFloatDefaultValue: + case CoreML::Specification::ArrayFeatureType::kIntDefaultValue: + // Default value for optional inputs is applicable + // only for NeuralNetwork models with Spec 5 (iOS 14) onwards. + if (format.Type_case() != Specification::Model::kNeuralNetwork && + format.Type_case() != Specification::Model::kNeuralNetworkRegressor && + format.Type_case() != Specification::Model::kNeuralNetworkClassifier) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, + "Default optional values are only allowed for neural networks."); + } + if (format.specificationversion() < MLMODEL_SPECIFICATION_VERSION_IOS14) { + return Result(ResultType::INVALID_MODEL_INTERFACE, + "Default value for optional inputs is supported from specification 5 (iOS 14) onwards!"); + } + break; + default: + break; + } + } + } + return Result(); + } + inline Result validateOptionalNN(const Specification::ModelDescription& description) { // just need to check that not all inputs are optional bool hasNotOptional = false; @@ -430,6 +528,12 @@ namespace CoreML { Result validateOptional(const Specification::Model& format) { Result r; + r = validateDefaultOptionalValues(format); + + if (!r.good()) { + return r; + } + switch (format.Type_case()) { case Specification::Model::kImputer: // Imputed values can be handled by replacing a particular value, so @@ -450,6 +554,9 @@ namespace CoreML { // pipeline has valid optional inputs iff the models inside are valid. // this should be guaranteed by the pipeline validator. break; + case Specification::Model::kItemSimilarityRecommender: + // allow arbitrary optional in the recommender. The recommender validator catches these.
+ break; case Specification::Model::kIdentity: // anything goes for the identity function break; diff --git a/mlmodel/src/ItemSimilarityRecommenderValidator.cpp b/mlmodel/src/Validation/ItemSimilarityRecommenderValidator.cpp similarity index 100% rename from mlmodel/src/ItemSimilarityRecommenderValidator.cpp rename to mlmodel/src/Validation/ItemSimilarityRecommenderValidator.cpp diff --git a/mlmodel/src/KNearestNeighborsClassifierValidator.cpp b/mlmodel/src/Validation/KNearestNeighborsClassifierValidator.cpp similarity index 100% rename from mlmodel/src/KNearestNeighborsClassifierValidator.cpp rename to mlmodel/src/Validation/KNearestNeighborsClassifierValidator.cpp diff --git a/mlmodel/src/LinearModelValidator.cpp b/mlmodel/src/Validation/LinearModelValidator.cpp similarity index 100% rename from mlmodel/src/LinearModelValidator.cpp rename to mlmodel/src/Validation/LinearModelValidator.cpp diff --git a/mlmodel/src/LinkedModelValidator.cpp b/mlmodel/src/Validation/LinkedModelValidator.cpp similarity index 100% rename from mlmodel/src/LinkedModelValidator.cpp rename to mlmodel/src/Validation/LinkedModelValidator.cpp diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkLayerValidator.cpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkLayerValidator.cpp similarity index 89% rename from mlmodel/src/NeuralNetwork/NeuralNetworkLayerValidator.cpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkLayerValidator.cpp index 44395a236..f922eeb3d 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkLayerValidator.cpp +++ b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkLayerValidator.cpp @@ -16,14 +16,14 @@ using namespace CoreML; // ConvolutionLayerParams convolution = 4; Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification::NeuralNetworkLayer& layer) { - + Result r; r = validateInputCount(layer, 1, 2); if (!r.good()) {return r;} - + r = validateOutputCount(layer, 1, 1); if (!r.good()) {return r;} - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Convolution", blobNameToRank); if (!r.good()) {return r;} @@ -36,14 +36,14 @@ Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification: return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + // We need to check if the ConvolutionPaddingType is set if (layer.convolution().ConvolutionPaddingType_case() == Specification::ConvolutionLayerParams::ConvolutionPaddingTypeCase::CONVOLUTIONPADDINGTYPE_NOT_SET) { std::string err = "Padding type for convolution layer '" + layer.name() + "' is not set."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + const auto& params = layer.convolution(); bool is_deconv = params.isdeconvolution(); if (is_deconv && layer.input_size() != 1) { @@ -95,7 +95,7 @@ Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification: WeightParamType weightsValueType, biasValueType; weightsValueType = valueType(params.weights()); biasValueType = valueType(params.bias()); - + // Check weight/bias value types. 
Only float32 or float16 parameters can be populated at any time if ( (weightsValueType == UNSPECIFIED) || (has_bias && biasValueType == UNSPECIFIED)) { std::string err = "Convolution layer '" + layer.name() + "' has invalid weights/bias fields."; @@ -110,7 +110,7 @@ Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification: return r; } } - + // Get populated weight and bias sizes // Check weights uint64_t expected_weight_size = 0; @@ -131,12 +131,12 @@ Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification: } if (weight_size != expected_weight_size) { if (is_deconv) { - std::string err = "Deconvolution layer '" + layer.name() + "' has weight matrix of size " + std::to_string(weight_size) + " to encode a " + std::to_string(kernelChannels) + " x " + std::to_string(outputChannels/nGroups) + " x " + std::to_string(kernelHeight) + " x " + std::to_string(kernelWidth) + " convolution."; + std::string err = "Deconvolution layer '" + layer.name() + "' has weight matrix of size " + std::to_string(weight_size) + " to encode a " + std::to_string(kernelChannels) + " × " + std::to_string(outputChannels/nGroups) + " × " + std::to_string(kernelHeight) + " × " + std::to_string(kernelWidth) + " convolution."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } else { - std::string err = "Convolution layer '" + layer.name() + "' has weight matrix of size " + std::to_string(weight_size) + " to encode a " + std::to_string(outputChannels) + " x " + std::to_string(kernelChannels) + " x " + std::to_string(kernelHeight) + " x " + std::to_string(kernelWidth) + " convolution."; + std::string err = "Convolution layer '" + layer.name() + "' has weight matrix of size " + std::to_string(weight_size) + " to encode a " + std::to_string(outputChannels) + " × " + std::to_string(kernelChannels) + " × " + std::to_string(kernelHeight) + " × " + std::to_string(kernelWidth) + " convolution."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } @@ -149,7 +149,7 @@ Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification: r = Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer " + layer.name() + "has not specified weights."); return r; } - + // Check the bias uint64_t bias_size = 0; if (has_bias) { @@ -177,17 +177,198 @@ Result NeuralNetworkSpecValidator::validateConvolutionLayer(const Specification: return r; } +Result NeuralNetworkSpecValidator::validateConvolution3DLayer(const Specification::NeuralNetworkLayer& layer) { + + Result r; + r = validateInputCount(layer, 1, 1); + if (!r.good()) { return r; } + + r = validateOutputCount(layer, 1, 1); + if (!r.good()) { return r; } + + if (ndArrayInterpretation) { + r = validateInputOutputRankEquality(layer, "Convolution3D", blobNameToRank); + if (!r.good()) { return r; } + r = validateRankCount(layer, "Convolution3D", 5, -1, blobNameToRank); + if (!r.good()) { return r; } + } + + const auto& params = layer.convolution3d(); + + if (layer.input_size() != 1) { + std::string err = "Convolution3D layer: '" + layer.name() + + "', convolution3D does not support weight as input tensor."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + + // Validate input and output channels and number of convolution groups are positive + int inputChannels = params.inputchannels(); + int outputChannels = params.outputchannels(); + int nGroups = params.ngroups(); + r = validatePositive(inputChannels, "Input Channels"); + if (!r.good()) { return r; } + r = validatePositive(outputChannels, "Output 
Channels"); + if (!r.good()) { return r; } + r = validatePositive(nGroups, "Groups"); + if (!r.good()) { return r; } + + // Validate kernel is positive + int kernelDepth = params.kerneldepth(); + int kernelHeight = params.kernelheight(); + int kernelWidth = params.kernelwidth(); + r = validatePositive(kernelDepth, "Kernel Depth"); + if (!r.good()) { return r; } + r = validatePositive(kernelHeight, "Kernel Height"); + if (!r.good()) { return r; } + r = validatePositive(kernelWidth, "Kernel Width"); + if (!r.good()) { return r; } + + // Validate stride is positive + r = validatePositive(params.stridedepth(), "Stride Depth"); + if (!r.good()) { return r; } + r = validatePositive(params.strideheight(), "Stride Height"); + if (!r.good()) { return r; } + r = validatePositive(params.stridewidth(), "Stride Width"); + if (!r.good()) { return r; } + + // Validate dilation is positive + r = validatePositive(params.dilationdepth(), "Dilation Depth"); + if (!r.good()) { return r; } + r = validatePositive(params.dilationheight(), "Dilation Height"); + if (!r.good()) { return r; } + r = validatePositive(params.dilationwidth(), "Dilation Width"); + if (!r.good()) { return r; } + + // Validate padding + int customPaddingFront = params.custompaddingfront(); + int customPaddingBack = params.custompaddingback(); + int customPaddingTop = params.custompaddingtop(); + int customPaddingBottom = params.custompaddingbottom(); + int customPaddingLeft = params.custompaddingleft(); + int customPaddingRight = params.custompaddingright(); + if (customPaddingFront < 0) { + std::string err = "Custom Padding Front must be non-negative, got '" + + std::to_string(customPaddingFront) + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + if (customPaddingBack < 0) { + std::string err = "Custom Padding Back must be non-negative, got '" + + std::to_string(customPaddingBack) + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + if (customPaddingTop < 0) { + std::string err = "Custom Padding Top must be non-negative, got '" + + std::to_string(customPaddingTop) + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + if (customPaddingBottom < 0) { + std::string err = "Custom Padding Bottom must be non-negative, got '" + + std::to_string(customPaddingBottom) + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + if (customPaddingLeft < 0) { + std::string err = "Custom Padding Left must be non-negative, got '" + + std::to_string(customPaddingLeft) + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + if (customPaddingRight < 0) { + std::string err = "Custom Padding Right must be non-negative, got '" + + std::to_string(customPaddingRight) + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + + WeightParamType weightsValueType, biasValueType; + weightsValueType = valueType(params.weights()); + biasValueType = valueType(params.bias()); + bool has_bias = params.hasbias(); + + // Check weight/bias value types.
Only float32 or float16 parameters can be populated at any time + if ((weightsValueType == UNSPECIFIED) || (has_bias && biasValueType == UNSPECIFIED)) { + std::string err = "Convolution3D layer '" + layer.name() + + "' has invalid weights/bias fields."; + r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); + return r; + } + if (has_bias){ + if ((weightsValueType == CoreML::FLOAT16 && biasValueType == CoreML::FLOAT32) || + (weightsValueType == CoreML::FLOAT32 && biasValueType == CoreML::FLOAT16)) { + r = Result(ResultType::INVALID_MODEL_PARAMETERS, "Convolution3D layer '" + layer.name() + + "' has unmatched precisions of weights/bias. They should either be half or full precision."); + return r; + } + } + + bool is_deconv = params.isdeconvolution(); + + if (params.outputshape_size() != 0) { + if (!is_deconv) { + std::string err = "Deconvolution3D Layer '" + layer.name() + "': Output Shape is only supported for Deconvolution layers."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } else if (params.outputshape_size() != 3) { + std::string err = "Deconvolution3D layer: '" + layer.name() + "', if set, output shape must be of length 3."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + } + // Manually check if weights are quantized--we don't currently support them and + // `validateGeneralWeightParams` allows them + if (weightsValueType == QUINT) { + std::string err = "Layer '" + layer.name() + "' has invalid weights field. Quantized " + + "weights are not supported."; + r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); + return r; + } + uint64_t expected_weight_size = 0; + if (is_deconv) { + expected_weight_size = static_cast<uint64_t>((outputChannels / nGroups) * inputChannels * kernelDepth * kernelHeight * kernelWidth); + } else { + expected_weight_size = static_cast<uint64_t>(outputChannels * (inputChannels / nGroups) * kernelDepth * kernelHeight * kernelWidth); + } + r = validateGeneralWeightParams(params.weights(), expected_weight_size, uint64_t(outputChannels), + "Convolution3D ", layer.name(), "weights"); + if (!r.good()) { return r; } + + // Check the bias + int bias_size = 0; + if (has_bias) { + if (biasValueType == FLOAT32 || biasValueType == FLOAT16){ + if (biasValueType == FLOAT32){ + bias_size = params.bias().floatvalue().size(); + } else { + bias_size = int(params.bias().float16value().size() / 2); + } + if (bias_size != outputChannels) { + std::string err = "Convolution3D layer '" + layer.name() + + "' has a bias vector of size " + std::to_string(bias_size) + " but should be " + + std::to_string(outputChannels) + "."; + r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); + return r; + } + } else if (biasValueType == QUINT){ + // We don't currently support quantized parameters. + std::string err = "Layer '" + layer.name() + "' has invalid bias field.
Quantized " + + "bias is not supported."; + r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); + return r; + } else { // EMPTY + r = Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer " + layer.name() + + " has not specified bias."); + return r; + } + } + return r; +} + static Result validateInnerProductWeightsBias(const Specification::NeuralNetworkLayer& layer, const Specification::WeightParams& weights, const Specification::WeightParams& bias) { - + Result r; - + bool has_bias; uint64_t num_inputs; uint64_t num_outputs; std::string layer_type; - + switch(layer.layer_case()) { case Specification::NeuralNetworkLayer::LayerCase::kInnerProduct: { const auto& params = layer.innerproduct(); @@ -208,11 +389,11 @@ static Result validateInnerProductWeightsBias(const Specification::NeuralNetwork default: return r; } - + WeightParamType weightsValueType, biasValueType; weightsValueType = valueType(weights); biasValueType = valueType(bias); - + // Check for weight and bias value type if ((weightsValueType == UNSPECIFIED) || (has_bias && biasValueType == UNSPECIFIED)) { r = Result(ResultType::INVALID_MODEL_PARAMETERS, layer_type + " layer '" + layer.name() + "' has invalid weights/bias fields."); @@ -227,7 +408,7 @@ static Result validateInnerProductWeightsBias(const Specification::NeuralNetwork return r; } } - + // Check weights uint64_t weight_size = 0; if (weightsValueType == FLOAT32 || weightsValueType == FLOAT16){ @@ -239,7 +420,7 @@ static Result validateInnerProductWeightsBias(const Specification::NeuralNetwork if (num_inputs * num_outputs != weight_size) { r = Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer '" + layer.name() + " has incorrect weight matrix size " + std::to_string(weight_size) + - " to encode a " + std::to_string(num_inputs) + " x " + + " to encode a " + std::to_string(num_inputs) + " × " + std::to_string(num_outputs) + " " + layer_type + "."); return r; } @@ -248,7 +429,7 @@ static Result validateInnerProductWeightsBias(const Specification::NeuralNetwork layer_type, layer.name(), "weight"); if (!r.good()) return r; } - + // Check the bias uint64_t bias_size = 0; if (has_bias){ @@ -274,7 +455,7 @@ static Result validateInnerProductWeightsBias(const Specification::NeuralNetwork r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + return r; } @@ -288,30 +469,34 @@ Result NeuralNetworkSpecValidator::validateInnerProductLayer(const Specification if (!r.good()) { return r; } - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "InnerProduct", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "InnerProduct", 1, 5, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.innerproduct(); - + if (params.int8dynamicquantize()) { + r = validateInt8Requirements(params.weights(), "InnerProduct", layer.name()); + if (!r.good()) {return r;} + } + r = validateInnerProductWeightsBias(layer, params.weights(), params.bias()); - + return r; } // BatchnormLayerParams batchnorm = 6; Result NeuralNetworkSpecValidator::validateBatchnormLayer(const Specification::NeuralNetworkLayer& layer) { - + Result r; r = validateInputCount(layer, 1, 1); if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Batchnorm", blobNameToRank); @@ -319,7 +504,7 @@ Result NeuralNetworkSpecValidator::validateBatchnormLayer(const Specification::N r = validateRankCount(layer, "Batchnorm", 3, -1, blobNameToRank); if (!r.good())
{return r;} } - + // Check parameters types bool has_f32_params = ((valueType(layer.batchnorm().gamma()) == FLOAT32) || (valueType(layer.batchnorm().beta()) == FLOAT32) || (valueType(layer.batchnorm().mean()) == FLOAT32) || (valueType(layer.batchnorm().variance()) == FLOAT32)); @@ -332,7 +517,7 @@ Result NeuralNetworkSpecValidator::validateBatchnormLayer(const Specification::N "should either be specified in half or full precision, mixed parameters are not supported."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + // Check parameters length uint64_t num_channels = static_cast<uint64_t>(layer.batchnorm().channels()); r = validateGeneralWeightParams(layer.batchnorm().gamma(), num_channels, 1, "BatchNorm", layer.name(), "gamma"); @@ -360,7 +545,7 @@ Result NeuralNetworkSpecValidator::validateActivation(const Specification::Neura if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()) {return r;} if (ndArrayInterpretation) { if (layer.activation().NonlinearityType_case() == Specification::ActivationParams::NonlinearityTypeCase::kPReLU) { @@ -376,7 +561,7 @@ Result NeuralNetworkSpecValidator::validateActivation(const Specification::Neura if (!r.good()) {return r;} } } - + return validateActivationParams(layer.activation()); } @@ -385,26 +570,107 @@ Result NeuralNetworkSpecValidator::validatePoolingLayer(const Specification::Neu Result r; r = validateInputCount(layer, 1, 1); if (!r.good()) {return r;} - + r = validateOutputCount(layer, 1, 1); if (!r.good()) {return r;} - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Pooling", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "Pooling", 4, -1, blobNameToRank); if (!r.good()) {return r;} } - + // We need to check if the PoolingPaddingType is set if (layer.pooling().PoolingPaddingType_case() == Specification::PoolingLayerParams::PoolingPaddingTypeCase::POOLINGPADDINGTYPE_NOT_SET) { std::string err = "Padding type for the pooling layer '" + layer.name() + "' is not set."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); } + + return r; +} + +Result NeuralNetworkSpecValidator::validatePooling3dLayer(const Specification::NeuralNetworkLayer& layer) { + Result r; + r = validateInputCount(layer, 1, 1); + if (!r.good()) { return r; } + + r = validateOutputCount(layer, 1, 1); + if (!r.good()) { return r; } + + if (ndArrayInterpretation) { + r = validateInputOutputRankEquality(layer, "Pooling3d", blobNameToRank); + if (!r.good()) { return r; } + // Rank 5 for 2 spatial dimensions, 1 temporal dimension, batch dimension, and 1+ channels.
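+ // (Rank-5 tensors here follow Core ML's [batch, channels, depth, height, width] layout for 3-D pooling inputs.)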
+ r = validateRankCount(layer, "Pooling3d", 5, -1, blobNameToRank); + if (!r.good()) { return r; } + } + + // Kernel + const auto pooling3d = layer.pooling3d(); + + r = validatePositive(pooling3d.kerneldepth(), "Kernel Depth"); + if (!r.good()) { return r; } + + r = validatePositive(pooling3d.kernelheight(), "Kernel Height"); + if (!r.good()) { return r; } + + r = validatePositive(pooling3d.kernelwidth(), "Kernel Width"); + if (!r.good()) { return r; } + + // Stride + r = validatePositive(pooling3d.stridedepth(), "Stride Depth"); + if (!r.good()) { return r; } + + r = validatePositive(pooling3d.strideheight(), "Stride Height"); + if (!r.good()) { return r; } + + r = validatePositive(pooling3d.stridewidth(), "Stride Width"); + if (!r.good()) { return r; } + + // Custom Padding + auto paddingType = pooling3d.paddingtype(); + r = validatePooling3dPadding(paddingType, pooling3d.custompaddingfront(), "Front"); + if (!r.good()) { return r; } + + r = validatePooling3dPadding(paddingType, pooling3d.custompaddingback(), "Back"); + if (!r.good()) { return r; } + + r = validatePooling3dPadding(paddingType, pooling3d.custompaddingtop(), "Top"); + if (!r.good()) { return r; } + + r = validatePooling3dPadding(paddingType, pooling3d.custompaddingbottom(), "Bottom"); + if (!r.good()) { return r; } + + r = validatePooling3dPadding(paddingType, pooling3d.custompaddingleft(), "Left"); + if (!r.good()) { return r; } + + r = validatePooling3dPadding(paddingType, pooling3d.custompaddingright(), "Right"); + if (!r.good()) { return r; } return r; } + +Result NeuralNetworkSpecValidator::validateGlobalPooling3dLayer(const Specification::NeuralNetworkLayer& layer) { + Result r; + r = validateInputCount(layer, 1, 1); + if (!r.good()) {return r;} + + r = validateOutputCount(layer, 1, 1); + if (!r.good()) {return r;} + + if (ndArrayInterpretation) { + r = validateInputOutputRankEquality(layer, "Pooling3d", blobNameToRank); + if (!r.good()) {return r;} + // Rank 5 for 2 spatial dimensions, 1 temporal dimension, batch dimension, and 1+ channels.
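+ // Unlike validatePooling3dLayer above, global pooling has no kernel, stride, or padding parameters to check, so validating the I/O counts and rank is sufficient here.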
+ r = validateRankCount(layer, "Pooling3d", 5, -1, blobNameToRank); + if (!r.good()) {return r;} + } + + return r; +} + // PaddingLayerParams padding = 9; Result NeuralNetworkSpecValidator::validatePaddingLayer(const Specification::NeuralNetworkLayer& layer) { Result r; @@ -412,15 +678,15 @@ Result NeuralNetworkSpecValidator::validatePaddingLayer(const Specification::Neu if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Padding", blobNameToRank); if (!r.good()) {return r;} - r = validateRankCount(layer, "Padding", 3, -1, blobNameToRank); + r = validateRankCount(layer, "Padding", 2, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.padding(); if (!(params.paddingamounts().borderamounts_size() == 0 || params.paddingamounts().borderamounts_size() == 2)) { @@ -428,13 +694,13 @@ Result NeuralNetworkSpecValidator::validatePaddingLayer(const Specification::Neu r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + if (params.PaddingType_case() == Specification::PaddingLayerParams::PaddingTypeCase::PADDINGTYPE_NOT_SET) { std::string err = "Padding layer " + layer.name() + " padding type is not set."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + return r; } @@ -445,7 +711,7 @@ Result NeuralNetworkSpecValidator::validateLRNLayer(const Specification::NeuralN if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "LRNLayer", blobNameToRank); @@ -453,13 +719,13 @@ Result NeuralNetworkSpecValidator::validateLRNLayer(const Specification::NeuralN r = validateRankCount(layer, "LRNLayer", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + if (layer.lrn().k() < 0.0) { std::string err = "Parameter 'K' for the LRN layer '" + layer.name() + "' must be positive."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + return r; } @@ -471,14 +737,14 @@ Result NeuralNetworkSpecValidator::validateSplitLayer(const Specification::Neura // between 2 and any number of outputs r = validateOutputCount(layer, 2, -1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Split", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "Split", 3, -1, blobNameToRank); if (!r.good()) {return r;} - + // check that all outputs have same rank int rank = 0; if (blobNameToRank.find(layer.output(0)) != blobNameToRank.end()) { @@ -486,7 +752,7 @@ Result NeuralNetworkSpecValidator::validateSplitLayer(const Specification::Neura } else { return r; } - + for (const auto& output : layer.output()) { if (blobNameToRank.find(output) != blobNameToRank.end()) { if (rank != blobNameToRank.at(output)) { @@ -497,7 +763,7 @@ Result NeuralNetworkSpecValidator::validateSplitLayer(const Specification::Neura } } } - + return r; } @@ -530,13 +796,13 @@ Result NeuralNetworkSpecValidator::validateUnaryFunctionLayer(const Specificatio if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Unary", blobNameToRank); if (!r.good()) {return r;} } - + return r; } @@ -547,7 +813,7 @@ Result NeuralNetworkSpecValidator::validateUpsampleLayer(const Specification::Ne if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = 
validateInputOutputRankEquality(layer, "Upsample", blobNameToRank); @@ -555,21 +821,48 @@ Result NeuralNetworkSpecValidator::validateUpsampleLayer(const Specification::Ne r = validateRankCount(layer, "Upsample", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.upsample(); - // scaling factor must be 2D if provided - if (!(params.scalingfactor_size() == 0 || params.scalingfactor_size() == 2)) { - std::string err = "Scaling factor in the upsampling layer '" + layer.name() + "' must be a vector of size 2 (i.e height, width) but is a vector of size " + std::to_string(params.scalingfactor_size()) + "."; + /* scalingFactor and fractionalScalingFactor are mutually exclusive; whichever one is provided must have size 2. + Valid (V) / invalid (I) combinations by size: + scalingFactor size: 0 | 2 | other + fractionalSF size 0: V | V | I + fractionalSF size 2: V | I | I + fractionalSF size other: I | I | I + */ + bool validScalingFactors; + if (params.scalingfactor_size() == 0) { + validScalingFactors = params.fractionalscalingfactor_size() == 0 || params.fractionalscalingfactor_size() == 2; + } else { + validScalingFactors = params.fractionalscalingfactor_size() == 0 && params.scalingfactor_size() == 2; + } + if (!validScalingFactors) { + std::string err = "Invalid scaling factor in upsampling layer '" + layer.name() + + "'. Only one of scalingFactor and fractionalScalingFactor can be set, and if set, must be of size 2. Found scalingFactor of size " + + std::to_string(params.scalingfactor_size()) + " and fractionalScalingFactor of size " + std::to_string(params.fractionalscalingfactor_size()); r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + if (params.fractionalscalingfactor_size() == 2 + && (params.mode() == Specification::UpsampleLayerParams_InterpolationMode_NN + || params.linearupsamplemode() == Specification::UpsampleLayerParams_LinearUpsampleMode_DEFAULT)) { + std::string err = "Invalid upsample layer '" + layer.name() + "'.
Fractional upsample is only compatible with align_corners=true or align_corners=false"; + r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); + return r; + } + + if(params.linearupsamplemode() != Specification::UpsampleLayerParams_LinearUpsampleMode_DEFAULT + && params.mode() == Specification::UpsampleLayerParams_InterpolationMode_NN) { + std::string err = "Layer '" + layer.name() + "' of type Upsample uses Nearest Neighbors interpolation, so its linear upsampling mode must be DEFAULT."; + r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + return r; } // BiasLayerParams bias = 18; Result NeuralNetworkSpecValidator::validateBiasLayer(const Specification::NeuralNetworkLayer& layer) { - + Result r; r = validateInputCount(layer, 1, 1); if (r.good()) { @@ -578,17 +871,17 @@ Result NeuralNetworkSpecValidator::validateBiasLayer(const Specification::Neural if (!r.good()) { return r; } - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Bias", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "Bias", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.bias(); WeightParamType paramType = valueType(params.bias()); - + // Only float32 or float16 parameters can be populated at any time if (paramType == UNSPECIFIED) { std::string err = "Bias product layer '" + layer.name() + "' has both full precision and half precision weights and/or bias fields populated"; @@ -622,7 +915,7 @@ Result NeuralNetworkSpecValidator::validateL2NormLayer(const Specification::Neur if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "L2Normalize", blobNameToRank); @@ -630,19 +923,19 @@ Result NeuralNetworkSpecValidator::validateL2NormLayer(const Specification::Neur r = validateRankCount(layer, "L2Normalize", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + return r; } // ReshapeLayerParams reshape = 20; Result NeuralNetworkSpecValidator::validateReshapeLayer(const Specification::NeuralNetworkLayer& layer) { - + Result r; r = validateInputCount(layer, 1, 1); if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Reshape", blobNameToRank); @@ -650,13 +943,13 @@ Result NeuralNetworkSpecValidator::validateReshapeLayer(const Specification::Neu r = validateRankCount(layer, "Reshape", 5, 5, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.reshape(); if (params.targetshape_size() != 3 && params.targetshape_size() != 4) { std::string err = "Reshape layer '" + layer.name() + "' target shape must be 3D or 4D."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + return r; } @@ -667,7 +960,7 @@ Result NeuralNetworkSpecValidator::validateFlattenLayer(const Specification::Neu if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Flatten", blobNameToRank); @@ -675,19 +968,19 @@ Result NeuralNetworkSpecValidator::validateFlattenLayer(const Specification::Neu r = validateRankCount(layer, "Flatten", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + return r; } // PermuteLayerParams permute = 22; Result NeuralNetworkSpecValidator::validatePermuteLayer(const Specification::NeuralNetworkLayer& layer) { - + Result r; r = validateInputCount(layer, 1, 1); if (r.good()) { r = validateOutputCount(layer, 1, 1); }
- + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Permute", blobNameToRank); @@ -695,13 +988,13 @@ Result NeuralNetworkSpecValidator::validatePermuteLayer(const Specification::Neu r = validateRankCount(layer, "Permute", 5, 5, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.permute(); if (params.axis_size() != 4) { std::string err = "Permute layer '" + layer.name() + "' must have 4D axis parameters."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + return r; } @@ -712,14 +1005,14 @@ Result NeuralNetworkSpecValidator::validateReduceLayer(const Specification::Neur if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (ndArrayInterpretation && layer.inputtensor_size() > 0) { r = validateInputOutputRankEquality(layer, "Reduce", blobNameToRank); if (!r.good()) {return r;} int rank = static_cast<int>(layer.inputtensor(0).rank()); bool sufficientInputRank = true; std::string err; - + switch (layer.reduce().axis()) { case Specification::ReduceLayerParams::CHW: if (rank < 3) {sufficientInputRank = false;} @@ -755,7 +1048,7 @@ Result NeuralNetworkSpecValidator::validateReorganizeDataLayer(const Specificati if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "ReorganizeData", blobNameToRank); @@ -763,7 +1056,7 @@ Result NeuralNetworkSpecValidator::validateReorganizeDataLayer(const Specificati r = validateRankCount(layer, "ReorganizeData", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& reorg = layer.reorganizedata(); if (static_cast<int>(reorg.blocksize()) < 2) { std::string err = "Block size for layer '" + layer.name() + "' must be > 1."; @@ -779,14 +1072,14 @@ Result NeuralNetworkSpecValidator::validateSliceLayer(const Specification::Neura if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (ndArrayInterpretation && layer.inputtensor_size() > 0) { r = validateInputOutputRankEquality(layer, "Slice", blobNameToRank); if (!r.good()) {return r;} int rank = static_cast<int>(layer.inputtensor(0).rank()); bool sufficientInputRank = true; std::string err; - + switch (layer.slice().axis()) { case Specification::SliceLayerParams_SliceAxis_CHANNEL_AXIS: if (rank < 3) {sufficientInputRank = false;} @@ -806,7 +1099,7 @@ Result NeuralNetworkSpecValidator::validateSliceLayer(const Specification::Neura return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + const auto& slice = layer.slice(); int stride = static_cast<int>(slice.stride()); if (stride < 1) { @@ -814,7 +1107,7 @@ r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + int64_t start = (int64_t)slice.startindex(); int64_t end = slice.endindex(); if ((end > 0 && end < start ) @@ -823,7 +1116,7 @@ r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + return r; } @@ -837,7 +1130,7 @@ Result NeuralNetworkSpecValidator::validateLoadConstantLayer(const Specification if (!r.good()) { return r; } - + if (ndArrayInterpretation) { if (blobNameToRank.find(layer.output(0)) != blobNameToRank.end()) { int rank = blobNameToRank.at(layer.output(0)); @@ -845,10 +1138,10 @@ Result NeuralNetworkSpecValidator::validateLoadConstantLayer(const Specification if (!r.good()) {return r;} } } - + const auto& params = layer.loadconstant(); WeightParamType
paramType = valueType(params.data()); - + // Only float32 or float16 parameters can be populated at any time if (paramType == UNSPECIFIED) { std::string err = "Load constant layer '" + layer.name() + "' has both full precision and half precision weight fields populated"; @@ -868,9 +1161,9 @@ Result NeuralNetworkSpecValidator::validateLoadConstantLayer(const Specification } else { r = validateGeneralWeightParams(params.data(), total_shape, 1, "LoadConstant", layer.name(), "constants"); } - + if (!r.good()) return r; - + return Result(); } @@ -884,21 +1177,21 @@ Result NeuralNetworkSpecValidator::validateScaleLayer(const Specification::Neura if (!r.good()) { return r; } - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Scale", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "Scale", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.scale(); - + bool has_bias = params.hasbias(); WeightParamType scaleValueType, biasValueType; scaleValueType = valueType(params.scale()); biasValueType = valueType(params.bias()); - + // Check for scale and bias value type. Only float32 or float16 parameters can be populated at any time. // Both bias and weights should have the same value types if ( (scaleValueType == UNSPECIFIED) || (has_bias && biasValueType == UNSPECIFIED)) { @@ -915,7 +1208,7 @@ Result NeuralNetworkSpecValidator::validateScaleLayer(const Specification::Neura return r; } } - + // Checks scale shape and size if (!(params.shapescale_size() == 1 || params.shapescale_size() == 3)) { // check shape std::string err = "The shape vector for the scale layer '" + layer.name() + "' is " + @@ -926,14 +1219,14 @@ Result NeuralNetworkSpecValidator::validateScaleLayer(const Specification::Neura for (int i = 0; i < params.shapescale_size(); i++) { total_scale_shape *= params.shapescale(i); } - + if (params.shapescale_size() == 3 && params.shapescale(0) > 1){ r = validateGeneralWeightParams(params.scale(), total_scale_shape, params.shapescale(0), "Scale", layer.name(), "scale"); } else { r = validateGeneralWeightParams(params.scale(), total_scale_shape, 1, "Scale", layer.name(), "scale"); } if (!r.good()) return r; - + // Checks bias shape and size if (has_bias) { if (!(params.shapebias_size() == 1 || params.shapebias_size() == 3)) { @@ -957,7 +1250,7 @@ Result NeuralNetworkSpecValidator::validateScaleLayer(const Specification::Neura // SimpleRecurrentLayerParams simpleRecurrent = 26; Result NeuralNetworkSpecValidator::validateSimpleRecurrentLayer(const Specification::NeuralNetworkLayer& layer) { - + Result r; // Must specify hidden state r = validateInputCount(layer, 2, 2); @@ -967,7 +1260,7 @@ Result NeuralNetworkSpecValidator::validateSimpleRecurrentLayer(const Specificat if (!r.good()) { return r; } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "SimpleRecurrent", blobNameToRank); @@ -975,14 +1268,14 @@ Result NeuralNetworkSpecValidator::validateSimpleRecurrentLayer(const Specificat r = validateRankCount(layer, "SimpleRecurrent", 5, 5, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.simplerecurrent(); bool hasBiasVector = params.hasbiasvector(); WeightParamType weightMatrixValueType, recursionMatrixValueType, biasVectorValueType; weightMatrixValueType = valueType(params.weightmatrix()); recursionMatrixValueType = valueType(params.recursionmatrix()); biasVectorValueType = valueType(params.biasvector()); - + // Verify all weights are of 
valid type if((weightMatrixValueType == UNSPECIFIED) || recursionMatrixValueType == UNSPECIFIED || (hasBiasVector && biasVectorValueType == UNSPECIFIED)){ std::string err = "Simple recurrent layer '" + layer.name() + "' has invalid weightMatrix/recusionMatrix/Bias fields."; @@ -999,7 +1292,7 @@ Result NeuralNetworkSpecValidator::validateSimpleRecurrentLayer(const Specificat return r; } } - + // Check weight matrix size // input matrix uint64_t input_matrix_size = params.inputvectorsize() * params.outputvectorsize(); @@ -1021,13 +1314,13 @@ Result NeuralNetworkSpecValidator::validateSimpleRecurrentLayer(const Specificat } // Validate the activations as well return validateRecurrentActivationParams(layer.simplerecurrent().activation()); - + } // GRULayerParams gru = 27; Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralNetworkLayer& layer) { Result r; - + // Must specify hidden states r = validateInputCount(layer, 1, 2); if (r.good()) { @@ -1036,7 +1329,7 @@ Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralN if (!r.good()) { return r; } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "GRU", blobNameToRank); @@ -1044,10 +1337,10 @@ Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralN r = validateRankCount(layer, "GRU", 5, 5, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.gru(); bool hasBiasVectors = params.hasbiasvectors(); - + std::vector weightTypeList; weightTypeList.push_back(valueType(params.updategateweightmatrix())); weightTypeList.push_back(valueType(params.updategaterecursionmatrix())); @@ -1066,8 +1359,8 @@ Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralN return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } // allen-continue - - + + // Check the size of the input matrices const uint64_t input_matrix_size = params.inputvectorsize() * params.outputvectorsize(); const uint64_t outSize = params.outputvectorsize(); @@ -1080,7 +1373,7 @@ Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralN r = validateGeneralWeightParams(params.outputgateweightmatrix(), input_matrix_size, outSize, "GRU", layer.name(), "output gate weight matrix"); if (!r.good()) return r; - + // Check the size of the recurrent matrices const uint64_t recurrent_matrix_size = params.outputvectorsize() * params.outputvectorsize(); r = validateGeneralWeightParams(params.updategaterecursionmatrix(), recurrent_matrix_size, @@ -1092,7 +1385,7 @@ Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralN r = validateGeneralWeightParams(params.outputgaterecursionmatrix(), recurrent_matrix_size, outSize, "GRU", layer.name(), "output gate recursion matrix"); if (!r.good()) return r; - + if (hasBiasVectors){ const uint64_t bias_size = params.outputvectorsize(); r = validateGeneralWeightParams(params.updategatebiasvector(), bias_size, 1, @@ -1105,7 +1398,7 @@ Result NeuralNetworkSpecValidator::validateGRULayer(const Specification::NeuralN "GRU", layer.name(), "output gate bias vector"); if (!r.good()) return r; } - + // Now check the activations for (const auto& activation : params.activations()) { r = validateRecurrentActivationParams(activation); @@ -1127,7 +1420,7 @@ Result NeuralNetworkSpecValidator::validateUniDirectionalLSTMLayer(const Specifi if (!r.good()) { return r; } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, 
"UniDirectionalLSTM", blobNameToRank); @@ -1135,29 +1428,29 @@ Result NeuralNetworkSpecValidator::validateUniDirectionalLSTMLayer(const Specifi r = validateRankCount(layer, "UniDirectionalLSTM", 5, 5, blobNameToRank); if (!r.good()) {return r;} } - + for (const auto& activation : layer.unidirectionallstm().activations()) { r = validateRecurrentActivationParams(activation); if (!r.good()) { break; } } - + // Validate common LSTM params and ensure that all weight field types are consistent r = validateLSTMWeightParams(layer.unidirectionallstm().weightparams(), layer.unidirectionallstm().params()); if (!r.good()) { return r; } - + Specification::UniDirectionalLSTMLayerParams recurrent = layer.unidirectionallstm(); uint64_t x = recurrent.inputvectorsize(); uint64_t h = recurrent.outputvectorsize(); - + if (recurrent.activations_size() != 3){ const std::string err = std::string("Unidirectional LSTM layer:" + layer.name() + " must provide 3 activations"); return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + // Check weight matrices' sizes r = validateGeneralWeightParams(recurrent.weightparams().inputgateweightmatrix(), h*x, h, "Unidirectional LSTM", layer.name(), "input gate weight matrix"); @@ -1211,7 +1504,7 @@ Result NeuralNetworkSpecValidator::validateUniDirectionalLSTMLayer(const Specifi "Unidirectional LSTM", layer.name(), "output gate peep hole vector"); if(!r.good()) return r; } - + return r; } @@ -1226,7 +1519,7 @@ Result NeuralNetworkSpecValidator::validateBiDirectionalLSTMLayer(const Specific if (!r.good()) { return r; } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "BiDirectionalLSTM", blobNameToRank); @@ -1234,7 +1527,7 @@ Result NeuralNetworkSpecValidator::validateBiDirectionalLSTMLayer(const Specific r = validateRankCount(layer, "BiDirectionalLSTM", 5, 5, blobNameToRank); if (!r.good()) {return r;} } - + for (const auto& activation : layer.bidirectionallstm().activationsforwardlstm()) { r = validateRecurrentActivationParams(activation); if (!r.good()) { @@ -1247,33 +1540,33 @@ Result NeuralNetworkSpecValidator::validateBiDirectionalLSTMLayer(const Specific break; } } - + // Validate common LSTM params and ensure that all weight field types are consistent r = validateLSTMWeightParams(layer.unidirectionallstm().weightparams(), layer.unidirectionallstm().params()); if (!r.good()) { return r; } - + Specification::BiDirectionalLSTMLayerParams recurrent = layer.bidirectionallstm(); Specification::LSTMParams lstmParams = recurrent.params(); std::string err; - + if (recurrent.activationsforwardlstm_size() != 3) { err = std::string("Bidirectional LSTM layer:" + layer.name() + " forward lstm must provide 3 activations"); return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + if (recurrent.activationsbackwardlstm_size() != 3){ err = std::string("Bidirectional LSTM layer:" + layer.name() + " backward lstm must provide 3 activations"); return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + // Verify weights and biases sizes uint64_t h = recurrent.outputvectorsize(); uint64_t x = recurrent.inputvectorsize(); const Specification::LSTMWeightParams& weightParamsF = recurrent.weightparams(0); const Specification::LSTMWeightParams& weightParamsB = recurrent.weightparams(1); - + // Check forward weight matrices' sizes r = validateGeneralWeightParams(weightParamsF.inputgateweightmatrix(), h*x, h, "Bidirectional LSTM", layer.name(), "forward input gate weight matrix"); @@ -1326,7 +1619,7 @@ Result 
NeuralNetworkSpecValidator::validateBiDirectionalLSTMLayer(const Specific r = validateGeneralWeightParams(weightParamsB.outputgaterecursionmatrix(), h*h, h, "Bidirectional LSTM", layer.name(), "backward output gate recursion matrix"); if(!r.good()) return r; - + // Check bias vectors if (recurrent.params().hasbiasvectors()) { r = validateGeneralWeightParams(weightParamsF.inputgatebiasvector(), h, 1, @@ -1385,14 +1678,14 @@ Result NeuralNetworkSpecValidator::validateCropLayer(const Specification::Neural if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Crop", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "Crop", 3, -1, blobNameToRank); if (!r.good()) {return r;} - + if (layer.input_size() > 1) { if (blobNameToRank.find(layer.input(0)) != blobNameToRank.end() && blobNameToRank.find(layer.input(1)) != blobNameToRank.end()) { @@ -1404,7 +1697,7 @@ Result NeuralNetworkSpecValidator::validateCropLayer(const Specification::Neural } } } - + if (layer.input_size() == 1) { // check the border amounts if (layer.crop().cropamounts().borderamounts_size() != 2) { @@ -1421,7 +1714,7 @@ Result NeuralNetworkSpecValidator::validateCropLayer(const Specification::Neural return r; } } - + return r; } @@ -1433,14 +1726,14 @@ Result NeuralNetworkSpecValidator::validateDotLayer(const Specification::NeuralN if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "DotProduct", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "DotProduct", 3, -1, blobNameToRank); if (!r.good()) {return r;} - + if (blobNameToRank.find(layer.input(0)) != blobNameToRank.end() && blobNameToRank.find(layer.input(1)) != blobNameToRank.end()) { if (blobNameToRank.at(layer.input(0)) != blobNameToRank.at(layer.input(1))) { @@ -1450,7 +1743,7 @@ Result NeuralNetworkSpecValidator::validateDotLayer(const Specification::NeuralN } } } - + return r; } @@ -1461,7 +1754,7 @@ Result NeuralNetworkSpecValidator::validateMvnLayer(const Specification::NeuralN if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "MeanVarianceNormalize", blobNameToRank); @@ -1469,7 +1762,7 @@ Result NeuralNetworkSpecValidator::validateMvnLayer(const Specification::NeuralN r = validateRankCount(layer, "MeanVarianceNormalize", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + return r; } @@ -1477,14 +1770,14 @@ Result NeuralNetworkSpecValidator::validateMvnLayer(const Specification::NeuralN static Result validateEmbeddingWeightsBias(const Specification::NeuralNetworkLayer& layer, const Specification::WeightParams& weights, const Specification::WeightParams& bias) { - + Result r; - + bool has_bias; uint64_t input_dim; uint64_t output_channels; std::string layer_type; - + switch(layer.layer_case()) { case Specification::NeuralNetworkLayer::LayerCase::kEmbedding: { const auto& params = layer.embedding(); @@ -1505,18 +1798,18 @@ static Result validateEmbeddingWeightsBias(const Specification::NeuralNetworkLay default: return r; } - + WeightParamType weightsValueType, biasValueType; weightsValueType = valueType(weights); biasValueType = valueType(bias); - + // Only float32 or float16 parameters can be populated at any time if ((weightsValueType == UNSPECIFIED) || (has_bias && biasValueType == UNSPECIFIED)){ 
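    // Here UNSPECIFIED means more than one precision field of the WeightParams
    // message is populated at once (see the WeightParamType enum's own comment).
    // A minimal sketch of a spec that reaches this error path, for illustration
    // only, assuming the usual generated protobuf accessors:
    //
    //   Specification::WeightParams w;
    //   w.add_floatvalue(1.0f);                    // float32 field populated
    //   w.set_float16value(std::string(2, '\0'));  // float16 field populated too
    //   assert(valueType(w) == UNSPECIFIED);       // -> rejected here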
std::string err = layer_type + " '" + layer.name() + "' has invalid weights/bias fields. Field value types should match and should either be half or full precision."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + if (has_bias){ if ((weightsValueType == CoreML::FLOAT16 && biasValueType == CoreML::FLOAT32) || (weightsValueType == CoreML::FLOAT32 && biasValueType == CoreML::FLOAT16)){ @@ -1534,7 +1827,7 @@ static Result validateEmbeddingWeightsBias(const Specification::NeuralNetworkLay layer_type, layer.name(), "bias"); if (!r.good()) return r; } - + return r; } @@ -1548,17 +1841,17 @@ Result NeuralNetworkSpecValidator::validateEmbeddingLayer(const Specification::N if (!r.good()) { return r; } - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Embedding", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "Embedding", 4, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.embedding(); r = validateEmbeddingWeightsBias(layer, params.weights(), params.bias()); - + return r; } @@ -1571,17 +1864,17 @@ Result NeuralNetworkSpecValidator::validateEmbeddingNDLayer(const Specification: if (!r.good()) { return r; } - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "EmbeddingND", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "EmbeddingND", 2, 5, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.embeddingnd(); r = validateEmbeddingWeightsBias(layer, params.weights(), params.bias()); - + return r; } @@ -1619,7 +1912,7 @@ Result NeuralNetworkSpecValidator::validateSequenceRepeatLayer(const Specificati if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "SequenceRepeat", blobNameToRank); @@ -1627,7 +1920,7 @@ Result NeuralNetworkSpecValidator::validateSequenceRepeatLayer(const Specificati r = validateRankCount(layer, "SequenceRepeat", 5, -1, blobNameToRank); if (!r.good()) {return r;} } - + return r; } @@ -1635,10 +1928,10 @@ Result NeuralNetworkSpecValidator::validateSoftmaxLayer(const Specification::Neu Result r; r = validateInputCount(layer, 1, 1); if (!r.good()) {return r;} - + r = validateOutputCount(layer, 1, 1); if (!r.good()) {return r;} - + if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Softmax", blobNameToRank); if (!r.good()) {return r;} @@ -1653,7 +1946,7 @@ Result NeuralNetworkSpecValidator::validateConcatLayer(const Specification::Neur if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "Concat", blobNameToRank); @@ -1665,7 +1958,7 @@ Result NeuralNetworkSpecValidator::validateConcatLayer(const Specification::Neur r = validateRankCount(layer, "Concat", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + // check that all inputs have same rank int rank = 0; if (blobNameToRank.find(layer.input(0)) != blobNameToRank.end()) { @@ -1673,7 +1966,7 @@ Result NeuralNetworkSpecValidator::validateConcatLayer(const Specification::Neur } else { return r; } - + for (const auto& input : layer.input()) { if (blobNameToRank.find(input) != blobNameToRank.end()) { if (rank != blobNameToRank.at(input)) { @@ -1684,7 +1977,7 @@ Result NeuralNetworkSpecValidator::validateConcatLayer(const Specification::Neur } } } - + return r; } @@ -1694,19 +1987,19 @@ Result 
NeuralNetworkSpecValidator::validateCustomLayer(const Specification::Neur if (r.good()) { r = validateOutputCount(layer, 1, -1); } - + if (layer.custom().classname().size() == 0) { std::string err = "Custom layer " + layer.name() + " has an empty 'className' field. This field is required in order for Core ML to link to the implementation for this custom class."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + for (const auto& param: layer.custom().weights()) { if (!checkSingleWeightType(param)) { std::string err = "Custom layer " + layer.name() + " has a weights parameter with multiple types filled in. The WeightParams message should be treated as a oneof."; r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + return r; } @@ -1717,7 +2010,7 @@ Result NeuralNetworkSpecValidator::validateResizeBilinearLayer(const Specificati if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "ResizeBilinear", blobNameToRank); @@ -1725,7 +2018,7 @@ Result NeuralNetworkSpecValidator::validateResizeBilinearLayer(const Specificati r = validateRankCount(layer, "ResizeBilinear", 3, -1, blobNameToRank); if (!r.good()) {return r;} } - + const auto& params = layer.resizebilinear(); // target Size must be 2D if provided if (!(params.targetsize_size() == 0 || params.targetsize_size() == 2)) { @@ -1733,7 +2026,7 @@ Result NeuralNetworkSpecValidator::validateResizeBilinearLayer(const Specificati r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + return r; } @@ -1744,14 +2037,14 @@ Result NeuralNetworkSpecValidator::validateCropResizeLayer(const Specification:: if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!r.good()){ return r;} if (ndArrayInterpretation) { r = validateInputOutputRankEquality(layer, "CropResize", blobNameToRank); if (!r.good()) {return r;} r = validateRankCount(layer, "CropResize", 5, -1, blobNameToRank); if (!r.good()) {return r;} - + if (blobNameToRank.find(layer.input(0)) != blobNameToRank.end() && blobNameToRank.find(layer.input(1)) != blobNameToRank.end()) { if (blobNameToRank.at(layer.input(0)) != blobNameToRank.at(layer.input(1))) { @@ -1760,9 +2053,9 @@ Result NeuralNetworkSpecValidator::validateCropResizeLayer(const Specification:: return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + } - + const auto& params = layer.cropresize(); // target Size must be 2D if provided if (!(params.targetsize_size() == 0 || params.targetsize_size() == 2)) { @@ -1770,7 +2063,7 @@ Result NeuralNetworkSpecValidator::validateCropResizeLayer(const Specification:: r = Result(ResultType::INVALID_MODEL_PARAMETERS, err); return r; } - + return r; } @@ -1784,7 +2077,7 @@ Result NeuralNetworkSpecValidator::validateBranchLayer(const Specification::Neur if (!r.good()) { return r; } - + if (layer.inputtensor_size()) { auto &in_tensor = layer.inputtensor(0); if (in_tensor.dimvalue_size()) { @@ -1796,9 +2089,9 @@ Result NeuralNetworkSpecValidator::validateBranchLayer(const Specification::Neur } } } - + std::string condition = layer.input(0); - + const auto& params = layer.branch(); // check that condition is already present in the network if (blobs.find(condition) == blobs.end()) { @@ -1806,7 +2099,7 @@ Result NeuralNetworkSpecValidator::validateBranchLayer(const Specification::Neur + condition + "' which is not present in the network prior to this layer."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + // get the NN spec for If and 
Else branches const auto& ifNNSpec = params.ifbranch(); const auto& elseNNSpec = params.elsebranch(); @@ -1815,14 +2108,14 @@ Result NeuralNetworkSpecValidator::validateBranchLayer(const Specification::Neur return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } bool isElseBranch = (elseNNSpec.layers_size() > 0) ? true : false; - + // validate both If and Else branches NeuralNetworkSpecValidator ifNNValidator(blobs, ModelIOBlobNameToRank, ndArrayInterpretation, loopStackDepth, blobNameToRank); r = ifNNValidator.validateNeuralNetwork(ifNNSpec); if (!r.good()) { return r; } - + if (isElseBranch) { NeuralNetworkSpecValidator elseNNValidator(blobs, ModelIOBlobNameToRank, ndArrayInterpretation, loopStackDepth, blobNameToRank); r = elseNNValidator.validateNeuralNetwork(elseNNSpec); @@ -1841,7 +2134,7 @@ Result NeuralNetworkSpecValidator::validateBranchLayer(const Specification::Neur } } } - + return r; } @@ -1880,7 +2173,7 @@ Result NeuralNetworkSpecValidator::validateBatchedMatmulLayer(const Specificatio r = validateOutputCount(layer, 1, 1); if (!r.good()) {return r;} } - + // validate rank, if present if (layer.input_size() == 2 && layer.inputtensor_size() == 2 && layer.outputtensor_size() == 1) { int in1_rank = static_cast(layer.inputtensor(0).rank()); @@ -1891,7 +2184,7 @@ Result NeuralNetworkSpecValidator::validateBatchedMatmulLayer(const Specificatio return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + if (layer.input_size() == 1 && layer.inputtensor_size() == 1 && layer.outputtensor_size() == 1) { int in_rank = static_cast(layer.inputtensor(0).rank()); int out_rank = static_cast(layer.outputtensor(0).rank()); @@ -1900,18 +2193,28 @@ Result NeuralNetworkSpecValidator::validateBatchedMatmulLayer(const Specificatio return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + if (layer.input_size() > 1 && layer.batchedmatmul().hasbias()) { std::string err = "BatchedMatMul layer '" + layer.name() + "': has two inputs and 'hasBias' flag is set to True." 
"However, bias is only supported when the layer has 1 input."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } + if (layer.input_size() > 1 && layer.batchedmatmul().int8dynamicquantize()) { + std::string err = "BatchedMatMul layer '" + layer.name() + "': cannot use dynamic quantization with 2 inputs."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + + if (layer.batchedmatmul().int8dynamicquantize()) { + r = validateInt8Requirements(layer.batchedmatmul().weights(), "BatchedMatMul", layer.name()); + if (!r.good()) {return r;} + } + if (layer.input_size() == 1) { const auto& params = layer.batchedmatmul(); r = validateInnerProductWeightsBias(layer, params.weights(), params.bias()); } - + return r; } @@ -2360,7 +2663,7 @@ Result NeuralNetworkSpecValidator::validateSplitNDLayer(const Specification::Neu if (params.splitsizes_size() > 0) { numSplits = static_cast(params.splitsizes_size()); } else { - numSplits = params.numsplits(); + numSplits = static_cast(params.numsplits()); } if (numSplits == 0) { const std::string err = "Either split_sizes or num_splits should be provided for '" + layer.name() + "' layer."; @@ -2464,7 +2767,7 @@ Result NeuralNetworkSpecValidator::validateSliceStaticLayer(const Specification: Result NeuralNetworkSpecValidator::validateSliceDynamicLayer(const Specification::NeuralNetworkLayer& layer) { Result r; - r = validateInputCount(layer, 2, 6); + r = validateInputCount(layer, 2, 7); if (r.good()) { r = validateOutputCount(layer, 1, 1); } @@ -2473,7 +2776,7 @@ Result NeuralNetworkSpecValidator::validateSliceDynamicLayer(const Specification Result NeuralNetworkSpecValidator::validateTileLayer(const Specification::NeuralNetworkLayer& layer) { Result r; - r = validateInputCount(layer, 1, 1); + r = validateInputCount(layer, 1, 2); if (r.good()) { r = validateOutputCount(layer, 1, 1); } @@ -2606,7 +2909,7 @@ Result NeuralNetworkSpecValidator::validateLoopLayer(const Specification::Neural if (!r.good()) { return r; } - + // check that if input exists, and if its shape exists, its scalar if (layer.inputtensor_size()) { auto &in_tensor = layer.inputtensor(0); @@ -2619,13 +2922,13 @@ Result NeuralNetworkSpecValidator::validateLoopLayer(const Specification::Neural } } } - + const auto& params = layer.loop(); const auto &conditionNNSpec = params.conditionnetwork(); std::string conditionVar = params.conditionvar(); const auto &bodyNNSpec = params.bodynetwork(); bool isConditionNet = (conditionNNSpec.layers_size() > 0) ? 
true : false; - + // validate some generic requirements for the existense of fields if (bodyNNSpec.layers_size() == 0) { std::string err = "Loop Layer '" + std::string(layer.name()) + "' has an empty body network"; @@ -2639,17 +2942,17 @@ Result NeuralNetworkSpecValidator::validateLoopLayer(const Specification::Neural std::string err = "Loop Layer '" + std::string(layer.name()) + "': has no input, no condition network and max loop iterations is 0."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - - + + NeuralNetworkSpecValidator conditionNNValidator(blobs, ModelIOBlobNameToRank, ndArrayInterpretation, loopStackDepth, blobNameToRank); - + // validate the condition network if it exists if (isConditionNet) { r = conditionNNValidator.validateNeuralNetwork(conditionNNSpec); if (!r.good()) { return r; } - + // conditionVar must be produced by the condition network if (blobs.find(conditionVar) == blobs.end()) { // conditionVar not in the parent NN if (conditionNNValidator.blobs.find(conditionVar) == conditionNNValidator.blobs.end()) { @@ -2664,7 +2967,7 @@ Result NeuralNetworkSpecValidator::validateLoopLayer(const Specification::Neural return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + // add the blobs generated by the condition network to the scope of the parent network for (auto& blob_map: conditionNNValidator.blobs) { std::string current_blob_name = blob_map.first; @@ -2672,18 +2975,18 @@ Result NeuralNetworkSpecValidator::validateLoopLayer(const Specification::Neural conditionNNValidator.blobs[current_blob_name].end()); } } - + // validate the body network NeuralNetworkSpecValidator bodyNNValidator(blobs, ModelIOBlobNameToRank, ndArrayInterpretation, loopStackDepth + 1, blobNameToRank); r = bodyNNValidator.validateNeuralNetwork(bodyNNSpec); if (!r.good()) { return r; } - + // update the set of "blobs" of the current Neural Network: // - if there is no condition network, all the blobs generated in the body network gets added to the scope of the parent network // - if there is a condition network, all its blobs gets added to the overall scope (already done above), as well the ones from the bodynetwork that are present in the condition net. 
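    // Worked example of the scoping rule above (blob names hypothetical): if the
    // body network produces {"x", "count"} and the condition network also touches
    // "count", then with a condition network present only "count" is promoted to
    // the parent scope, whereas without one both "x" and "count" are promoted.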
- + if (!isConditionNet) { for (auto& blob_map: bodyNNValidator.blobs) { std::string current_blob_name = blob_map.first; @@ -2699,7 +3002,7 @@ Result NeuralNetworkSpecValidator::validateLoopLayer(const Specification::Neural } } } - + return r; } @@ -2712,7 +3015,7 @@ Result NeuralNetworkSpecValidator::validateLoopContinueBreakLayer(const Specific if (!r.good()) { return r; } - + if (loopStackDepth == 0) { std::string err; if (layer.layer_case() == Specification::NeuralNetworkLayer::LayerCase::kLoopBreak) { @@ -2722,7 +3025,7 @@ Result NeuralNetworkSpecValidator::validateLoopContinueBreakLayer(const Specific } return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + return r; } @@ -2744,7 +3047,7 @@ Result NeuralNetworkSpecValidator::validateRankPreservingReshapeLayer(const Spec return r; } } - + r = validateRankExists(layer); if (!r.good()) { return r; @@ -2794,12 +3097,12 @@ Result NeuralNetworkSpecValidator::validateExpandDimsLayer(const Specification:: if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (layer.expanddims().axes_size() == 0) { std::string err = "ExpandDims Layer '" + std::string(layer.name()) + "': length of the 'axes' parameter cannot be 0."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + // either all values in axes must be positive or all negative std::set axes_set; for (int i=0; i 0 && layer.outputtensor_size() > 0) { int input_rank = static_cast(layer.inputtensor(0).rank()); int output_rank = static_cast(layer.outputtensor(0).rank()); int axes_length = layer.expanddims().axes_size(); - + if (input_rank + axes_length != output_rank) { std::string err = "ExpandDims Layer '" + std::string(layer.name()) + "': input rank plus the length of the axes parameter must equal output rank."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + std::vector axes; for (int i=0; i output_rank - 1 || min_value < 0) { @@ -2852,14 +3155,14 @@ Result NeuralNetworkSpecValidator::validateSqueezeLayer(const Specification::Neu if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (!layer.squeeze().squeezeall()) { - + if (layer.squeeze().axes_size() == 0) { std::string err = "Squeeze Layer '" + std::string(layer.name()) + "': length of the 'axes' parameter cannot be 0."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } - + // either all values in axes must be positive or all negative std::set axes_set; for (int i=0; i 0 && layer.outputtensor_size() > 0) { int input_rank = static_cast(layer.inputtensor(0).rank()); int output_rank = static_cast(layer.outputtensor(0).rank()); int axes_length = layer.squeeze().axes_size(); - + if (input_rank != 1) { if (output_rank + axes_length != input_rank) { std::string err = "Squeeze Layer '" + std::string(layer.name()) + "': output rank plus the length of the axes parameter must equal input rank."; return Result(ResultType::INVALID_MODEL_PARAMETERS, err); } } - + std::vector axes; for (int i=0; i input_rank - 1 || min_value < 0) { @@ -2905,7 +3208,7 @@ Result NeuralNetworkSpecValidator::validateSqueezeLayer(const Specification::Neu } } } - + return r; } @@ -2915,12 +3218,12 @@ Result NeuralNetworkSpecValidator::validateRangeStaticLayer(const Specification: if (r.good()) { r = validateOutputCount(layer, 1, 1); } - + if (layer.outputtensor_size() > 0) { int rank = static_cast(layer.outputtensor(0).rank()); r = checkRank(layer, "Range", 1, 1, "output", rank); } - + return r; } @@ -3112,7 +3415,7 @@ Result NeuralNetworkSpecValidator::validateTopKLayer(const Specification::Neural if (!(r = 
validateInputCount(layer, 1, 2)).good()) return r; if (!(r = validateOutputCount(layer, 2, 2)).good()) return r; if (!(r = validateInputOutputRankEquality(layer, "TopK", blobNameToRank)).good()) return r; - + if (blobNameToRank.find(layer.input(0)) != blobNameToRank.end() && blobNameToRank.find(layer.output(1)) != blobNameToRank.end()) { if (blobNameToRank.at(layer.input(0)) != blobNameToRank.at(layer.output(1))) { @@ -3264,6 +3567,60 @@ Result NeuralNetworkSpecValidator::validateNMSLayer(const Specification::NeuralN return r; } +Result NeuralNetworkSpecValidator::validateOneHotLayer(const Specification::NeuralNetworkLayer& layer) { + Result r; + r = validateInputCount(layer, 1, 2); + if (r.good()) { + r = validateOutputCount(layer, 1, 1); + } + return r; +} + +Result NeuralNetworkSpecValidator::validateCumSumLayer(const Specification::NeuralNetworkLayer& layer) { + Result r; + r = validateInputCount(layer, 1, 2); + if (r.good()) { + r = validateOutputCount(layer, 1, 1); + } + return r; +} + +Result NeuralNetworkSpecValidator::validateArgsortLayer(const Specification::NeuralNetworkLayer& layer) { + Result r; + if (!(r = validateInputCount(layer, 1, 1)).good()) return r; + if (!(r = validateOutputCount(layer, 1, 1)).good()) return r; + if (!(r = validateInputOutputRankEquality(layer, "Argsort", blobNameToRank)).good()) return r; + + const auto& params = layer.argsort(); + + if (params.axis() < 0) { + const std::string err = "Value of 'axis' is negative for layer of type 'ArgSort' and name '" + layer.name() + "', which is not supported. It must be positive."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + + if (layer.inputtensor_size() > 0) { + const int rank = static_cast(layer.inputtensor(0).rank()); + if (params.axis() < 0 || params.axis() >= rank) { + const std::string err = "Value of 'axis' is " + std::to_string(params.axis()) + \ + ", but it must be in the range [0," \ + + std::to_string(rank) + ") for layer of type 'ArgSort' and name '" + layer.name() + "'."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + } + return r; +} + +Result NeuralNetworkSpecValidator::validateSliceBySizeLayer(const Specification::NeuralNetworkLayer& layer) { + Result r; + r = validateInputCount(layer, 2, 2); + if (r.good()) { + r = validateOutputCount(layer, 1, 1); + } + return r; +} + + + Result NeuralNetworkSpecValidator::validateFailUnknownType(const Specification::NeuralNetworkLayer& layer) { return Result(ResultType::INVALID_MODEL_PARAMETERS, "Unsupported layer type (" + layer.GetTypeName() + ") for layer '" + layer.name() + "'."); } diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkShapes.cpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkShapes.cpp similarity index 99% rename from mlmodel/src/NeuralNetwork/NeuralNetworkShapes.cpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkShapes.cpp index 6c0c5e32c..f6a6c3f50 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkShapes.cpp +++ b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkShapes.cpp @@ -7,7 +7,7 @@ // #include "NeuralNetworkShapes.hpp" -#include "../Utils.hpp" +#include "Utils.hpp" using namespace CoreML; diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkShapes.hpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkShapes.hpp similarity index 94% rename from mlmodel/src/NeuralNetwork/NeuralNetworkShapes.hpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkShapes.hpp index 50c13bd6d..dedc31fae 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkShapes.hpp +++ 
b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkShapes.hpp @@ -9,11 +9,11 @@ #ifndef MLMODEL_NeuralNetworkShapes_hpp #define MLMODEL_NeuralNetworkShapes_hpp -#include "../../build/format/NeuralNetwork_enums.h" -#include "../Validators.hpp" -#include "../ValidatorUtils-inl.hpp" -#include "../LayerShapeConstraints.hpp" -#include "../transforms/NeuralNetwork.hpp" +#include "../build/format/NeuralNetwork_enums.h" +#include "Validation/Validators.hpp" +#include "Validation/ValidatorUtils-inl.hpp" +#include "LayerShapeConstraints.hpp" +#include "transforms/NeuralNetwork.hpp" #include #include @@ -33,7 +33,7 @@ namespace CoreML { bool isValid() const; - const ShapeConstraint& shape(const std::string& name) const; + const ShapeConstraint& shape(const std::string& name) const; void print() const; diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkValidator.cpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidator.cpp similarity index 94% rename from mlmodel/src/NeuralNetwork/NeuralNetworkValidator.cpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidator.cpp index d7aaa1622..ac571cc2e 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkValidator.cpp +++ b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidator.cpp @@ -8,7 +8,8 @@ #include "NeuralNetworkValidator.hpp" #include "NeuralNetworkValidatorUtils.hpp" -#include "../ParameterValidator.hpp" +#include "ResultReason.hpp" +#include "Validation/ParameterValidator.hpp" #include #include @@ -39,6 +40,8 @@ Result NeuralNetworkSpecValidator::validateLayer(const Specification::NeuralNetw switch(layer.layer_case()) { case Specification::NeuralNetworkLayer::LayerCase::kConvolution: return validateConvolutionLayer(layer); + case Specification::NeuralNetworkLayer::LayerCase::kConvolution3D: + return validateConvolution3DLayer(layer); case Specification::NeuralNetworkLayer::LayerCase::kInnerProduct: return validateInnerProductLayer(layer); case Specification::NeuralNetworkLayer::LayerCase::kBatchnorm: @@ -47,6 +50,10 @@ Result NeuralNetworkSpecValidator::validateLayer(const Specification::NeuralNetw return validateActivation(layer); case Specification::NeuralNetworkLayer::LayerCase::kPooling: return validatePoolingLayer(layer); + case Specification::NeuralNetworkLayer::LayerCase::kPooling3D: + return validatePooling3dLayer(layer); + case Specification::NeuralNetworkLayer::LayerCase::kGlobalPooling3D: + return validateGlobalPooling3dLayer(layer); case Specification::NeuralNetworkLayer::LayerCase::kPadding: return validatePaddingLayer(layer); case Specification::NeuralNetworkLayer::LayerCase::kConcat: @@ -178,6 +185,7 @@ Result NeuralNetworkSpecValidator::validateLayer(const Specification::NeuralNetw return validateLoopContinueBreakLayer(layer); case CoreML::Specification::NeuralNetworkLayer::kErf: case CoreML::Specification::NeuralNetworkLayer::kGelu: + case CoreML::Specification::NeuralNetworkLayer::kClampedReLU: return validateActivationLayers(layer); case CoreML::Specification::NeuralNetworkLayer::kRankPreservingReshape: return validateRankPreservingReshapeLayer(layer); @@ -315,6 +323,14 @@ Result NeuralNetworkSpecValidator::validateLayer(const Specification::NeuralNetw return validateConstantPadLayer(layer); case Specification::NeuralNetworkLayer::LayerCase::kNonMaximumSuppression: return validateNMSLayer(layer); + case Specification::NeuralNetworkLayer::LayerCase::kOneHot: + return validateOneHotLayer(layer); + case Specification::NeuralNetworkLayer::LayerCase::kCumSum: + return validateCumSumLayer(layer); + case 
Specification::NeuralNetworkLayer::LayerCase::kArgSort: + return validateArgsortLayer(layer); + case Specification::NeuralNetworkLayer::LayerCase::kSliceBySize: + return validateSliceBySizeLayer(layer); default: return validateFailUnknownType(layer); } @@ -457,12 +473,12 @@ Result validateNeuralNetworkTopLevel(const Specification::ModelDescription& inte // ndArrayInterpretation == True ==> Tensors can have any rank (including 5). bool ndArrayInterpretation = false; - + bool hasNonIOS12Layer = false; bool hasNewArrayShapeMapping = false; bool hasNewImageShapeMapping = false; bool hasMultiArrayInput = false; - + for (const auto& input: interface.input()) { if (input.type().Type_case() == Specification::FeatureType::kMultiArrayType) { hasMultiArrayInput = true; @@ -491,7 +507,7 @@ Result validateNeuralNetworkTopLevel(const Specification::ModelDescription& inte if (hasNonIOS12Layer && !hasNewArrayShapeMapping && hasMultiArrayInput) { return Result(ResultType::INVALID_MODEL_INTERFACE, - "Neural Network Multi-Array input shape mapping cannot be 'RANK5_ARRAY_MAPPING' if the network contains a layer added in version 3 (iOS 13) or later."); + "Neural Network Multi-Array input shape mapping cannot be 'RANK5_ARRAY_MAPPING' if the network contains a layer added in version 4 (iOS 13) or later. Use 'EXACT_ARRAY_MAPPING' instead."); } if (!hasNewArrayShapeMapping && hasNewImageShapeMapping && hasMultiArrayInput) { @@ -523,20 +539,10 @@ Result validateNeuralNetworkTopLevel(const Specification::ModelDescription& inte return Result(ResultType::INVALID_MODEL_INTERFACE, "Neural networks require at least one non-optional input."); } - - - // Check the inputs and output types - if (!std::all_of(interface.input().begin(), - interface.input().end(), - [](const Specification::FeatureDescription& input) { - return (input.type().Type_case() == Specification::FeatureType::kImageType - || input.type().Type_case() == Specification::FeatureType::kMultiArrayType); - })) { - return Result(ResultType::INVALID_MODEL_INTERFACE, - "Neural Networks require inputs to be images or MLMultiArray."); - } - - + + // Check the input types + HANDLE_RESULT_AND_RETURN_ON_ERROR(validateInputOutputTypes(interface.input(), ResultReason::MODEL_INPUT_TYPE_INVALID, "inputs")); + std::map ioBlobNameToRank; // to collect ranks of input/output blobs from the shapes present in the description // populate "ioBlobNameToRank" @@ -615,23 +621,10 @@ Result validateNeuralNetworkTopLevel(const Specification::ModelDescription& inte } } else { // validate input shape when "ndArrayInterpretation" is True - - int rank = input.type().multiarraytype().shape().size(); - if (!(rank > 0)) { - return Result(ResultType::INVALID_MODEL_INTERFACE, "Input MLMultiArray to neural networks must have at least 1 dimension."); - } - switch (input.type().multiarraytype().ShapeFlexibility_case()) { - case CoreML::Specification::ArrayFeatureType::kEnumeratedShapes: - break; - case CoreML::Specification::ArrayFeatureType::kShapeRange: - if (input.type().multiarraytype().shaperange().sizeranges_size() != rank) { - return Result(ResultType::INVALID_MODEL_INTERFACE, "For MLMultiArray input: Rank of the flexible shape range must match the rank of the default shape."); - break; - } - case CoreML::Specification::ArrayFeatureType::SHAPEFLEXIBILITY_NOT_SET: - break; + + if (!(r = validateNdMultiArrayInputType(input.type().multiarraytype())).good()) { + return r; } - } // if else block on spec version to check validity of input shape } } @@ -752,16 +745,8 @@ namespace CoreML { const 
auto& interface = format.description(); // This isn't true for classifiers and regressors -- need to template specialize it to make these work - if (!std::all_of(interface.output().begin(), - interface.output().end(), - [](const Specification::FeatureDescription& output) { - return output.type().Type_case() == Specification::FeatureType::kMultiArrayType || - output.type().Type_case() == Specification::FeatureType::kImageType; - })) { - return Result(ResultType::INVALID_MODEL_INTERFACE, - "Neural Network outputs must be either an image or MLMultiArray."); - } - + HANDLE_RESULT_AND_RETURN_ON_ERROR(validateInputOutputTypes(interface.output(), ResultReason::MODEL_OUTPUT_TYPE_INVALID, "outputs")); + std::set outputBlobNames; Result r = validateNeuralNetworkTopLevel(format.description(), format.neuralnetwork(), outputBlobNames, format.isupdatable()); diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkValidator.hpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidator.hpp similarity index 92% rename from mlmodel/src/NeuralNetwork/NeuralNetworkValidator.hpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidator.hpp index 2ede9c9d7..99f8db92b 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkValidator.hpp +++ b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidator.hpp @@ -9,38 +9,38 @@ #ifndef NeuralNetworkValidator_h #define NeuralNetworkValidator_h -#include "../../build/format/NeuralNetwork_enums.h" -#include "../Validators.hpp" -#include "../ValidatorUtils-inl.hpp" -#include "../transforms/NeuralNetwork.hpp" +#include "../../../build/format/NeuralNetwork_enums.h" +#include "Validation/Validators.hpp" +#include "Validation/ValidatorUtils-inl.hpp" +#include "transforms/NeuralNetwork.hpp" #include "NeuralNetworkShapes.hpp" -#include "../QuantizationValidationUtils.hpp" +#include "Validation/QuantizationValidationUtils.hpp" #include "UpdatableNeuralNetworkValidator.hpp" namespace CoreML { - + class NeuralNetworkSpecValidator { - + public: - + NeuralNetworkSpecValidator(); NeuralNetworkSpecValidator(const std::map> &blobsInitial, const std::map &ioBlobNameToRank, bool ndArrayMode, int loopDepth, const std::map &blobRanks); - + template Result validateNeuralNetwork(const T& nn); - + bool ndArrayInterpretation; - + // For Model inputs/outputs, map from name to rank std::map ModelIOBlobNameToRank; - + // For all model blobs, map from name to rank std::map blobNameToRank; - + // Collection of data blob names in the Neural Network. // The collection starts with the list of all Model inputs, and grows later. // As layers are iterated through, they add their output blobs to this map. 
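    // Usage sketch (hypothetical, for illustration only): after the layer at
    // index i is validated, each of its outputs is recorded here so that later
    // layers can verify their inputs already exist in scope:
    //   for (const auto& out : layer.output()) {
    //       blobs[out].insert(i);   // blob "out" is produced by layer i
    //   }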
@@ -49,19 +49,22 @@ namespace CoreML { // (a data blob maybe produced by more than one layers if its a copy layer, or the layers // are within if and else branches) std::map> blobs; - + int loopStackDepth; - - + + private: - + Result validateLayer(const Specification::NeuralNetworkLayer& layer); - + Result validateConvolutionLayer(const Specification::NeuralNetworkLayer& layer); + Result validateConvolution3DLayer(const Specification::NeuralNetworkLayer& layer); Result validateInnerProductLayer(const Specification::NeuralNetworkLayer& layer); Result validateBatchnormLayer(const Specification::NeuralNetworkLayer& layer); Result validateActivation(const Specification::NeuralNetworkLayer& layer); Result validatePoolingLayer(const Specification::NeuralNetworkLayer& layer); + Result validatePooling3dLayer(const Specification::NeuralNetworkLayer& layer); + Result validateGlobalPooling3dLayer(const Specification::NeuralNetworkLayer& layer); Result validatePaddingLayer(const Specification::NeuralNetworkLayer& layer); Result validateLRNLayer(const Specification::NeuralNetworkLayer& layer); Result validateSplitLayer(const Specification::NeuralNetworkLayer& layer); @@ -176,6 +179,11 @@ namespace CoreML { Result validateLayerNormalizationLayer(const Specification::NeuralNetworkLayer& layer); Result validateConstantPadLayer(const Specification::NeuralNetworkLayer& layer); Result validateNMSLayer(const Specification::NeuralNetworkLayer& layer); + Result validateOneHotLayer(const Specification::NeuralNetworkLayer& layer); + Result validateCumSumLayer(const Specification::NeuralNetworkLayer& layer); + Result validateClampedReluLayer(const Specification::NeuralNetworkLayer& layer); + Result validateArgsortLayer(const Specification::NeuralNetworkLayer& layer); + Result validateSliceBySizeLayer(const Specification::NeuralNetworkLayer& layer); Result validateFailUnknownType(const Specification::NeuralNetworkLayer& layer); }; diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkValidatorGraph.hpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidatorGraph.hpp similarity index 97% rename from mlmodel/src/NeuralNetwork/NeuralNetworkValidatorGraph.hpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidatorGraph.hpp index ef51f6b90..38149f588 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkValidatorGraph.hpp +++ b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidatorGraph.hpp @@ -3,9 +3,9 @@ // mlmodel // -#include "../Validators.hpp" +#include "Validation/Validators.hpp" -using namespace CoreML; +namespace CoreML { static bool isLayerSupportedForBackprop(const Specification::NeuralNetworkLayer* layer) { switch (layer->layer_case()) { @@ -32,9 +32,9 @@ static bool isLayerSupportedForBackprop(const Specification::NeuralNetworkLayer* } struct LayerNode { - + public: - + std::vector parents; // list of nodes that are parent to this node std::vector children; Specification::NeuralNetworkLayer::LayerCase layerType; @@ -44,13 +44,13 @@ struct LayerNode { std::vector outputNames; bool isUpdatable; bool isBackPropagable; - + LayerNode () {} - + LayerNode(const Specification::LossLayer *lossLayer) { name = lossLayer->name(); - + switch (lossLayer->LossLayerType_case()) { case CoreML::Specification::LossLayer::kCategoricalCrossEntropyLossLayer: inputNames.push_back(lossLayer->categoricalcrossentropylosslayer().input()); @@ -67,7 +67,7 @@ struct LayerNode { isUpdatable = false; isBackPropagable = false; } - + LayerNode(const Specification::NeuralNetworkLayer *layer) { std::vector inNames; @@ 
-89,14 +89,14 @@ struct LayerNode { }; struct NeuralNetworkValidatorGraph { - + public: - + std::map nodeNameToNode; std::map blobNameToProducingNode; - + NeuralNetworkValidatorGraph() {} - + void insertNode(LayerNode *node) { for (const auto& name: node->inputNames) { @@ -111,7 +111,7 @@ struct NeuralNetworkValidatorGraph { } nodeNameToNode[node->name] = node; } - + LayerNode *getNodeFromName(std::string name) const { if (nodeNameToNode.find(name) == nodeNameToNode.end()) { @@ -121,5 +121,5 @@ struct NeuralNetworkValidatorGraph { } }; - +} diff --git a/mlmodel/src/NeuralNetwork/NeuralNetworkValidatorUtils.hpp b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidatorUtils.hpp similarity index 83% rename from mlmodel/src/NeuralNetwork/NeuralNetworkValidatorUtils.hpp rename to mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidatorUtils.hpp index beec2e0d4..f3fe2ee79 100644 --- a/mlmodel/src/NeuralNetwork/NeuralNetworkValidatorUtils.hpp +++ b/mlmodel/src/Validation/NeuralNetwork/NeuralNetworkValidatorUtils.hpp @@ -7,12 +7,12 @@ // #include "NeuralNetworkValidator.hpp" -#include "../Utils.hpp" +#include "Utils.hpp" #include #include -using namespace CoreML; +namespace CoreML { inline Result validateTensorMessage(const Specification::Tensor& tensor, const Specification::NeuralNetworkLayer& layer) { std::string err; @@ -115,6 +115,48 @@ inline Result validateInputCount(const Specification::NeuralNetworkLayer& layer, } } +inline Result validateInputOutputTypes(const ::google::protobuf::RepeatedPtrField& features, + ResultReason reason, + const std::string& featureTypesDesc) { + auto checkFeatures = [&](const Specification::FeatureDescription& feature) { + switch (feature.type().Type_case()) { + case Specification::FeatureType::kImageType: + case Specification::FeatureType::kMultiArrayType: + return true; + default: + return false; + } + }; + + if (!std::all_of(features.cbegin(), features.cend(), checkFeatures)) { + return Result(ResultType::INVALID_MODEL_INTERFACE, reason, + "Neural Networks require " + featureTypesDesc + " to be images or MLMultiArray."); + } + + return Result(); +} + +inline Result validateNdMultiArrayInputType(const Specification::ArrayFeatureType& arrayType) +{ + auto rank = arrayType.shape().size(); + if (!(rank > 0)) { + return Result(ResultType::INVALID_MODEL_INTERFACE, "Input MLMultiArray to neural networks must have at least 1 dimension."); + } + switch (arrayType.ShapeFlexibility_case()) { + case CoreML::Specification::ArrayFeatureType::kEnumeratedShapes: + break; + case CoreML::Specification::ArrayFeatureType::kShapeRange: + if (arrayType.shaperange().sizeranges_size() != rank) { + return Result(ResultType::INVALID_MODEL_INTERFACE, "For MLMultiArray input: Rank of the flexible shape range must match the rank of the default shape."); + break; + } + case CoreML::Specification::ArrayFeatureType::SHAPEFLEXIBILITY_NOT_SET: + break; + } + + return Result(); +} + inline Result validateOutputCount(const Specification::NeuralNetworkLayer& layer, int min, int max) { assert( min <= max || max < 0 ); @@ -326,4 +368,34 @@ inline Result validateRecurrentActivationParams(const Specification::ActivationP return Result(); } +// Validate that 3d Pooling Padding arguments are only set if padding type is CUSTOM; +// and if set, that they are non-negative. 
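+// For illustration: with paddingType == CUSTOM, paddingFront = 1 passes while
+// paddingFront = -1 fails; with any other paddingType, a non-zero value fails,
+// e.g. (sketch):
+//   validatePooling3dPadding(
+//       Specification::Pooling3DLayerParams_Pooling3DPaddingType_SAME,
+//       1, "paddingFront");   // -> INVALID_MODEL_PARAMETERS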
+inline Result validatePooling3dPadding(const Specification::Pooling3DLayerParams_Pooling3DPaddingType paddingType, + const int padding, const std::string& paddingName) { + if (paddingType == Specification::Pooling3DLayerParams_Pooling3DPaddingType_CUSTOM) { + if (padding < 0) { + std::string err = "Custom Padding " + paddingName + " must be non-negative, got " + std::to_string(padding) + "."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + } else { + if (padding != 0) { + std::string err = "Custom Padding " + paddingName + " cannot be non-zero (got " + std::to_string(padding) + + ") unless padding type is CUSTOM (got " + std::to_string(paddingType) + ")."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + } + Result r; + return r; +} + +// Validate that a value is positive +inline Result validatePositive(const int value, const std::string& name) { + if (value <= 0 ) { + std::string err = name + " must be positive, got " + std::to_string(value) + "."; + return Result(ResultType::INVALID_MODEL_PARAMETERS, err); + } + Result r; + return r; +} +} diff --git a/mlmodel/src/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp b/mlmodel/src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp similarity index 96% rename from mlmodel/src/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp rename to mlmodel/src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp index 296f50c31..74cbbab70 100644 --- a/mlmodel/src/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp +++ b/mlmodel/src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.cpp @@ -5,7 +5,7 @@ #include "UpdatableNeuralNetworkValidator.hpp" -#include "../ParameterValidator.hpp" +#include "Validation/ParameterValidator.hpp" using namespace CoreML; @@ -131,10 +131,7 @@ template Result validateTrainingInputs(const Specification::ModelDes target = updateParams.losslayers(0).meansquarederrorlosslayer().target(); } - bool isClassifier = true; - - // Done to detect if the NN is a neuralNetworkClassifier - try { (void)(dynamic_cast(nn)); } catch (std::bad_cast) { isClassifier = false; } + bool isClassifier = (dynamic_cast(&nn) != nullptr); bool trainingInputMeetsRequirement = false; for (size_t i = 0; i < trainingInputExclusiveIndices.size(); i++) { @@ -393,9 +390,11 @@ template static Result isTrainingConfigurationSupported(const T& nn) static Result validateWeightParamsUpdatable(const Specification::NeuralNetworkLayer& layer) { Result r; - bool weight_update_flag; + bool weight_update_flag = false; bool bias_update_flag = false; bool has_bias = false; + bool weights_are_quantized = false; + bool bias_is_quantized = false; std::string err; @@ -404,19 +403,28 @@ static Result validateWeightParamsUpdatable(const Specification::NeuralNetworkLa has_bias = layer.convolution().hasbias(); if (has_bias) { bias_update_flag = layer.convolution().bias().isupdatable(); + bias_is_quantized = layer.convolution().bias().has_quantization(); } + weights_are_quantized = layer.convolution().weights().has_quantization(); weight_update_flag = layer.convolution().weights().isupdatable(); break; case Specification::NeuralNetworkLayer::kInnerProduct: has_bias = layer.innerproduct().hasbias(); if (has_bias) { bias_update_flag = layer.innerproduct().bias().isupdatable(); + bias_is_quantized = layer.innerproduct().bias().has_quantization(); } + weights_are_quantized = layer.innerproduct().weights().has_quantization(); weight_update_flag = layer.innerproduct().weights().isupdatable(); break; default: return r; } + + if 
(weights_are_quantized || bias_is_quantized) { + err = "An updatable layer, named '" + layer.name() + "', has quantized weights/bias param. Quantized weights/bias not supported for update."; + return Result(ResultType::INVALID_UPDATABLE_MODEL_PARAMETERS, err); + } if (!weight_update_flag || ((has_bias) && (!bias_update_flag))) { err = "An updatable layer, named '" + layer.name() + "', has a weight/bias param which is not marked as updatable."; diff --git a/mlmodel/src/NeuralNetwork/UpdatableNeuralNetworkValidator.hpp b/mlmodel/src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.hpp similarity index 54% rename from mlmodel/src/NeuralNetwork/UpdatableNeuralNetworkValidator.hpp rename to mlmodel/src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.hpp index cd967fbb8..d4d6c9fab 100644 --- a/mlmodel/src/NeuralNetwork/UpdatableNeuralNetworkValidator.hpp +++ b/mlmodel/src/Validation/NeuralNetwork/UpdatableNeuralNetworkValidator.hpp @@ -4,17 +4,15 @@ // #include -#include "../Validators.hpp" +#include "Validation/Validators.hpp" #include #include "NeuralNetworkValidatorGraph.hpp" -using namespace CoreML; - /* Top level function for validating whether a Neural network, marked as updatable is valid or not, which includes the check whether it is supported or not. */ -template Result validateUpdatableNeuralNetwork(const T& nn); +template ::CoreML::Result validateUpdatableNeuralNetwork(const T& nn); -template Result validateTrainingInputs(const Specification::ModelDescription& modelDescription, const T& nn); +template ::CoreML::Result validateTrainingInputs(const ::CoreML::Specification::ModelDescription& modelDescription, const T& nn); diff --git a/mlmodel/src/NonMaximumSuppressionValidator.cpp b/mlmodel/src/Validation/NonMaximumSuppressionValidator.cpp similarity index 82% rename from mlmodel/src/NonMaximumSuppressionValidator.cpp rename to mlmodel/src/Validation/NonMaximumSuppressionValidator.cpp index a17f3caff..51a8cb1b8 100644 --- a/mlmodel/src/NonMaximumSuppressionValidator.cpp +++ b/mlmodel/src/Validation/NonMaximumSuppressionValidator.cpp @@ -7,6 +7,8 @@ #include "Validators.hpp" #include "ValidatorUtils-inl.hpp" #include "../build/format/Model.pb.h" +#include "DataType.hpp" +#include "Globals.hpp" namespace CoreML { @@ -183,25 +185,49 @@ namespace CoreML { const auto& coordinates_in = description.input()[coordinatesInputIndex]; const auto& confidence_out = description.output()[confidenceOutputIndex]; const auto& coordinates_out = description.output()[coordinatesOutputIndex]; - - if (confidence_in.type().multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_DOUBLE) { - return Result(ResultType::INVALID_MODEL_PARAMETERS, - "Input confidence must have dataType DOUBLE"); - } - - if (confidence_out.type().multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_DOUBLE) { - return Result(ResultType::INVALID_MODEL_PARAMETERS, - "Output confidence must have dataType DOUBLE"); - } - - if (coordinates_in.type().multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_DOUBLE) { - return Result(ResultType::INVALID_MODEL_PARAMETERS, - "Input coordinates must have dataType DOUBLE"); + + const auto confidence_in_type = confidence_in.type().multiarraytype().datatype(); + const auto confidence_out_type = confidence_out.type().multiarraytype().datatype(); + const auto coordinates_in_type = coordinates_in.type().multiarraytype().datatype(); + const auto coordinates_out_type = coordinates_out.type().multiarraytype().datatype(); + + 
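+        // The four arrays checked below must all share a single element type:
+        // DOUBLE is accepted at every specification version, FLOAT32 only from
+        // MLMODEL_SPECIFICATION_VERSION_IOS14 onward. For example (sketch), a
+        // FLOAT32 input confidence paired with DOUBLE coordinates fails the
+        // consistency check below regardless of version.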
if (confidence_in_type != confidence_out_type || + confidence_in_type != coordinates_in_type || + confidence_in_type != coordinates_out_type) { + std::stringstream ss; + ss << "'confidence' and 'coordinates' must use the same element type, but "; + ss << "'input confidence' is " << FeatureType(confidence_in.type()).toString() << ", "; + ss << "'output confidence' is " << FeatureType(confidence_out.type()).toString() << ", "; + ss << "'input coordinates' are " << FeatureType(coordinates_in.type()).toString() << ", "; + ss << "and 'output coordinates' are " << FeatureType(coordinates_out.type()).toString() << "."; + + return Result(ResultType::INVALID_MODEL_PARAMETERS, ss.str()); } - - if (coordinates_out.type().multiarraytype().datatype() != Specification::ArrayFeatureType_ArrayDataType_DOUBLE) { - return Result(ResultType::INVALID_MODEL_PARAMETERS, - "Output confidence must have dataType DOUBLE"); + + if (format.specificationversion() >= MLMODEL_SPECIFICATION_VERSION_IOS14) { + if (confidence_in_type != Specification::ArrayFeatureType_ArrayDataType_DOUBLE && + confidence_in_type != Specification::ArrayFeatureType_ArrayDataType_FLOAT32) { + std::stringstream ss; + ss << "The element data type of 'confidence' and 'coordinates' must be either MultiArray<DOUBLE> or MultiArray<FLOAT32>, but "; + ss << "'input confidence' is " << FeatureType(confidence_in.type()).toString() << ", "; + ss << "'output confidence' is " << FeatureType(confidence_out.type()).toString() << ", "; + ss << "'input coordinates' are " << FeatureType(coordinates_in.type()).toString() << ", "; + ss << "and 'output coordinates' are " << FeatureType(coordinates_out.type()).toString() << "."; + + return Result(ResultType::INVALID_MODEL_PARAMETERS, ss.str()); + } + } else { + if (confidence_in_type != Specification::ArrayFeatureType_ArrayDataType_DOUBLE) { + std::stringstream ss; + ss << "The element data type of 'confidence' and 'coordinates' must be MultiArray<DOUBLE> for model specification version earlier than " << MLMODEL_SPECIFICATION_VERSION_IOS14 << ", but "; + ss << "'input confidence' is " << FeatureType(confidence_in.type()).toString() << ", "; + ss << "'output confidence' is " << FeatureType(confidence_out.type()).toString() << ", "; + ss << "'input coordinates' are " << FeatureType(coordinates_in.type()).toString() << ", "; + ss << "and 'output coordinates' are " << FeatureType(coordinates_out.type()).toString() << ". "; + ss << "To enable MultiArray<FLOAT32>, use the model specification version " << MLMODEL_SPECIFICATION_VERSION_IOS14 << " or later."; + + return Result(ResultType::INVALID_MODEL_PARAMETERS, ss.str()); + } } auto rankOfFlexibleShape = [] (const CoreML::Specification::ArrayFeatureType &marray) -> size_t {
diff --git a/mlmodel/src/NormalizerValidator.cpp b/mlmodel/src/Validation/NormalizerValidator.cpp similarity index 100% rename from mlmodel/src/NormalizerValidator.cpp rename to mlmodel/src/Validation/NormalizerValidator.cpp
diff --git a/mlmodel/src/OneHotEncoderValidator.cpp b/mlmodel/src/Validation/OneHotEncoderValidator.cpp similarity index 100% rename from mlmodel/src/OneHotEncoderValidator.cpp rename to mlmodel/src/Validation/OneHotEncoderValidator.cpp
diff --git a/mlmodel/src/ParameterValidator.cpp b/mlmodel/src/Validation/ParameterValidator.cpp similarity index 91% rename from mlmodel/src/ParameterValidator.cpp rename to mlmodel/src/Validation/ParameterValidator.cpp index bb32b7c17..6e8092b14 100644 --- a/mlmodel/src/ParameterValidator.cpp +++ b/mlmodel/src/Validation/ParameterValidator.cpp @@ -8,8 +8,8 @@ using namespace CoreML; -Result validateInt64Parameter(const std::string& parameterName, const Specification::Int64Parameter& int64Parameter, bool shouldBePositive) { - +Result CoreML::validateInt64Parameter(const std::string& parameterName, const Specification::Int64Parameter& int64Parameter, bool shouldBePositive) { + const ::google::protobuf::int64 defaultValue = int64Parameter.defaultvalue(); if (shouldBePositive) { @@ -60,23 +60,22 @@ Result validateInt64Parameter(const std::string& parameterName, const Specificat } } - + return Result(); } -Result validateDoubleParameter(const std::string& parameterName, const Specification::DoubleParameter& doubleParameter) { - +Result CoreML::validateDoubleParameter(const std::string& parameterName, const Specification::DoubleParameter& doubleParameter) { + const ::google::protobuf::double_t defaultValue = doubleParameter.defaultvalue(); - + if (doubleParameter.has_range()) { const Specification::DoubleRange& range = doubleParameter.range(); - + if (defaultValue < range.minvalue() || defaultValue > range.maxvalue()) { std::string err = "Specified Default Value (" + std::to_string(defaultValue) + ") out of Allowed Value Range for '" + parameterName + "'"; return Result(ResultType::INVALID_UPDATABLE_MODEL_CONFIGURATION, err); } } - + return Result(); } -
diff --git a/mlmodel/src/ParameterValidator.hpp b/mlmodel/src/Validation/ParameterValidator.hpp similarity index 94% rename from mlmodel/src/ParameterValidator.hpp rename to mlmodel/src/Validation/ParameterValidator.hpp index fd4b1ad27..f6da241df 100644 --- a/mlmodel/src/ParameterValidator.hpp +++ b/mlmodel/src/Validation/ParameterValidator.hpp @@ -7,8 +7,9 @@ #include "Validators.hpp" #include -using namespace CoreML; +namespace CoreML { Result validateInt64Parameter(const std::string& parameterName, const Specification::Int64Parameter& int64Parameter, bool shouldBePositive); Result validateDoubleParameter(const std::string& parameterName, const Specification::DoubleParameter& doubleParameter); +}
diff --git a/mlmodel/src/PipelineValidator.cpp b/mlmodel/src/Validation/PipelineValidator.cpp similarity index 98% rename from mlmodel/src/PipelineValidator.cpp rename to mlmodel/src/Validation/PipelineValidator.cpp index 41a7a048b..d28c34751 100644 --- a/mlmodel/src/PipelineValidator.cpp +++ b/mlmodel/src/Validation/PipelineValidator.cpp @@ -54,8 +54,7 @@ namespace CoreML { } // validate the model 
itself and bail out if it's invalid - Model wrapper(model); - Result r = wrapper.validate(); + Result r = Model::validate(model); if (!r.good()) { return r; } // Now add in the outputs of this model to the mix. diff --git a/mlmodel/src/QuantizationValidationUtils.cpp b/mlmodel/src/Validation/QuantizationValidationUtils.cpp similarity index 100% rename from mlmodel/src/QuantizationValidationUtils.cpp rename to mlmodel/src/Validation/QuantizationValidationUtils.cpp diff --git a/mlmodel/src/QuantizationValidationUtils.hpp b/mlmodel/src/Validation/QuantizationValidationUtils.hpp similarity index 100% rename from mlmodel/src/QuantizationValidationUtils.hpp rename to mlmodel/src/Validation/QuantizationValidationUtils.hpp diff --git a/mlmodel/src/SVMValidator.cpp b/mlmodel/src/Validation/SVMValidator.cpp similarity index 100% rename from mlmodel/src/SVMValidator.cpp rename to mlmodel/src/Validation/SVMValidator.cpp diff --git a/mlmodel/src/ScalarValidator.cpp b/mlmodel/src/Validation/ScalarValidator.cpp similarity index 100% rename from mlmodel/src/ScalarValidator.cpp rename to mlmodel/src/Validation/ScalarValidator.cpp diff --git a/mlmodel/src/SoundAnalysisPreprocessingValidator.cpp b/mlmodel/src/Validation/SoundAnalysisPreprocessingValidator.cpp similarity index 100% rename from mlmodel/src/SoundAnalysisPreprocessingValidator.cpp rename to mlmodel/src/Validation/SoundAnalysisPreprocessingValidator.cpp diff --git a/mlmodel/src/TextClassifierValidator.cpp b/mlmodel/src/Validation/TextClassifierValidator.cpp similarity index 100% rename from mlmodel/src/TextClassifierValidator.cpp rename to mlmodel/src/Validation/TextClassifierValidator.cpp diff --git a/mlmodel/src/TreeEnsembleValidator.cpp b/mlmodel/src/Validation/TreeEnsembleValidator.cpp similarity index 100% rename from mlmodel/src/TreeEnsembleValidator.cpp rename to mlmodel/src/Validation/TreeEnsembleValidator.cpp diff --git a/mlmodel/src/ValidatorUtils-inl.hpp b/mlmodel/src/Validation/ValidatorUtils-inl.hpp similarity index 72% rename from mlmodel/src/ValidatorUtils-inl.hpp rename to mlmodel/src/Validation/ValidatorUtils-inl.hpp index b504fdb55..e88c8af6f 100644 --- a/mlmodel/src/ValidatorUtils-inl.hpp +++ b/mlmodel/src/Validation/ValidatorUtils-inl.hpp @@ -12,7 +12,8 @@ #include "Comparison.hpp" #include "Format.hpp" #include "Result.hpp" -#include "../build/format/FeatureTypes_enums.h" +#include "ResultType.hpp" +#include "../../build/format/FeatureTypes_enums.h" #include namespace CoreML { @@ -21,6 +22,7 @@ namespace CoreML { FLOAT32, // float32 weights FLOAT16, // float16 weights QUINT, // smaller or equal to 8-bit unsigned integer + QINT, // int8 quantized weights UNSPECIFIED, // More then one type specified EMPTY // No populated fields }; @@ -34,7 +36,8 @@ namespace CoreML { numFilledIn++; if (param.rawvalue().size() > 0) numFilledIn++; - + if (param.int8rawvalue().size() > 0) + numFilledIn++; return (numFilledIn == 1); } @@ -46,7 +49,8 @@ namespace CoreML { numFilledIn++; if (param.rawvalue().size() > 0) numFilledIn++; - + if (param.int8rawvalue().size() > 0) + numFilledIn++; return numFilledIn; } @@ -63,8 +67,10 @@ namespace CoreML { return FLOAT32; } else if (param.float16value().size() > 0) { return FLOAT16; - } else if (param.rawvalue().size() > 0 && param.has_quantization()){ + } else if (param.rawvalue().size() > 0 && param.has_quantization()) { return QUINT; + } else if (param.int8rawvalue().size() > 0 && param.has_quantization()) { + return QINT; } return EMPTY; } @@ -194,6 +200,8 @@ namespace CoreML { return 
(static_cast(weights.float16value().size())); case QUINT: return static_cast(weights.rawvalue().size()); + case QINT: + return static_cast(weights.int8rawvalue().size()); case EMPTY: case UNSPECIFIED: default: @@ -202,6 +210,41 @@ namespace CoreML { return 0; }; + /* + * Validates that other parameters are appropriate for using int8 quantization, including: + * The weight type is int8 + * The number of quantization bits must be 8 + * The quantization type must be linear + * The linearQuantization scale must have one element + * The linearQuantization bias must be empty + * + */ + inline Result validateInt8Requirements(const Specification::WeightParams& weights, + const std::string &layerType, + const std::string &layerName) { + if (CoreML::valueType(weights) != CoreML::QINT) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer '" + layerName + "' of type '" + layerType + "': \ + when flag 'int8DynamicQuantize' is set to true, weights must be stored in the int8 format."); + } + + if (weights.quantization().numberofbits() != 8) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer '" + layerName + "' of type '" + layerType + "': \ + Number of bits must equal 8 when flag 'int8DynamicQuantize' is set to true."); + } else if (!weights.quantization().has_linearquantization()) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer '" + layerName + "' of type '" + layerType + "': \ + Linear quantization must be used when flag 'int8DynamicQuantize' is set to true."); + } else if (weights.quantization().linearquantization().scale_size() != 1) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer '" + layerName + "' of type '" + layerType + "': \ + Linear quantization scale must be size 1 when flag 'int8DynamicQuantize' is set to true."); + } else if (weights.quantization().linearquantization().bias_size() != 0) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Layer '" + layerName + "' of type '" + layerType + "': \ + Linear quantization bias must be empty when flag 'int8DynamicQuantize' is set to true."); + } else { + return Result(); + } + + }; + Result validateSizeRange(const Specification::SizeRange & range); }
diff --git a/mlmodel/src/Validators.hpp b/mlmodel/src/Validation/Validators.hpp similarity index 73% rename from mlmodel/src/Validators.hpp rename to mlmodel/src/Validation/Validators.hpp index eb04ea1e8..35dbbcecd 100644 --- a/mlmodel/src/Validators.hpp +++ b/mlmodel/src/Validation/Validators.hpp @@ -11,7 +11,7 @@ #include "Format.hpp" #include "Result.hpp" -#include "../build/format/Model_enums.h" +#include "../../build/format/Model_enums.h" #include "ValidatorUtils-inl.hpp" @@ -68,7 +68,14 @@ namespace CoreML { */ Result validateRegressorInterface(const Specification::ModelDescription& interface, int modelVersion); - + + /* + * Validate classifier output feature descriptions. + */ + Result validateClassifierFeatureDescriptions(const Specification::ModelDescription& interface, + bool expected_class_is_int64); + + /* * Validate model interface describes a valid classifier * @@ -129,41 +136,8 @@ namespace CoreML { if (!result.good()) { return result; } - - const auto& predictedFeatureName = interface.predictedfeaturename(); - const auto& probOutputName = interface.predictedprobabilitiesname(); - - - if (predictedFeatureName == "") { - return Result(ResultType::INVALID_MODEL_INTERFACE, - "Specification is missing classifier predictedFeatureName"); - } else { - auto expected_class = (expected_class_is_int64 - ? 
Specification::FeatureType::TypeCase::kInt64Type - : Specification::FeatureType::TypeCase::kStringType); - - result = validateDescriptionsContainFeatureWithNameAndType(interface.output(), - predictedFeatureName, - {expected_class}); - if (!result.good()) { - return result; - } - } - - if (probOutputName != "") { - // TODO @znation: validate array length below - // and value type (must be double? different for different classifiers?) - // TODO Probability outputs are always dictionaries! - result = validateDescriptionsContainFeatureWithNameAndType(interface.output(), - probOutputName, - {Specification::FeatureType::TypeCase::kMultiArrayType, // TODO ARRAY TYPE IS INVALID, REMOVE - Specification::FeatureType::TypeCase::kDictionaryType}); - if (!result.good()) { - return result; - } - } - - return Result(); + + return validateClassifierFeatureDescriptions(interface, expected_class_is_int64); } /* diff --git a/mlmodel/src/Validation/VisionFeaturePrintValidator.cpp b/mlmodel/src/Validation/VisionFeaturePrintValidator.cpp new file mode 100644 index 000000000..de8715982 --- /dev/null +++ b/mlmodel/src/Validation/VisionFeaturePrintValidator.cpp @@ -0,0 +1,82 @@ +// +// VisionFeaturePrintValidator.cpp +// mlmodel +// +// Created by Tao Jia on 4/3/18. +// Copyright © 2018 Apple Inc. All rights reserved. +// + +#include "Result.hpp" +#include "Validators.hpp" +#include "ValidatorUtils-inl.hpp" +#include "../build/format/Model.pb.h" + +namespace CoreML { + + template <> + Result validate(const Specification::Model &format) { + const auto &interface = format.description(); + + // make sure model is a vision feature print + if (!format.has_visionfeatureprint()) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Model not a vision feature print."); + } + + Result result; + + // validate the inputs: only one input with image type is allowed + result = validateDescriptionsContainFeatureWithTypes(interface.input(), 1, {Specification::FeatureType::kImageType}); + if (!result.good()) { + return result; + } + + // other validate logics here + const auto &visionFeaturePrint = format.visionfeatureprint(); + switch (visionFeaturePrint.VisionFeaturePrintType_case()) { + case Specification::CoreMLModels::VisionFeaturePrint::kScene: + if (visionFeaturePrint.scene().version() == Specification::CoreMLModels::VisionFeaturePrint_Scene_SceneVersion_SCENE_VERSION_INVALID) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Version for scene is invalid"); + } + + if (visionFeaturePrint.scene().version() == Specification::CoreMLModels::VisionFeaturePrint_Scene_SceneVersion_SCENE_VERSION_1) { + // validate the outputs: only one output with multiarray type is allowed for version 1 + result = validateDescriptionsContainFeatureWithTypes(interface.output(), 1, {Specification::FeatureType::kMultiArrayType}); + if (!result.good()) { + return result; + } + } + break; + case Specification::CoreMLModels::VisionFeaturePrint::kObject: + if (visionFeaturePrint.object().version() == Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_INVALID) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Version for object is invalid"); + } + if (visionFeaturePrint.object().version() == Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_1) { + + if (visionFeaturePrint.object().output_size() != 2) { + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Two outputs for object need to be provided"); + } + + // validate the outputs: only two outputs with multiarray type 
are allowed for version 1 + result = validateDescriptionsContainFeatureWithTypes(interface.output(), 2, {Specification::FeatureType::kMultiArrayType}); + if (!result.good()) { + return result; + } + } + for (const auto& modelOutputFeature : interface.output()) { + const std::string &modelOutputFeatureName = modelOutputFeature.name(); + const auto &visionFeaturePrintOutputNames = visionFeaturePrint.object().output(); + if (find(visionFeaturePrintOutputNames.begin(), visionFeaturePrintOutputNames.end(), modelOutputFeatureName) == visionFeaturePrintOutputNames.end()) { + std::stringstream ss; + ss << "Model description declares an output: " << modelOutputFeatureName << ", but it is not declared in the Vision Feature Print output"; + return Result(ResultType::INVALID_MODEL_PARAMETERS, ss.str()); + } + } + break; + case Specification::CoreMLModels::VisionFeaturePrint::VISIONFEATUREPRINTTYPE_NOT_SET: + return Result(ResultType::INVALID_MODEL_PARAMETERS, "Type for vision feature print not set"); + } + + return result; + } + +}
diff --git a/mlmodel/src/WordEmbeddingValidator.cpp b/mlmodel/src/Validation/WordEmbeddingValidator.cpp similarity index 100% rename from mlmodel/src/WordEmbeddingValidator.cpp rename to mlmodel/src/Validation/WordEmbeddingValidator.cpp
diff --git a/mlmodel/src/WordTaggerValidator.cpp b/mlmodel/src/Validation/WordTaggerValidator.cpp similarity index 100% rename from mlmodel/src/WordTaggerValidator.cpp rename to mlmodel/src/Validation/WordTaggerValidator.cpp
diff --git a/mlmodel/src/VisionFeaturePrintValidator.cpp b/mlmodel/src/VisionFeaturePrintValidator.cpp deleted file mode 100644 index 39adb35fd..000000000 --- a/mlmodel/src/VisionFeaturePrintValidator.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// VisionFeaturePrintValidator.cpp -// mlmodel -// -// Created by Tao Jia on 4/3/18. -// Copyright © 2018 Apple Inc. All rights reserved. 
-// - -#include "Result.hpp" -#include "Validators.hpp" -#include "ValidatorUtils-inl.hpp" -#include "../build/format/Model.pb.h" - -namespace CoreML { - - template <> - Result validate(const Specification::Model &format) { - const auto &interface = format.description(); - - // make sure model is a vision feature print - if (!format.has_visionfeatureprint()) { - return Result(ResultType::INVALID_MODEL_PARAMETERS, "Model not a vision feature print."); - } - - Result result; - - // validate the inputs: only one input with image type is allowed - result = validateDescriptionsContainFeatureWithTypes(interface.input(), 1, {Specification::FeatureType::kImageType}); - if (!result.good()) { - return result; - } - - // validate the outputs: only one output with multiarray type is allowed - result = validateDescriptionsContainFeatureWithTypes(interface.output(), 1, {Specification::FeatureType::kMultiArrayType}); - if (!result.good()) { - return result; - } - - // other validate logics here - const auto &visionFeaturePrint = format.visionfeatureprint(); - switch (visionFeaturePrint.VisionFeaturePrintType_case()) { - case Specification::CoreMLModels::VisionFeaturePrint::kScene: - if (visionFeaturePrint.scene().version() == Specification::CoreMLModels::VisionFeaturePrint_Scene_SceneVersion_SCENE_VERSION_INVALID) { - return Result(ResultType::INVALID_MODEL_PARAMETERS, "Version for scene is invalid"); - } - break; - case Specification::CoreMLModels::VisionFeaturePrint::VISIONFEATUREPRINTTYPE_NOT_SET: - return Result(ResultType::INVALID_MODEL_PARAMETERS, "Type for vision feature print not set"); - } - - return result; - } - -} diff --git a/mlmodel/tests/KNNValidatorTests.cpp b/mlmodel/tests/KNNValidatorTests.cpp index 956b5298d..29a9cfa42 100644 --- a/mlmodel/tests/KNNValidatorTests.cpp +++ b/mlmodel/tests/KNNValidatorTests.cpp @@ -65,8 +65,8 @@ namespace CoreML { namespace KNNValidatorTests { for (size_t i = 0; i < pointCount; i++) { nnIndex->add_floatsamples(); - float *sample = ((std::vector)points[i]).data(); - for (int j = 0; j < 4; j++) { + const auto& sample = points[i]; + for (size_t j = 0; j < 4; j++) { nnIndex->mutable_floatsamples((int)i)->add_vector(sample[j]); } } diff --git a/mlmodel/tests/LinearModelTests.cpp b/mlmodel/tests/LinearModelTests.cpp index 27b218d30..d7e5d9569 100644 --- a/mlmodel/tests/LinearModelTests.cpp +++ b/mlmodel/tests/LinearModelTests.cpp @@ -1,10 +1,9 @@ #include "MLModelTests.hpp" -// TODO -- Fix these headers. 
-#include "../src/Model.hpp" -#include "../src/transforms/OneHotEncoder.hpp" -#include "../src/transforms/LinearModel.hpp" -#include "../src/transforms/TreeEnsemble.hpp" +#include "Model.hpp" +#include "transforms/OneHotEncoder.hpp" +#include "transforms/LinearModel.hpp" +#include "transforms/TreeEnsemble.hpp" #include "framework/TestUtils.hpp" diff --git a/mlmodel/tests/MLModelTests.hpp b/mlmodel/tests/MLModelTests.hpp index cf01b3b25..8248c0cf7 100644 --- a/mlmodel/tests/MLModelTests.hpp +++ b/mlmodel/tests/MLModelTests.hpp @@ -29,6 +29,8 @@ MLMODEL_TEST(testNNValidatorBadInput2) MLMODEL_TEST(testNNValidatorBadOutput) MLMODEL_TEST(testNNValidatorBadOutput2) MLMODEL_TEST(testNNMissingLayer) +MLMODEL_TEST(testInnerProductDynamicQuantizationConversionParameterValidation) +MLMODEL_TEST(testBatchedMatMulDynamicQuantizationConversionParameterValidation) MLMODEL_TEST(testRNNLayer) MLMODEL_TEST(testRNNLayer2) MLMODEL_TEST(testNNValidatorAllOptional) @@ -39,11 +41,30 @@ MLMODEL_TEST(testNNCompilerValidation) MLMODEL_TEST(testNNCompilerValidationGoodProbBlob) MLMODEL_TEST(testNNCompilerValidationBadProbBlob) MLMODEL_TEST(testInvalidPooling) +MLMODEL_TEST(testValidPooling3d) +MLMODEL_TEST(testInvalidPooling3dNegativeKernelSize) +MLMODEL_TEST(testInvalidPooling3dCostumPaddingSetForNonCustomPaddingType) +MLMODEL_TEST(testValidGlobalPooling3d) +MLMODEL_TEST(testInvalidGlobalPooling3dWrongNumberOfInputs) MLMODEL_TEST(testInvalidConvolutionNoPadding) MLMODEL_TEST(testInvalidConvolutionNoWeights) MLMODEL_TEST(testInvalidConvolutionNoBias) MLMODEL_TEST(testValidConvolution) MLMODEL_TEST(testValidDeconvolution) +MLMODEL_TEST(testInvalidConvolution3DNegativePadding) +MLMODEL_TEST(testInvalidConvolution3DNoBias) +MLMODEL_TEST(testInvalidConvolution3DNoInputChannels) +MLMODEL_TEST(testInvalidConvolution3DNoOutputChannels) +MLMODEL_TEST(testInvalidConvolution3DNoWeights) +MLMODEL_TEST(testInvalidConvolution3DNonPositiveDilation) +MLMODEL_TEST(testInvalidConvolution3DNonPositiveGroups) +MLMODEL_TEST(testInvalidConvolution3DNonPositiveKernelSize) +MLMODEL_TEST(testInvalidConvolution3DNonPositiveStride) +MLMODEL_TEST(testInvalidConvolution3DTwoInputs) +MLMODEL_TEST(testInvalidConvolution3DWithOutputShape) +MLMODEL_TEST(testValidConvolution3D) +MLMODEL_TEST(testInvalidDeConvolution3DOutputShape) +MLMODEL_TEST(testValidDeConvolution3D) MLMODEL_TEST(testInvalidEmbedding) MLMODEL_TEST(testInvalidEmbeddingBias) MLMODEL_TEST(testValidEmbedding) @@ -53,7 +74,11 @@ MLMODEL_TEST(testInvalidPaddingBorder) MLMODEL_TEST(testInvalidPaddingNoType) MLMODEL_TEST(testValidPadding) MLMODEL_TEST(testInvalidUpsample) +MLMODEL_TEST(testInvalidUpsampleNearestNeighborsModeWithAlignCorners) MLMODEL_TEST(testValidUpsample) +MLMODEL_TEST(testFractionalUpsample) +MLMODEL_TEST(testValidUpsampleAlignCorners) +MLMODEL_TEST(testUpsampleArgsortSpec) MLMODEL_TEST(testInvalidScaleBiasWeights) MLMODEL_TEST(testInvalidScaleWeights) MLMODEL_TEST(testInvalidScaleBiasLength) @@ -71,12 +96,15 @@ MLMODEL_TEST(testValidSlice2) MLMODEL_TEST(testValidCustom) MLMODEL_TEST(testInvalidCustomNoName) MLMODEL_TEST(testInvalidCustomMultipleWeights) +MLMODEL_TEST(testVisionFeatureScenePrintBasic) +MLMODEL_TEST(testVisionFeatureObjectPrintBasic) MLMODEL_TEST(testVggishPreprocessingBasic) MLMODEL_TEST(testSpecDowngrade) MLMODEL_TEST(testSpecDowngradefp16) MLMODEL_TEST(testSpecDowngradeFlexibleShapes) MLMODEL_TEST(testSpecDowngradeFlexibleShapes2) MLMODEL_TEST(testSpecDowngradePipeline) +MLMODEL_TEST(testWordTaggerTransferLearningSpecIOS14) 
MLMODEL_TEST(testBayesianProbitRegressionValidationBasic) MLMODEL_TEST(testRangeVal) MLMODEL_TEST(testRangeValDivide) @@ -107,6 +135,10 @@ MLMODEL_TEST(testKNNValidatorLinearIndex) MLMODEL_TEST(testKNNValidatorSingleKdTreeIndex) MLMODEL_TEST(testKNNValidatorNoWeightingScheme) MLMODEL_TEST(testKNNValidatorNoDistanceFunction) +MLMODEL_TEST(testInvalidDefaultOptionalValue) +MLMODEL_TEST(testDefaultOptionalValueZeroIfNotSet) +MLMODEL_TEST(testDefaultOptionalValueOnUnsupportedSpec) +MLMODEL_TEST(testDefaultOptionalValueGood) MLMODEL_TEST(testKNNValidatorGood) MLMODEL_TEST(testEmptyKNNValidationGood) MLMODEL_TEST(testLabelTypeMismatchTest) @@ -116,6 +148,9 @@ MLMODEL_TEST(testNumberOfNeighborsWithDefaultValueInSet) MLMODEL_TEST(testNumberOfNeighborsWithDefaultValueNotInSet) MLMODEL_TEST(testNumberOfNeighborsWithInvalidRange) MLMODEL_TEST(testNumberOfNeighborsWithInvalidSet) +MLMODEL_TEST(testValidReorganizeData) +MLMODEL_TEST(testInvalidReorganizeDataInputRank) +MLMODEL_TEST(testInvalidReorganizeDataBlockSize) MLMODEL_TEST(testValidBranch) MLMODEL_TEST(testInvalidBranchOutputNotProduced1) @@ -157,9 +192,13 @@ MLMODEL_TEST(testInvalidLayerNormalizationNoNormalizedShape) MLMODEL_TEST(testInvalidLayerNormalizationNoGammaOrBeta) MLMODEL_TEST(testInvalidLayerNormalizationWrongGammaOrBeta) MLMODEL_TEST(testInvalidConstantPad) +MLMODEL_TEST(testInvalidArgsortWrongAxis) // Updatable model tests MLMODEL_TEST(testUpdatableModelSpecVersion) +MLMODEL_TEST(testInvalidUpdatableModelQuantizedWeights) +MLMODEL_TEST(testInvalidUpdatableModelQuantizedBias) +MLMODEL_TEST(testValidUpdatableModelQuantizedWeightsAndBiasForNonUpdatableLayer) MLMODEL_TEST(testInvalidUpdatableModelWrongType) MLMODEL_TEST(testInvalidUpdatableModelWrongLayer) MLMODEL_TEST(testInvalidUpdatableModelWrongWeights) @@ -205,7 +244,8 @@ MLMODEL_TEST(testEpsOutOfAllowedRange) MLMODEL_TEST(testEpochsOutOfAllowedRange) MLMODEL_TEST(testEpochsOutOfAllowedSet) -// Ttraining input validation test +// Training input validation test + // All are non-classifier unless otherwise described. All include model inputs unless specified "Only" MLMODEL_TEST(testInvalid_NoTrainingInputs) MLMODEL_TEST(testInvalid_OnlyModelInputs) @@ -229,4 +269,5 @@ MLMODEL_TEST(testValid_Classifier_PredictedFeatureNameAndTarget) MLMODEL_TEST(testInvalid_Classifier_PredictedFeatureNameWrongType) MLMODEL_TEST(testValid_WithMSE) MLMODEL_TEST(testValid_Pipeline) + #undef MLMODEL_TEST diff --git a/mlmodel/tests/ModelContainerTests.cpp b/mlmodel/tests/ModelContainerTests.cpp index 083751197..4750909cd 100644 --- a/mlmodel/tests/ModelContainerTests.cpp +++ b/mlmodel/tests/ModelContainerTests.cpp @@ -1,8 +1,7 @@ #include "MLModelTests.hpp" -// TODO -- Fix these headers. 
-#include "../src/Model.hpp" -#include "../src/Utils.hpp" +#include "Model.hpp" +#include "Utils.hpp" #include "framework/TestUtils.hpp"
diff --git a/mlmodel/tests/ModelCreationUtils.cpp b/mlmodel/tests/ModelCreationUtils.cpp index 216e56207..a87ed8794 100644 --- a/mlmodel/tests/ModelCreationUtils.cpp +++ b/mlmodel/tests/ModelCreationUtils.cpp @@ -18,7 +18,7 @@ Specification::NeuralNetwork* buildBasicUpdatableNeuralNetworkModel(Specificatio return buildBasicNeuralNetworkModel(m, true, &inTensorAttr, &outTensorAttr); } -Specification::NeuralNetwork* buildBasicNeuralNetworkModel(Specification::Model& m, bool isUpdatable, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr, int numberOfLayers) { +Specification::NeuralNetwork* buildBasicNeuralNetworkModel(Specification::Model& m, bool isUpdatable, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr, int numberOfLayers, bool areWeightsQuantized, bool isBiasQuantized) { auto inTensor = m.mutable_description()->add_input(); inTensor->set_name(inTensorAttr->name); auto inTensorShape = inTensor->mutable_type()->mutable_multiarraytype(); @@ -53,9 +53,33 @@ Specification::NeuralNetwork* buildBasicNeuralNetworkModel(Specification::Model& Specification::InnerProductLayerParams *innerProductParams = layer->mutable_innerproduct(); innerProductParams->set_inputchannels(1); innerProductParams->set_outputchannels(1); - innerProductParams->mutable_weights()->add_floatvalue(1.0); + + // set weight + auto* weights = innerProductParams->mutable_weights(); + if (areWeightsQuantized) { + auto *quant_params = weights->mutable_quantization(); + quant_params->set_numberofbits(1); + auto *linear_quant_params = quant_params->mutable_linearquantization(); + linear_quant_params->add_scale(1.0f); + linear_quant_params->add_bias(0.0f); + weights->set_int8rawvalue("\x01"); // this is [1] + } else { + weights->add_floatvalue(1.0); + } + + // set bias innerProductParams->set_hasbias(true); - innerProductParams->mutable_bias()->add_floatvalue(1.0); + auto* bias = innerProductParams->mutable_bias(); + if (isBiasQuantized) { + auto *quant_params = bias->mutable_quantization(); + quant_params->set_numberofbits(1); + auto *linear_quant_params = quant_params->mutable_linearquantization(); + linear_quant_params->add_scale(1.0f); + linear_quant_params->add_bias(0.0f); + bias->set_int8rawvalue("\x01"); // this is [1] + } else { + bias->add_floatvalue(1.0); + } if (isUpdatable) { layer->set_isupdatable(true); @@ -76,7 +100,7 @@ Specification::NeuralNetwork* buildBasicNeuralNetworkModel(Specification::Model& return neuralNet; } -Specification::NeuralNetwork* addInnerProductLayer(Specification::Model& m, bool isUpdatable, const char *name, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr) { +Specification::NeuralNetwork* addInnerProductLayer(Specification::Model& m, bool isUpdatable, const char *name, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr, bool areWeightsQuantized, bool isBiasQuantized) { auto neuralNet = m.mutable_neuralnetwork(); auto layer = neuralNet->add_layers(); @@ -87,9 +111,33 @@ Specification::NeuralNetwork* addInnerProductLayer(Specification::Model& m, bool Specification::InnerProductLayerParams *innerProductParams = layer->mutable_innerproduct(); innerProductParams->set_inputchannels(1); innerProductParams->set_outputchannels(1); - innerProductParams->mutable_weights()->add_floatvalue(1.0); + + // set weight + auto* weights = innerProductParams->mutable_weights(); + 
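+ // When quantization is requested, the single weight is stored as one raw int8
+ // byte under a 1-bit linear quantization (scale 1.0, bias 0.0); otherwise it
+ // is stored as a plain float value of 1.0.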
if (areWeightsQuantized) { + auto *quant_params = weights->mutable_quantization(); + quant_params->set_numberofbits(1); + auto *linear_quant_params = quant_params->mutable_linearquantization(); + linear_quant_params->add_scale(1.0f); + linear_quant_params->add_bias(0.0f); + weights->set_int8rawvalue("\x01"); // this is [1] + } else { + weights->add_floatvalue(1.0); + } + + // set bias innerProductParams->set_hasbias(true); - innerProductParams->mutable_bias()->add_floatvalue(1.0); + auto* bias = innerProductParams->mutable_bias(); + if (isBiasQuantized) { + auto *quant_params = bias->mutable_quantization(); + quant_params->set_numberofbits(1); + auto *linear_quant_params = quant_params->mutable_linearquantization(); + linear_quant_params->add_scale(1.0f); + linear_quant_params->add_bias(0.0f); + bias->set_int8rawvalue("\x01"); // this is [1] + } else { + bias->add_floatvalue(1.0); + } if (isUpdatable) { layer->set_isupdatable(true); @@ -187,7 +235,7 @@ Specification::NeuralNetworkClassifier* buildBasicNeuralNetworkClassifierModel(S float *destination_weights = weightsWrite->mutable_data(); for (uint64_t i = 0; i < C_in; i++) { for (uint64_t j = 0; j < C_out; j++) { - float random = float(rand())/(RAND_MAX); + float random = float(rand())/float(RAND_MAX); destination_weights[i * C_out + j] = random; } } @@ -200,7 +248,7 @@ Specification::NeuralNetworkClassifier* buildBasicNeuralNetworkClassifierModel(S float *destination_bias = biasWrite->mutable_data(); for (uint64_t i = 0; i < 1; i++) { for (uint64_t j = 0; j < C_out; j++) { - float random = float(rand())/(RAND_MAX); + float random = float(rand())/float(RAND_MAX); destination_bias[i * C_out + j] = random; } }
diff --git a/mlmodel/tests/ModelCreationUtils.hpp b/mlmodel/tests/ModelCreationUtils.hpp index 2a1e97c16..6b0b3bd3c 100644 --- a/mlmodel/tests/ModelCreationUtils.hpp +++ b/mlmodel/tests/ModelCreationUtils.hpp @@ -6,9 +6,8 @@ // Copyright © 2019 Apple Inc. All rights reserved. 
// -#include "../src/Format.hpp" -#include "../src/Model.hpp" -#include "../src/NeuralNetwork/NeuralNetworkShapes.hpp" +#include "Format.hpp" +#include "Model.hpp" #include "framework/TestUtils.hpp" @@ -21,7 +20,7 @@ typedef struct { Specification::NeuralNetwork* buildBasicUpdatableNeuralNetworkModel(Specification::Model& m); -Specification::NeuralNetwork* buildBasicNeuralNetworkModel(Specification::Model& m, bool isUpdatable, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr, int numberOfLayers = 1); +Specification::NeuralNetwork* buildBasicNeuralNetworkModel(Specification::Model& m, bool isUpdatable, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr, int numberOfLayers = 1, bool areWeightsQuantized = false, bool isBiasQuantized = false); Specification::NeuralNetworkClassifier* buildBasicNeuralNetworkClassifierModel(Specification::Model& m, bool isUpdatable, const TensorAttributes *inTensorAttr, std::vector stringClassLabels, std::vector intClassLabels, bool includeBias); @@ -33,6 +32,6 @@ Specification::Pipeline* buildEmptyPipelineModelWithStringOutput(Specification:: void addCategoricalCrossEntropyLossWithSoftmaxAndSGDOptimizer(Specification::Model& m, const char *softmaxInputName); -Specification::NeuralNetwork* addInnerProductLayer(Specification::Model& m, bool isUpdatable, const char *name, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr); +Specification::NeuralNetwork* addInnerProductLayer(Specification::Model& m, bool isUpdatable, const char *name, const TensorAttributes *inTensorAttr, const TensorAttributes *outTensorAttr, bool areWeightsQuantized = false, bool isBiasQuantized = false); Specification::NeuralNetwork* addSoftmaxLayer(Specification::Model& m, const char *name, const char *input, const char *output); diff --git a/mlmodel/tests/NNShaperTest.cpp b/mlmodel/tests/NNShaperTest.cpp index 715d543aa..c3b30a69c 100644 --- a/mlmodel/tests/NNShaperTest.cpp +++ b/mlmodel/tests/NNShaperTest.cpp @@ -7,9 +7,9 @@ // #include "MLModelTests.hpp" -#include "../src/Format.hpp" -#include "../src/Model.hpp" -#include "../src/NeuralNetwork/NeuralNetworkShapes.hpp" +#include "Format.hpp" +#include "Model.hpp" +#include "Validation/NeuralNetwork/NeuralNetworkShapes.hpp" #include "framework/TestUtils.hpp" diff --git a/mlmodel/tests/NNValidatorTests.cpp b/mlmodel/tests/NNValidatorTests.cpp index d95680953..5a36cb3fa 100644 --- a/mlmodel/tests/NNValidatorTests.cpp +++ b/mlmodel/tests/NNValidatorTests.cpp @@ -7,9 +7,10 @@ // #include "MLModelTests.hpp" -#include "../src/Format.hpp" -#include "../src/Model.hpp" -#include "../src/NeuralNetwork/NeuralNetworkShapes.hpp" +#include "Format.hpp" +#include "Model.hpp" +#include "ResultReason.hpp" +#include "Validation/NeuralNetwork/NeuralNetworkShapes.hpp" #include "framework/TestUtils.hpp" @@ -18,7 +19,6 @@ using namespace CoreML; - int testNNValidatorSimple() { Specification::Model m1; @@ -157,13 +157,11 @@ int testNNValidatorBadOutput2() { innerProductParams->set_hasbias(false); Result res = validate(m1); - ML_ASSERT_BAD(res); + ML_ASSERT_BAD_WITH_REASON(res, ResultReason::MODEL_OUTPUT_TYPE_INVALID); return 0; } - - int testNNValidatorAllOptional() { Specification::Model m1; @@ -173,6 +171,9 @@ int testNNValidatorAllOptional() { auto type = topIn->mutable_type(); type->mutable_multiarraytype(); type->set_isoptional(true); + auto arr = type->mutable_multiarraytype(); + arr->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_INT32); + 
arr->set_floatdefaultvalue(2.0); auto *out = m1.mutable_description()->add_output(); out->set_name("B"); @@ -191,6 +192,210 @@ return 0; } +int testInvalidDefaultOptionalValue() { + + Specification::Model m; + + auto *in1 = m.mutable_description()->add_input(); + in1->set_name("input1"); + auto *inShape1 = in1->mutable_type()->mutable_multiarraytype(); + inShape1->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + + inShape1->add_shape(3); + inShape1->add_shape(5); + inShape1->add_shape(2); + + auto *in2 = m.mutable_description()->add_input(); + in2->set_name("input2"); + auto type = in2->mutable_type(); + type->set_isoptional(true); + auto arr = type->mutable_multiarraytype(); + arr->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_INT32); + arr->set_floatdefaultvalue(2.0); + arr->add_shape(3); + arr->add_shape(5); + arr->add_shape(2); + + auto *out = m.mutable_description()->add_output(); + out->set_name("output"); + auto *outShape = out->mutable_type()->mutable_multiarraytype(); + outShape->add_shape(3); + outShape->add_shape(5); + outShape->add_shape(1); + + const auto nn = m.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + auto *layers = nn->add_layers(); + layers->add_input("input1"); + layers->add_input("input2"); + layers->add_output("output"); + layers->add_inputtensor()->set_rank(3); + layers->add_inputtensor()->set_rank(3); + + auto *params = layers->mutable_stack(); + params->set_axis(2); + + m.set_specificationversion(4); + // axis should be in range [-(rank + 1), rank + 1) + Result res = Model::validate(m); + ML_ASSERT_BAD(res); + ML_ASSERT(res.message().find("mistmatch between dataType and the type") != std::string::npos); + + return 0; +} + +int testDefaultOptionalValueZeroIfNotSet() { + + Specification::Model m; + + auto *in1 = m.mutable_description()->add_input(); + in1->set_name("input1"); + auto *inShape1 = in1->mutable_type()->mutable_multiarraytype(); + inShape1->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + + inShape1->add_shape(3); + inShape1->add_shape(5); + inShape1->add_shape(2); + + auto *in2 = m.mutable_description()->add_input(); + in2->set_name("input2"); + auto type = in2->mutable_type(); + type->set_isoptional(true); + auto arr = type->mutable_multiarraytype(); + arr->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_INT32); + // The default value is deliberately left unset; it should read back as zero. 
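+ // (proto3 reads back zero for an unset scalar field, e.g.:
+ //   Specification::ArrayFeatureType a;
+ //   assert(a.intdefaultvalue() == 0);
+ // so the ML_ASSERT at the end of this test holds without any setter call.)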
+ + arr->add_shape(3); + arr->add_shape(5); + arr->add_shape(2); + + auto *out = m.mutable_description()->add_output(); + out->set_name("output"); + auto *outShape = out->mutable_type()->mutable_multiarraytype(); + outShape->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + outShape->add_shape(3); + outShape->add_shape(5); + outShape->add_shape(1); + + const auto nn = m.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + auto *layers = nn->add_layers(); + layers->add_input("input1"); + layers->add_output("output"); + layers->add_inputtensor()->set_rank(3); + + auto *params = layers->mutable_stack(); + params->set_axis(2); + + m.set_specificationversion(5); + ML_ASSERT(m.description().input(1).type().multiarraytype().intdefaultvalue() == 0); + return 0; +} + +int testDefaultOptionalValueOnUnsupportedSpec() { + + Specification::Model m; + + auto *in1 = m.mutable_description()->add_input(); + in1->set_name("input1"); + auto *inShape1 = in1->mutable_type()->mutable_multiarraytype(); + inShape1->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + + inShape1->add_shape(3); + inShape1->add_shape(5); + inShape1->add_shape(2); + + auto *in2 = m.mutable_description()->add_input(); + in2->set_name("input2"); + auto type = in2->mutable_type(); + type->set_isoptional(true); + auto arr = type->mutable_multiarraytype(); + arr->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_INT32); + arr->set_intdefaultvalue(2); + arr->add_shape(3); + arr->add_shape(5); + arr->add_shape(2); + + auto *out = m.mutable_description()->add_output(); + out->set_name("output"); + auto *outShape = out->mutable_type()->mutable_multiarraytype(); + outShape->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + outShape->add_shape(3); + outShape->add_shape(5); + outShape->add_shape(1); + + const auto nn = m.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + auto *layers = nn->add_layers(); + layers->add_input("input1"); + layers->add_output("output"); + layers->add_inputtensor()->set_rank(3); + + auto *params = layers->mutable_stack(); + params->set_axis(2); + + m.set_specificationversion(4); + // axis should be in range [-(rank + 1), rank + 1) + Result res = Model::validate(m); + ML_ASSERT_BAD(res); + ML_ASSERT(res.message().find("Default value for optional inputs is supported from specification 5 (iOS 14)") != std::string::npos); + + return 0; +} + +int testDefaultOptionalValueGood() { + + Specification::Model m; + + auto *in1 = m.mutable_description()->add_input(); + in1->set_name("input1"); + auto *inShape1 = in1->mutable_type()->mutable_multiarraytype(); + inShape1->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + inShape1->add_shape(3); + inShape1->add_shape(5); + inShape1->add_shape(2); + + auto *in2 = m.mutable_description()->add_input(); + in2->set_name("input2"); + auto type = in2->mutable_type(); + type->set_isoptional(true); + auto arr = type->mutable_multiarraytype(); + arr->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_INT32); + arr->set_intdefaultvalue(2); + arr->add_shape(3); + arr->add_shape(5); + arr->add_shape(2); + + auto *out = 
m.mutable_description()->add_output(); + out->set_name("output"); + auto *outShape = out->mutable_type()->mutable_multiarraytype(); + outShape->set_datatype(Specification::ArrayFeatureType::ArrayDataType::ArrayFeatureType_ArrayDataType_FLOAT32); + outShape->add_shape(3); + outShape->add_shape(5); + outShape->add_shape(1); + + const auto nn = m.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + auto *layers = nn->add_layers(); + layers->add_input("input1"); + layers->add_input("input2"); + layers->add_output("output"); + layers->add_inputtensor()->set_rank(3); + layers->add_inputtensor()->set_rank(3); + + auto *params = layers->mutable_stack(); + params->set_axis(2); + + m.set_specificationversion(5); + // axis should be in range [-(rank + 1), rank + 1) + Result res = Model::validate(m); + ML_ASSERT_GOOD(res); + return 0; +} int testNNValidatorMissingInput() { @@ -388,7 +593,174 @@ int testNNMissingLayer() { ML_ASSERT_BAD(res); return 0; +} + +int testInnerProductDynamicQuantizationConversionParameterValidation() { + + // Setup + Specification::Model m1; + Specification::NeuralNetwork *nnWrite = m1.mutable_neuralnetwork(); + nnWrite->set_arrayinputshapemapping(Specification::EXACT_ARRAY_MAPPING); + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("A"); + topIn->mutable_type()->mutable_multiarraytype(); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + + auto *out = m1.mutable_description()->add_output(); + out->set_name("B"); + out->mutable_type()->mutable_multiarraytype(); + + Specification::NeuralNetworkLayer* layer1 = nnWrite->add_layers(); + layer1->set_name("inner_product"); + layer1->add_input("A"); + layer1->add_output("B"); + Specification::InnerProductLayerParams* inner_product_params = layer1->mutable_innerproduct(); + inner_product_params->set_inputchannels(4); + inner_product_params->set_outputchannels(2); + inner_product_params->set_hasbias(false); + inner_product_params->set_int8dynamicquantize(true); + + auto* weights = inner_product_params->mutable_weights(); + weights->set_int8rawvalue("11111111"); + weights->mutable_quantization()->set_numberofbits(8); + weights->mutable_quantization()->mutable_linearquantization()->add_scale(4); + + // Setup: Correct model + Result res = validate(m1); + ML_ASSERT_GOOD(res); + + // Case 1: has bias + inner_product_params->set_hasbias(true); + res = validate(m1); + ML_ASSERT_GOOD(res); + inner_product_params->set_hasbias(false); + + // Case 2: Non empty linear quantization bias + weights->mutable_quantization()->mutable_linearquantization()->add_bias(1); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->mutable_quantization()->mutable_linearquantization()->clear_bias(); + + // Case 3: numberofbits != 8 + weights->mutable_quantization()->set_numberofbits(7); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->mutable_quantization()->set_numberofbits(8); + + // Case 4: Lookup table mode is on + weights->mutable_quantization()->mutable_lookuptablequantization()->add_floatvalue(1); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->mutable_quantization()->mutable_linearquantization()->add_scale(4); + + // Case 5: uint8 weights + weights->clear_int8rawvalue(); + weights->set_rawvalue("11111111"); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->clear_rawvalue(); + + // Case 6: float16 weights + weights->set_float16value("0101010101010101"); + res = validate(m1); + 
ML_ASSERT_BAD(res); + weights->clear_float16value(); + + // Case 7: float weights + for (int i = 0; i < 8; ++i){ + weights->add_floatvalue(1); + } + res = validate(m1); + ML_ASSERT_BAD(res); + weights->clear_floatvalue(); + + return 0; +} + +int testBatchedMatMulDynamicQuantizationConversionParameterValidation() { + + Specification::Model m1; + Specification::NeuralNetwork* nnMain = m1.mutable_neuralnetwork(); + // Required for spec v4 and onwards + nnMain->set_arrayinputshapemapping(Specification::EXACT_ARRAY_MAPPING); + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("A"); + topIn->mutable_type()->mutable_multiarraytype(); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + + auto *out = m1.mutable_description()->add_output(); + out->set_name("B"); + out->mutable_type()->mutable_multiarraytype(); + + Specification::NeuralNetworkLayer* layer1 = nnMain->add_layers(); + layer1->set_name("batched_mat_mul"); + layer1->add_input("A"); + layer1->add_output("B"); + Specification::BatchedMatMulLayerParams* batch_mat_mul_params = layer1->mutable_batchedmatmul(); + batch_mat_mul_params->set_weightmatrixfirstdimension(4); + batch_mat_mul_params->set_weightmatrixseconddimension(2); + batch_mat_mul_params->set_hasbias(false); + batch_mat_mul_params->set_int8dynamicquantize(true); + + auto* weights = batch_mat_mul_params->mutable_weights(); + weights->set_int8rawvalue("11111111"); + weights->mutable_quantization()->set_numberofbits(8); + weights->mutable_quantization()->mutable_linearquantization()->add_scale(4); + + // Setup: Correct validation + Result res = validate(m1); + ML_ASSERT_GOOD(res); + + // Case 1: has bias + batch_mat_mul_params->set_hasbias(true); + res = validate(m1); + ML_ASSERT_GOOD(res); + batch_mat_mul_params->set_hasbias(false); + // Case 2: Non empty linear quantization bias + weights->mutable_quantization()->mutable_linearquantization()->add_bias(0); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->mutable_quantization()->mutable_linearquantization()->clear_bias(); + + // Case 3: numberofbits != 8 + weights->mutable_quantization()->set_numberofbits(7); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->mutable_quantization()->set_numberofbits(8); + + // Case 4: Lookup table mode is on + weights->mutable_quantization()->mutable_lookuptablequantization()->add_floatvalue(1); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->mutable_quantization()->mutable_linearquantization()->add_scale(4); + + // Case 5: uint8 weights + weights->clear_int8rawvalue(); + weights->set_rawvalue("11111111"); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->clear_rawvalue(); + + // Case 6: float16 weights + weights->set_float16value("0101010101010101"); + res = validate(m1); + ML_ASSERT_BAD(res); + weights->clear_float16value(); + + // Case 7: float weights + for (int i = 0; i < 8; ++i){ + weights->add_floatvalue(1); + } + res = validate(m1); + ML_ASSERT_BAD(res); + weights->clear_floatvalue(); + + return 0; } int testRNNLayer() { @@ -767,58 +1139,286 @@ int testInvalidPooling() { } -int testInvalidConvolutionNoPadding() { - +int testValidPooling3d() { Specification::Model m1; auto *topIn = m1.mutable_description()->add_input(); topIn->set_name("input"); auto *shape = topIn->mutable_type()->mutable_multiarraytype(); - shape->add_shape(3); - shape->add_shape(100); - shape->add_shape(100); + // Adding 5 shapes for a rank 5 input. 
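+ // The five dimensions below are arbitrary; pooling3d only requires a rank-5
+ // input (presumably [batch, channels, depth, height, width]), and this test
+ // exercises parameter validation rather than shapes.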
+ shape->add_shape(10); + shape->add_shape(11); + shape->add_shape(12); + shape->add_shape(13); + shape->add_shape(14); auto *out3 = m1.mutable_description()->add_output(); out3->set_name("probs"); out3->mutable_type()->mutable_multiarraytype(); const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(CoreML::Specification::EXACT_ARRAY_MAPPING); - Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); - convLayer->add_input("input"); - convLayer->add_output("probs"); - auto *params = convLayer->mutable_convolution(); - params->set_outputchannels(5); - params->set_kernelchannels(3); - - params->set_hasbias(false); - - // not specifying a padding type should be invalid + Specification::NeuralNetworkLayer *pooling3dLayer = nn->add_layers(); + pooling3dLayer->add_input("input"); + pooling3dLayer->add_output("probs"); + auto *mutablePooling3d = pooling3dLayer->mutable_pooling3d(); + + // Add Kernel sizes + mutablePooling3d->set_kerneldepth(2); + mutablePooling3d->set_kernelheight(2); + mutablePooling3d->set_kernelwidth(2); + + // Add Strides + mutablePooling3d->set_stridedepth(5); + mutablePooling3d->set_strideheight(5); + mutablePooling3d->set_stridewidth(5); + + // Add 6 Custom Paddings + mutablePooling3d->set_paddingtype(CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType_CUSTOM); + mutablePooling3d->set_custompaddingfront(7); + mutablePooling3d->set_custompaddingback(7); + mutablePooling3d->set_custompaddingtop(7); + mutablePooling3d->set_custompaddingbottom(7); + mutablePooling3d->set_custompaddingleft(7); + mutablePooling3d->set_custompaddingright(7); + Result res = validate(m1); - ML_ASSERT_BAD(res); + ML_ASSERT_GOOD(res); + return 0; - } -int testInvalidConvolutionNoWeights() { - +int testInvalidPooling3dNegativeKernelSize() { Specification::Model m1; auto *topIn = m1.mutable_description()->add_input(); topIn->set_name("input"); auto *shape = topIn->mutable_type()->mutable_multiarraytype(); - shape->add_shape(3); - shape->add_shape(100); - shape->add_shape(100); + // Adding 5 shapes for a rank 5 input. 
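+ // Same setup as testValidPooling3d, except kernelwidth is set to -1 below;
+ // a non-positive kernel dimension must fail validation.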
+ shape->add_shape(10); + shape->add_shape(11); + shape->add_shape(12); + shape->add_shape(13); + shape->add_shape(14); auto *out3 = m1.mutable_description()->add_output(); out3->set_name("probs"); out3->mutable_type()->mutable_multiarraytype(); const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(CoreML::Specification::EXACT_ARRAY_MAPPING); - Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); - convLayer->add_input("input"); + Specification::NeuralNetworkLayer *pooling3dLayer = nn->add_layers(); + pooling3dLayer->add_input("input"); + pooling3dLayer->add_output("probs"); + auto *mutablePooling3d = pooling3dLayer->mutable_pooling3d(); + + // Add Kernel sizes + mutablePooling3d->set_kerneldepth(2); + mutablePooling3d->set_kernelheight(2); + mutablePooling3d->set_kernelwidth(-1); + + // Add Strides + mutablePooling3d->set_stridedepth(5); + mutablePooling3d->set_strideheight(5); + mutablePooling3d->set_stridewidth(5); + + // Add 6 Custom Paddings + mutablePooling3d->set_paddingtype(CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType_CUSTOM); + mutablePooling3d->set_custompaddingfront(7); + mutablePooling3d->set_custompaddingback(7); + mutablePooling3d->set_custompaddingtop(7); + mutablePooling3d->set_custompaddingbottom(7); + mutablePooling3d->set_custompaddingleft(7); + mutablePooling3d->set_custompaddingright(7); + + Result res = validate(m1); + ML_ASSERT_BAD(res); + + return 0; +} + + +int testInvalidPooling3dCostumPaddingSetForNonCustomPaddingType() { + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + // Adding 5 shapes for a rank 5 input. + shape->add_shape(10); + shape->add_shape(11); + shape->add_shape(12); + shape->add_shape(13); + shape->add_shape(14); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(CoreML::Specification::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *pooling3dLayer = nn->add_layers(); + pooling3dLayer->add_input("input"); + pooling3dLayer->add_output("probs"); + auto *mutablePooling3d = pooling3dLayer->mutable_pooling3d(); + + // Add Kernel sizes + mutablePooling3d->set_kerneldepth(2); + mutablePooling3d->set_kernelheight(2); + mutablePooling3d->set_kernelwidth(2); + + // Add Strides + mutablePooling3d->set_stridedepth(5); + mutablePooling3d->set_strideheight(5); + mutablePooling3d->set_stridewidth(5); + + // Add 6 Custom Paddings + mutablePooling3d->set_paddingtype(CoreML::Specification::Pooling3DLayerParams_Pooling3DPaddingType_VALID); + mutablePooling3d->set_custompaddingfront(7); + mutablePooling3d->set_custompaddingback(7); + mutablePooling3d->set_custompaddingtop(7); + mutablePooling3d->set_custompaddingbottom(7); + mutablePooling3d->set_custompaddingleft(7); + mutablePooling3d->set_custompaddingright(7); + + Result res = validate(m1); + ML_ASSERT_BAD(res); + + return 0; +} + +int testValidGlobalPooling3d() { + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + // Adding 5 shapes for a rank 5 input. 
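+ // Global pooling collapses depth, height, and width entirely, so only the
+ // pooling type (AVERAGE) is configured below; no kernel, stride, or padding
+ // parameters are needed.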
+ shape->add_shape(10); + shape->add_shape(11); + shape->add_shape(12); + shape->add_shape(13); + shape->add_shape(14); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(CoreML::Specification::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *globalPooling3dLayer = nn->add_layers(); + globalPooling3dLayer->add_input("input"); + globalPooling3dLayer->add_output("probs"); + auto *mutablePooling3d = globalPooling3dLayer->mutable_globalpooling3d(); + + mutablePooling3d->set_type(CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D_AVERAGE); + + Result res = validate(m1); + ML_ASSERT_GOOD(res); + + return 0; +} + +int testInvalidGlobalPooling3dWrongNumberOfInputs() { + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + // Adding 5 shapes for a rank 5 input. + shape->add_shape(10); + shape->add_shape(11); + shape->add_shape(12); + shape->add_shape(13); + shape->add_shape(14); + + auto *topIn2 = m1.mutable_description()->add_input(); + topIn2->set_name("input 2"); + auto *shape2 = topIn2->mutable_type()->mutable_multiarraytype(); + // Adding 5 shapes for a rank 5 input. + shape2->add_shape(10); + shape2->add_shape(11); + shape2->add_shape(12); + shape2->add_shape(13); + shape2->add_shape(14); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(CoreML::Specification::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *globalPooling3dLayer = nn->add_layers(); + globalPooling3dLayer->add_input("input"); + globalPooling3dLayer->add_input("input 2"); + globalPooling3dLayer->add_output("probs"); + auto *mutablePooling3d = globalPooling3dLayer->mutable_globalpooling3d(); + + mutablePooling3d->set_type(CoreML::Specification::GlobalPooling3DLayerParams_GlobalPoolingType3D_AVERAGE); + + Result res = validate(m1); + ML_ASSERT_BAD(res); + + return 0; +} + +int testInvalidConvolutionNoPadding() { + + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(3); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution(); + params->set_outputchannels(5); + params->set_kernelchannels(3); + + params->set_hasbias(false); + + // not specifying a padding type should be invalid + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolutionNoWeights() { + + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(3); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + 
out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); convLayer->add_output("probs"); auto *params = convLayer->mutable_convolution(); params->set_outputchannels(5); @@ -934,21 +1534,1132 @@ int testValidConvolution() { } - -int testValidDeconvolution() { - + +int testValidDeconvolution() { + + Specification::Model m1; + + int output_channels = 5; + int kernel_channels = 3; + int kernel_height = 2; + int kernel_width = 5; + int nGroups = 1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(3); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution(); + params->set_outputchannels(5); + params->set_kernelchannels(3); + params->add_kernelsize(kernel_height); + params->add_kernelsize(kernel_width); + + params->set_hasbias(true); + + params->set_isdeconvolution(true); + params->add_outputshape(110); + params->add_outputshape(110); + + (void)params->mutable_valid(); + + for (int i = 0; i < output_channels * (kernel_channels / nGroups) * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + // Not specifying the right number of weights should be invalid + Result res = validate(m1); + ML_ASSERT_GOOD(res); + return 0; +} + +int testInvalidConvolution3DNegativePadding() { + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = -3; + int pad_left = 0; + int pad_right = -2; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + 
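+ // The negative paddings declared above (pad_bottom = -3, pad_right = -2) are
+ // applied as custom paddings below; the validator must reject them.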
params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; +} + +int testInvalidConvolution3DNoBias() { + + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill Weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Not specifying any biases should be invalid + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolution3DNoInputChannels() { + + Specification::Model m1; + + int input_channels = 0; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int 
kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolution3DNoOutputChannels() { + + Specification::Model m1; + + int input_channels = 3; + int output_channels = 0; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = 
convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolution3DNoWeights() { + + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(false); + + // Not specifying any weights should be invalid + Result res = validate(m1); + 
ML_ASSERT_BAD(res);
+ return 0;
+
+}
+
+int testInvalidConvolution3DNonPositiveDilation() {
+ Specification::Model m1;
+
+ int input_channels = 3;
+ int output_channels = 3;
+ int kernel_depth = 3;
+ int kernel_height = 3;
+ int kernel_width = 3;
+ int nGroups = 1;
+ int stride_depth = 1;
+ int stride_height = 1;
+ int stride_width = 1;
+ int dilation_depth = -1;
+ int dilation_height = 1;
+ int dilation_width = 1;
+ int pad_front = 0;
+ int pad_back = 0;
+ int pad_top = 0;
+ int pad_bottom = 0;
+ int pad_left = 0;
+ int pad_right = 0;
+
+ auto *topIn = m1.mutable_description()->add_input();
+ topIn->set_name("input");
+ auto *shape = topIn->mutable_type()->mutable_multiarraytype();
+ shape->add_shape(1);
+ shape->add_shape(3);
+ shape->add_shape(32);
+ shape->add_shape(100);
+ shape->add_shape(100);
+
+ auto *out3 = m1.mutable_description()->add_output();
+ out3->set_name("probs");
+ out3->mutable_type()->mutable_multiarraytype();
+
+ const auto nn = m1.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ Specification::NeuralNetworkLayer *convLayer = nn->add_layers();
+ convLayer->add_input("input");
+ convLayer->add_output("probs");
+ auto *params = convLayer->mutable_convolution3d();
+ params->set_inputchannels(input_channels);
+ params->set_outputchannels(output_channels);
+ params->set_kerneldepth(kernel_depth);
+ params->set_kernelheight(kernel_height);
+ params->set_kernelwidth(kernel_width);
+ params->set_ngroups(nGroups);
+ params->set_stridedepth(stride_depth);
+ params->set_strideheight(stride_height);
+ params->set_stridewidth(stride_width);
+ params->set_dilationdepth(dilation_depth);
+ params->set_dilationheight(dilation_height);
+ params->set_dilationwidth(dilation_width);
+ params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM);
+ params->set_custompaddingfront(pad_front);
+ params->set_custompaddingback(pad_back);
+ params->set_custompaddingtop(pad_top);
+ params->set_custompaddingbottom(pad_bottom);
+ params->set_custompaddingleft(pad_left);
+ params->set_custompaddingright(pad_right);
+
+ params->set_hasbias(true);
+
+ // Fill weights
+ for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) {
+ params->mutable_weights()->add_floatvalue(1.0);
+ }
+
+ // Fill bias
+ for (int i = 0; i < output_channels; i++) {
+ params->mutable_bias()->add_floatvalue(1.0);
+ }
+
+ Result res = validate(m1);
+ ML_ASSERT_BAD(res);
+ return 0;
+
+}
+
+int testInvalidConvolution3DNonPositiveGroups() {
+ Specification::Model m1;
+
+ int input_channels = 3;
+ int output_channels = 3;
+ int kernel_depth = 3;
+ int kernel_height = 3;
+ int kernel_width = 3;
+ // Only nGroups is invalid here; dilation stays valid so this test isolates the groups check
+ int nGroups = 0;
+ int stride_depth = 1;
+ int stride_height = 1;
+ int stride_width = 1;
+ int dilation_depth = 1;
+ int dilation_height = 1;
+ int dilation_width = 1;
+ int pad_front = 0;
+ int pad_back = 0;
+ int pad_top = 0;
+ int pad_bottom = 0;
+ int pad_left = 0;
+ int pad_right = 0;
+
+ auto *topIn = m1.mutable_description()->add_input();
+ topIn->set_name("input");
+ auto *shape = topIn->mutable_type()->mutable_multiarraytype();
+ shape->add_shape(1);
+ shape->add_shape(3);
+ shape->add_shape(32);
+ shape->add_shape(100);
+ shape->add_shape(100);
+
+ auto *out3 = m1.mutable_description()->add_output();
+ out3->set_name("probs");
+ out3->mutable_type()->mutable_multiarraytype();
+
+ const auto nn = m1.mutable_neuralnetwork();
+
nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill weights + // Since nGroups is 0, pretend it's 1 to fill weights + for (int i = 0; i < output_channels * (input_channels / 1) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolution3DNonPositiveKernelSize() { + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 0; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + 
params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolution3DNonPositiveStride() { + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = -2; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + return 0; + +} + +int testInvalidConvolution3DTwoInputs() { + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + 
int pad_back = 0;
+ int pad_top = 0;
+ // Padding stays valid so this test isolates the two-inputs check
+ int pad_bottom = 0;
+ int pad_left = 0;
+ int pad_right = 0;
+
+ auto *topIn0 = m1.mutable_description()->add_input();
+ topIn0->set_name("input0");
+ auto *shape0 = topIn0->mutable_type()->mutable_multiarraytype();
+ shape0->add_shape(1);
+ shape0->add_shape(3);
+ shape0->add_shape(32);
+ shape0->add_shape(100);
+ shape0->add_shape(100);
+
+ auto *topIn1 = m1.mutable_description()->add_input();
+ topIn1->set_name("input1");
+ auto *shape1 = topIn1->mutable_type()->mutable_multiarraytype();
+ shape1->add_shape(1);
+ shape1->add_shape(3);
+ shape1->add_shape(3);
+ shape1->add_shape(3);
+ shape1->add_shape(3);
+
+ auto *out3 = m1.mutable_description()->add_output();
+ out3->set_name("probs");
+ out3->mutable_type()->mutable_multiarraytype();
+
+ const auto nn = m1.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ Specification::NeuralNetworkLayer *convLayer = nn->add_layers();
+ convLayer->add_input("input0");
+ convLayer->add_input("input1");
+ convLayer->add_output("probs");
+ auto *params = convLayer->mutable_convolution3d();
+ params->set_inputchannels(input_channels);
+ params->set_outputchannels(output_channels);
+ params->set_kerneldepth(kernel_depth);
+ params->set_kernelheight(kernel_height);
+ params->set_kernelwidth(kernel_width);
+ params->set_ngroups(nGroups);
+ params->set_stridedepth(stride_depth);
+ params->set_strideheight(stride_height);
+ params->set_stridewidth(stride_width);
+ params->set_dilationdepth(dilation_depth);
+ params->set_dilationheight(dilation_height);
+ params->set_dilationwidth(dilation_width);
+ params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM);
+ params->set_custompaddingfront(pad_front);
+ params->set_custompaddingback(pad_back);
+ params->set_custompaddingtop(pad_top);
+ params->set_custompaddingbottom(pad_bottom);
+ params->set_custompaddingleft(pad_left);
+ params->set_custompaddingright(pad_right);
+
+ params->set_hasbias(true);
+
+ // Fill weights
+ for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) {
+ params->mutable_weights()->add_floatvalue(1.0);
+ }
+
+ // Fill bias
+ for (int i = 0; i < output_channels; i++) {
+ params->mutable_bias()->add_floatvalue(1.0);
+ }
+
+ // Convolution3D takes exactly one data input; a second input should be rejected
+ Result res = validate(m1);
+ ML_ASSERT_BAD(res);
+ return 0;
+
+}
+
+int testValidConvolution3D() {
+
+ Specification::Model m1;
+
+ int input_channels = 3;
+ int output_channels = 3;
+ int kernel_depth = 3;
+ int kernel_height = 3;
+ int kernel_width = 3;
+ int nGroups = 1;
+ int stride_depth = 1;
+ int stride_height = 1;
+ int stride_width = 1;
+ int dilation_depth = 1;
+ int dilation_height = 1;
+ int dilation_width = 1;
+ int pad_front = 0;
+ int pad_back = 0;
+ int pad_top = 0;
+ int pad_bottom = 0;
+ int pad_left = 0;
+ int pad_right = 0;
+
+ auto *topIn = m1.mutable_description()->add_input();
+ topIn->set_name("input");
+ auto *shape = topIn->mutable_type()->mutable_multiarraytype();
+ shape->add_shape(1);
+ shape->add_shape(3);
+ shape->add_shape(32);
+ shape->add_shape(100);
+ shape->add_shape(100);
+
+ auto *out3 = m1.mutable_description()->add_output();
+ out3->set_name("probs");
+ out3->mutable_type()->mutable_multiarraytype();
+
+ const auto nn = m1.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ Specification::NeuralNetworkLayer *convLayer =
nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_SAME); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < (output_channels / nGroups) * input_channels * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_GOOD(res); + return 0; + +} + +int testInvalidConvolution3DWithOutputShape() { + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + 
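+ // outputShape is only allowed on a deconvolution (isdeconvolution == true),
+ // so setting it on a plain Convolution3D should fail validation (see the assertion below)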
params->add_outputshape(4); + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + ML_ASSERT(res.message().find("Output Shape is supported for Deconvolution layer") != std::string::npos); + return 0; +} + +int testInvalidDeConvolution3DOutputShape() { + Specification::Model m1; + + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; + int nGroups = 1; + int stride_depth = 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(3); + shape->add_shape(32); + shape->add_shape(100); + shape->add_shape(100); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); + convLayer->add_input("input"); + convLayer->add_output("probs"); + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); + params->set_isdeconvolution(true); + params->add_outputshape(4); + params->set_hasbias(true); + + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { + params->mutable_weights()->add_floatvalue(1.0); + } + + // Fill bias + for (int i = 0; i < output_channels; i++) { + params->mutable_bias()->add_floatvalue(1.0); + } + + Result res = validate(m1); + ML_ASSERT_BAD(res); + ML_ASSERT(res.message().find("if set, output shape must be of length 3") != std::string::npos); + return 0; +} + +int testValidDeConvolution3D() { Specification::Model m1; - int output_channels = 5; - int kernel_channels = 3; - int kernel_height = 2; - int kernel_width = 5; + int input_channels = 3; + int output_channels = 3; + int kernel_depth = 3; + int kernel_height = 3; + int kernel_width = 3; int nGroups = 1; + int stride_depth 
= 1; + int stride_height = 1; + int stride_width = 1; + int dilation_depth = 1; + int dilation_height = 1; + int dilation_width = 1; + int pad_front = 0; + int pad_back = 0; + int pad_top = 0; + int pad_bottom = 0; + int pad_left = 0; + int pad_right = 0; auto *topIn = m1.mutable_description()->add_input(); topIn->set_name("input"); auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); shape->add_shape(3); + shape->add_shape(32); shape->add_shape(100); shape->add_shape(100); @@ -957,38 +2668,50 @@ int testValidDeconvolution() { out3->mutable_type()->mutable_multiarraytype(); const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); Specification::NeuralNetworkLayer *convLayer = nn->add_layers(); convLayer->add_input("input"); convLayer->add_output("probs"); - auto *params = convLayer->mutable_convolution(); - params->set_outputchannels(5); - params->set_kernelchannels(3); - params->add_kernelsize(kernel_height); - params->add_kernelsize(kernel_width); - - params->set_hasbias(true); - + auto *params = convLayer->mutable_convolution3d(); + params->set_inputchannels(input_channels); + params->set_outputchannels(output_channels); + params->set_kerneldepth(kernel_depth); + params->set_kernelheight(kernel_height); + params->set_kernelwidth(kernel_width); + params->set_ngroups(nGroups); + params->set_stridedepth(stride_depth); + params->set_strideheight(stride_height); + params->set_stridewidth(stride_width); + params->set_dilationdepth(dilation_depth); + params->set_dilationheight(dilation_height); + params->set_dilationwidth(dilation_width); + params->set_paddingtype(CoreML::Specification::Convolution3DLayerParams_PaddingType_CUSTOM); + params->set_custompaddingfront(pad_front); + params->set_custompaddingback(pad_back); + params->set_custompaddingtop(pad_top); + params->set_custompaddingbottom(pad_bottom); + params->set_custompaddingleft(pad_left); + params->set_custompaddingright(pad_right); params->set_isdeconvolution(true); - params->add_outputshape(110); - params->add_outputshape(110); - - (void)params->mutable_valid(); + params->set_hasbias(true); - for (int i = 0; i < output_channels * (kernel_channels / nGroups) * kernel_height * kernel_width; i++) { + // Fill weights + for (int i = 0; i < output_channels * (input_channels / nGroups) * kernel_depth * kernel_height * kernel_width; i++) { params->mutable_weights()->add_floatvalue(1.0); } + // Fill bias for (int i = 0; i < output_channels; i++) { params->mutable_bias()->add_floatvalue(1.0); } - // Not specifying the right number of weights should be invalid Result res = validate(m1); ML_ASSERT_GOOD(res); return 0; } + int testInvalidEmbedding() { Specification::Model m1; @@ -1259,6 +2982,7 @@ int testValidPadding() { return 0; } + int testInvalidUpsample() { Specification::Model m1; @@ -1267,12 +2991,15 @@ int testInvalidUpsample() { topIn->set_name("input"); auto *shape = topIn->mutable_type()->mutable_multiarraytype(); shape->add_shape(5); + shape->add_shape(5); + shape->add_shape(5); auto *out3 = m1.mutable_description()->add_output(); out3->set_name("probs"); out3->mutable_type()->mutable_multiarraytype(); const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); Specification::NeuralNetworkLayer *upsampleLayer = nn->add_layers(); upsampleLayer->add_input("input"); @@ -1287,6 +3014,43 @@ int testInvalidUpsample() { return 
0; } +int testInvalidUpsampleNearestNeighborsModeWithAlignCorners() { + + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(5); + shape->add_shape(5); + shape->add_shape(5); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *upsampleLayer = nn->add_layers(); + upsampleLayer->add_input("input"); + upsampleLayer->add_output("probs"); + auto *params = upsampleLayer->mutable_upsample(); + + params->set_mode(Specification::UpsampleLayerParams_InterpolationMode::UpsampleLayerParams_InterpolationMode_NN); + params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_FALSE); + + Result res = validate(m1); + ML_ASSERT_BAD(res); + + params->set_mode(Specification::UpsampleLayerParams_InterpolationMode::UpsampleLayerParams_InterpolationMode_NN); + params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_TRUE); + + res = validate(m1); + ML_ASSERT_BAD(res); + return 0; +} + int testValidUpsample() { Specification::Model m1; @@ -1316,6 +3080,159 @@ int testValidUpsample() { return 0; } +int testFractionalUpsample() { + + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = topIn->mutable_type()->mutable_multiarraytype(); + shape->add_shape(5); + shape->add_shape(5); + shape->add_shape(5); + + auto *out3 = m1.mutable_description()->add_output(); + out3->set_name("probs"); + out3->mutable_type()->mutable_multiarraytype(); + + const auto nn = m1.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer *upsampleLayer = nn->add_layers(); + upsampleLayer->add_input("input"); + upsampleLayer->add_output("probs"); + auto *params = upsampleLayer->mutable_upsample(); + + Result res = validate(m1); + + // No scaling factor still valid (1x scaling) + ML_ASSERT_GOOD(res); + + // Fractional scaling factor valid + params->add_fractionalscalingfactor(2.5); + params->add_fractionalscalingfactor(3.5); + + // Requires "align corners" bilinear mode + params->set_mode(Specification::UpsampleLayerParams_InterpolationMode_NN); + res = validate(m1); + ML_ASSERT_BAD(res); + + params->set_mode(Specification::UpsampleLayerParams_InterpolationMode_BILINEAR); + params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_DEFAULT); + res = validate(m1); + ML_ASSERT_BAD(res); + + params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_TRUE); + res = validate(m1); + ML_ASSERT_GOOD(res); + + params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_FALSE); + res = validate(m1); + ML_ASSERT_GOOD(res); + + // Invalid to provide both + params->add_scalingfactor(1); + params->add_scalingfactor(1); + res = validate(m1); + ML_ASSERT_BAD(res); + return 0; +} + +int testValidUpsampleAlignCorners() { + + Specification::Model m1; + + auto *topIn = m1.mutable_description()->add_input(); + topIn->set_name("input"); + auto *shape = 
topIn->mutable_type()->mutable_multiarraytype();
+ shape->add_shape(5);
+ shape->add_shape(5);
+ shape->add_shape(5);
+
+ auto *out3 = m1.mutable_description()->add_output();
+ out3->set_name("probs");
+ out3->mutable_type()->mutable_multiarraytype();
+
+ const auto nn = m1.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ Specification::NeuralNetworkLayer *upsampleLayer = nn->add_layers();
+ upsampleLayer->add_input("input");
+ upsampleLayer->add_output("probs");
+ auto *params = upsampleLayer->mutable_upsample();
+
+ // Scaling factor needs to be 2D
+ params->add_scalingfactor(1.0);
+ params->add_scalingfactor(1.0);
+
+ params->set_mode(Specification::UpsampleLayerParams_InterpolationMode_BILINEAR);
+ params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_FALSE);
+
+ Result res = validate(m1);
+ ML_ASSERT_GOOD(res);
+
+ params->set_linearupsamplemode(Specification::UpsampleLayerParams_LinearUpsampleMode_ALIGN_CORNERS_TRUE);
+
+ res = validate(m1);
+ ML_ASSERT_GOOD(res);
+
+ // Check that the new field sets the spec version to iOS 14
+ Model mlmodel = Model(m1);
+ ML_ASSERT(mlmodel.getProto().specificationversion() == MLMODEL_SPECIFICATION_VERSION_IOS14);
+ return 0;
+}
+
+int testUpsampleArgsortSpec() {
+ /* Ensure that the model is treated as the iOS 14 specification
+ when new layers are included and upsample only includes
+ legacy params.
+ */
+ Specification::Model m;
+
+ auto *in = m.mutable_description()->add_input();
+ in->set_name("input");
+ auto *inShape = in->mutable_type()->mutable_multiarraytype();
+ inShape->add_shape(1);
+ inShape->add_shape(3);
+ inShape->add_shape(3);
+
+ auto *out = m.mutable_description()->add_output();
+ out->set_name("output");
+ auto *outShape = out->mutable_type()->mutable_multiarraytype();
+ outShape->add_shape(1);
+ outShape->add_shape(3);
+ outShape->add_shape(3);
+
+ const auto nn = m.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ auto *upsampleLayer = nn->add_layers();
+ upsampleLayer->set_name("upsample");
+ upsampleLayer->add_input("input");
+ upsampleLayer->add_output("A");
+ auto *upsampleParams = upsampleLayer->mutable_upsample();
+
+ // Scaling factor needs to be 2D
+ upsampleParams->add_scalingfactor(1.0);
+ upsampleParams->add_scalingfactor(1.0);
+
+ upsampleParams->set_mode(Specification::UpsampleLayerParams_InterpolationMode_BILINEAR);
+
+ auto *argsortLayer = nn->add_layers();
+ argsortLayer->set_name("argsort");
+ argsortLayer->add_input("A");
+ argsortLayer->add_output("output");
+ auto *argsortParams = argsortLayer->mutable_argsort();
+ argsortParams->set_axis(1);
+
+ Result res = validate(m);
+ ML_ASSERT_GOOD(res);
+ Model mlmodel = Model(m);
+ ML_ASSERT(mlmodel.getProto().specificationversion() == MLMODEL_SPECIFICATION_VERSION_IOS14);
+
+ return 0;
+}
+
 int testValidSoftmax() {
 Specification::Model m1;
@@ -4014,5 +5931,157 @@ int testInvalidLayerNormalizationWrongGammaOrBeta() {
 return 0;
 }
 
+int testInvalidArgsortWrongAxis() {
+
+ // axis can't be negative or greater than or equal to the input rank
+
+ Specification::Model m;
+
+ auto *in = m.mutable_description()->add_input();
+ in->set_name("input");
+ auto *inShape = in->mutable_type()->mutable_multiarraytype();
+ inShape->add_shape(3);
+ inShape->add_shape(5);
+ inShape->add_shape(2);
+
+ auto *out = m.mutable_description()->add_output();
+ out->set_name("output");
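+ // argsort returns indices with the same shape as its input, so the output is also (3, 5, 2)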
+ auto *outShape = out->mutable_type()->mutable_multiarraytype();
+ outShape->add_shape(3);
+ outShape->add_shape(5);
+ outShape->add_shape(2);
+
+ const auto nn = m.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ auto *layers = nn->add_layers();
+ layers->set_name("argsort");
+ layers->add_input("input");
+ layers->add_output("output");
+ layers->add_inputtensor()->set_rank(3);
+
+ auto *params = layers->mutable_argsort();
+
+ // CASE 1: negative axis
+ params->set_axis(-1);
+
+ // axis should be in range [0, rank)
+ Result res = validate(m);
+ ML_ASSERT_BAD(res);
+ ML_ASSERT(res.message().find("axis") != std::string::npos);
+
+ // CASE 2: axis greater than or equal to the input rank
+ params->set_axis(3);
+ res = validate(m);
+ ML_ASSERT_BAD(res);
+ ML_ASSERT(res.message().find("axis") != std::string::npos);
+
+ return 0;
+}
+
+int testValidReorganizeData() {
+ Specification::Model m;
+ Specification::FeatureDescription* input = m.mutable_description()->add_input();
+ input->set_name("input");
+ Specification::ArrayFeatureType* shape = input->mutable_type()->mutable_multiarraytype();
+ shape->add_shape(1);
+ shape->add_shape(18);
+ shape->add_shape(5);
+ shape->add_shape(7);
+
+ Specification::FeatureDescription* output = m.mutable_description()->add_output();
+ output->set_name("output");
+ output->mutable_type()->mutable_multiarraytype();
+ Specification::ArrayFeatureType* outShape = output->mutable_type()->mutable_multiarraytype();
+ outShape->add_shape(1);
+ outShape->add_shape(2);
+ outShape->add_shape(15);
+ outShape->add_shape(21);
+
+ Specification::NeuralNetwork* nn = m.mutable_neuralnetwork();
+ nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+
+ Specification::NeuralNetworkLayer* layer = nn->add_layers();
+ layer->add_input("input");
+ layer->add_output("output");
+ layer->mutable_reorganizedata()->set_mode(Specification::ReorganizeDataLayerParams::DEPTH_TO_SPACE);
+ layer->mutable_reorganizedata()->set_blocksize(3);
+
+ Result res = validate(m);
+ ML_ASSERT_GOOD(res);
+
+ return 0;
+}
+
+int testInvalidReorganizeDataInputRank() {
+ // Tests that the validator rejects layers with an input rank that doesn't
+ // match the output rank.
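+ // Compared with testValidReorganizeData above, the leading batch dimension is
+ // dropped, leaving a rank-3 input against a rank-4 output.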
+ Specification::Model m; + Specification::FeatureDescription* input = m.mutable_description()->add_input(); + input->set_name("input"); + Specification::ArrayFeatureType* shape = input->mutable_type()->mutable_multiarraytype(); + shape->add_shape(18); + shape->add_shape(5); + shape->add_shape(7); + + Specification::FeatureDescription* output = m.mutable_description()->add_output(); + output->set_name("output"); + output->mutable_type()->mutable_multiarraytype(); + Specification::ArrayFeatureType* outShape = output->mutable_type()->mutable_multiarraytype(); + outShape->add_shape(1); + outShape->add_shape(2); + outShape->add_shape(15); + outShape->add_shape(21); + + Specification::NeuralNetwork* nn = m.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer* layer = nn->add_layers(); + layer->add_input("input"); + layer->add_output("output"); + layer->mutable_reorganizedata()->set_mode(Specification::ReorganizeDataLayerParams::DEPTH_TO_SPACE); + layer->mutable_reorganizedata()->set_blocksize(3); + + Result res = validate(m); + ML_ASSERT_BAD(res); + + return 0; +} + +int testInvalidReorganizeDataBlockSize() { + // Tests that the validator rejects layers with an impossible block size. + // (Block size for a reorganizeData layer must be greater than 1). + Specification::Model m; + Specification::FeatureDescription* input = m.mutable_description()->add_input(); + input->set_name("input"); + Specification::ArrayFeatureType* shape = input->mutable_type()->mutable_multiarraytype(); + shape->add_shape(1); + shape->add_shape(18); + shape->add_shape(5); + shape->add_shape(7); + + Specification::FeatureDescription* output = m.mutable_description()->add_output(); + output->set_name("output"); + output->mutable_type()->mutable_multiarraytype(); + Specification::ArrayFeatureType* outShape = output->mutable_type()->mutable_multiarraytype(); + outShape->add_shape(1); + outShape->add_shape(2); + outShape->add_shape(15); + outShape->add_shape(21); + + Specification::NeuralNetwork* nn = m.mutable_neuralnetwork(); + nn->set_arrayinputshapemapping(Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING); + + Specification::NeuralNetworkLayer* layer = nn->add_layers(); + layer->add_input("input"); + layer->add_output("output"); + layer->mutable_reorganizedata()->set_mode(Specification::ReorganizeDataLayerParams::DEPTH_TO_SPACE); + layer->mutable_reorganizedata()->set_blocksize(1); + + Result res = validate(m); + ML_ASSERT_BAD(res); + + return 0; +} #pragma clang diagnostic pop diff --git a/mlmodel/tests/OneHotEncoderTests.cpp b/mlmodel/tests/OneHotEncoderTests.cpp index ad6a475c4..0dd9c584d 100644 --- a/mlmodel/tests/OneHotEncoderTests.cpp +++ b/mlmodel/tests/OneHotEncoderTests.cpp @@ -1,9 +1,8 @@ #include "MLModelTests.hpp" -// TODO -- Fix these headers. -#include "../src/Model.hpp" -#include "../src/transforms/OneHotEncoder.hpp" -#include "../src/transforms/TreeEnsemble.hpp" +#include "Model.hpp" +#include "transforms/OneHotEncoder.hpp" +#include "transforms/TreeEnsemble.hpp" #include "framework/TestUtils.hpp" diff --git a/mlmodel/tests/ParameterTests.hpp b/mlmodel/tests/ParameterTests.hpp index a6061b623..cf911e42b 100644 --- a/mlmodel/tests/ParameterTests.hpp +++ b/mlmodel/tests/ParameterTests.hpp @@ -6,9 +6,8 @@ // Copyright © 2019 Apple Inc. All rights reserved. 
// -#include "../src/Format.hpp" -#include "../src/Model.hpp" -#include "../src/NeuralNetwork/NeuralNetworkShapes.hpp" +#include "Format.hpp" +#include "Model.hpp" #include "framework/TestUtils.hpp" diff --git a/mlmodel/tests/SaveLoadTests.cpp b/mlmodel/tests/SaveLoadTests.cpp index 155de8afc..8754ff739 100644 --- a/mlmodel/tests/SaveLoadTests.cpp +++ b/mlmodel/tests/SaveLoadTests.cpp @@ -1,10 +1,9 @@ #include "MLModelTests.hpp" -// TODO -- Fix these headers. -#include "../src/Model.hpp" -#include "../src/transforms/OneHotEncoder.hpp" -#include "../src/transforms/LinearModel.hpp" -#include "../src/transforms/TreeEnsemble.hpp" +#include "Model.hpp" +#include "transforms/OneHotEncoder.hpp" +#include "transforms/LinearModel.hpp" +#include "transforms/TreeEnsemble.hpp" #include "framework/TestUtils.hpp" diff --git a/mlmodel/tests/TreeEnsembleTests.cpp b/mlmodel/tests/TreeEnsembleTests.cpp index 9e76e12f2..29dd65b9c 100644 --- a/mlmodel/tests/TreeEnsembleTests.cpp +++ b/mlmodel/tests/TreeEnsembleTests.cpp @@ -1,10 +1,9 @@ #include "MLModelTests.hpp" -// TODO -- Fix these headers. -#include "../src/Model.hpp" -#include "../src/transforms/OneHotEncoder.hpp" -#include "../src/transforms/LinearModel.hpp" -#include "../src/transforms/TreeEnsemble.hpp" +#include "Model.hpp" +#include "transforms/OneHotEncoder.hpp" +#include "transforms/LinearModel.hpp" +#include "transforms/TreeEnsemble.hpp" #include "framework/TestUtils.hpp" diff --git a/mlmodel/tests/UpdatableModelValidatorTests.cpp b/mlmodel/tests/UpdatableModelValidatorTests.cpp index 7fa03291b..a38c2f2b8 100644 --- a/mlmodel/tests/UpdatableModelValidatorTests.cpp +++ b/mlmodel/tests/UpdatableModelValidatorTests.cpp @@ -9,7 +9,6 @@ #include "MLModelTests.hpp" #include "../src/Format.hpp" #include "../src/Model.hpp" -#include "../src/NeuralNetwork/NeuralNetworkShapes.hpp" #include "ParameterTests.hpp" #include "ModelCreationUtils.hpp" #include "../src/Utils.hpp" @@ -22,82 +21,82 @@ using namespace CoreML; int testInvalidUpdatableModelWrongType() { - + /* checks that isUpdatable property is true only when the model type is - NN, KNN */ - + Specification::Model m1; - + auto *topIn = m1.mutable_description()->add_input(); topIn->set_name("input"); auto *type_in = topIn->mutable_type()->mutable_multiarraytype(); type_in->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32); type_in->add_shape(1); - + auto *out = m1.mutable_description()->add_output(); out->set_name("output"); auto *type_out = out->mutable_type()->mutable_multiarraytype(); type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32); - + m1.mutable_identity(); m1.set_isupdatable(true); m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); - + Result res = Model::validate(m1); ML_ASSERT_BAD(res); return 0; } int testInvalidUpdatableModelWrongLayer() { - + /* checks that isUpdatable property is true only for an updatable layer (conv or innerproduct) */ - + Specification::Model m1; - + int num_inputs = 5; int num_outputs = 3; - + auto *topIn = m1.mutable_description()->add_input(); topIn->set_name("input"); auto *shape = topIn->mutable_type()->mutable_multiarraytype(); shape->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32); shape->add_shape(1); - + auto *out = m1.mutable_description()->add_output(); out->set_name("probs"); auto *type_out = out->mutable_type()->mutable_multiarraytype(); type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32); - + m1.set_isupdatable(true); 
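 // Updatable models are supported starting with the iOS 13 (Core ML 3) specification version.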
m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13);
-
+
 const auto nn = m1.mutable_neuralnetwork();
-
+
 Specification::NeuralNetworkLayer *embeddingLayer = nn->add_layers();
 embeddingLayer->add_input("input");
 embeddingLayer->add_output("probs");
 embeddingLayer->set_isupdatable(true);
 embeddingLayer->set_name("embed");
-
+
 auto *params = embeddingLayer->mutable_embedding();
 params->set_inputdim(num_inputs);
 params->set_outputchannels(num_outputs);
-
+
 params->set_hasbias(true);
-
+
 for (int i = 0; i < num_inputs * num_outputs; i++) {
 params->mutable_weights()->add_floatvalue(1.0);
 }
-
+
 for (int i = 0; i < num_outputs; i++) {
 params->mutable_bias()->add_floatvalue(1.0);
 }
-
+
 Result res = Model::validate(m1);
 ML_ASSERT_BAD(res);
@@ -106,29 +105,29 @@ int testInvalidUpdatableModelWrongLayer() {
 int testInvalidUpdatableModelWrongWeights() {
-
+
 /* checks that the updatable property is true for weights if the layer is
 marked as updatable.
 */
-
+
 Specification::Model m1;
-
+
 auto *topIn = m1.mutable_description()->add_input();
 topIn->set_name("input");
 auto *shape = topIn->mutable_type()->mutable_multiarraytype();
 shape->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
 shape->add_shape(1);
-
+
 auto *out = m1.mutable_description()->add_output();
 out->set_name("probs");
 auto *type_out = out->mutable_type()->mutable_multiarraytype();
 type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
-
+
 m1.set_isupdatable(true);
 m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13);
-
+
 const auto nn = m1.mutable_neuralnetwork();
-
+
 Specification::NeuralNetworkLayer *innerProductLayer = nn->add_layers();
 innerProductLayer->add_input("input");
 innerProductLayer->add_output("probs");
@@ -138,168 +137,168 @@ int testInvalidUpdatableModelWrongWeights() {
 Specification::InnerProductLayerParams *innerProductParams = innerProductLayer->mutable_innerproduct();
 innerProductParams->set_inputchannels(1);
 innerProductParams->set_outputchannels(1);
-
+
 innerProductParams->mutable_weights()->add_floatvalue(1.0);
 innerProductParams->mutable_weights()->set_isupdatable(false);
-
+
 innerProductParams->set_hasbias(true);
 innerProductParams->mutable_bias()->add_floatvalue(1.0);
 innerProductParams->mutable_bias()->set_isupdatable(true);
 Result res = Model::validate(m1);
-
+
 ML_ASSERT_BAD(res);
 return 0;
 }
 int testInvalidUpdatableModelWrongBiases() {
-
+
 /* checks that the updatable property is true for biases if the layer is
 marked as updatable.
*/
-
+
 Specification::Model m1;
-
+
 auto *topIn = m1.mutable_description()->add_input();
 topIn->set_name("input");
 auto *shape = topIn->mutable_type()->mutable_multiarraytype();
 shape->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
 shape->add_shape(1);
-
+
 auto *out = m1.mutable_description()->add_output();
 out->set_name("probs");
 auto *type_out = out->mutable_type()->mutable_multiarraytype();
 type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
-
+
 m1.set_isupdatable(true);
 m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13);
-
+
 const auto nn = m1.mutable_neuralnetwork();
-
+
 Specification::NeuralNetworkLayer *innerProductLayer = nn->add_layers();
 innerProductLayer->add_input("input");
 innerProductLayer->add_output("probs");
 innerProductLayer->set_isupdatable(true);
-
+
 Specification::InnerProductLayerParams *innerProductParams = innerProductLayer->mutable_innerproduct();
 innerProductParams->set_inputchannels(1);
 innerProductParams->set_outputchannels(1);
-
+
 innerProductParams->mutable_weights()->add_floatvalue(1.0);
 innerProductParams->mutable_weights()->set_isupdatable(true);
-
+
 innerProductParams->set_hasbias(true);
 innerProductParams->mutable_bias()->add_floatvalue(1.0);
 innerProductParams->mutable_bias()->set_isupdatable(false);
-
+
 Result res = Model::validate(m1);
-
+
 ML_ASSERT_BAD(res);
 return 0;
 }
 int testInvalidUpdatableModelNonUpdatableLayers() {
-
+
 /* checks that the updatable property is true for at least one layer if a
 model is updatable.
 */
-
+
 Specification::Model m1;
-
+
 auto *topIn = m1.mutable_description()->add_input();
 topIn->set_name("input");
 auto *shape = topIn->mutable_type()->mutable_multiarraytype();
 shape->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
 shape->add_shape(1);
-
+
 auto *out = m1.mutable_description()->add_output();
 out->set_name("probs");
 auto *type_out = out->mutable_type()->mutable_multiarraytype();
 type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
-
+
 m1.set_isupdatable(true);
 m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13);
-
+
 const auto nn = m1.mutable_neuralnetwork();
-
+
 Specification::NeuralNetworkLayer *innerProductLayer = nn->add_layers();
-
+
 innerProductLayer->add_input("input");
 innerProductLayer->add_output("probs");
-
+
 Specification::InnerProductLayerParams *innerProductParams = innerProductLayer->mutable_innerproduct();
 innerProductParams->set_inputchannels(1);
 innerProductParams->set_outputchannels(1);
-
+
 innerProductParams->mutable_weights()->add_floatvalue(1.0);
-
+
 innerProductParams->set_hasbias(true);
 innerProductParams->mutable_bias()->add_floatvalue(1.0);
-
+
 Result res = Model::validate(m1);
-
+
 ML_ASSERT_BAD(res);
 return 0;
 }
 int testInvalidUpdatableModelwithCollidedLayerAndLossLayerNames() {
-
+
 /* checks that an updatable model has no collision on names for model layers
 and loss layers
 */
-
+
 Specification::Model m1;
-
+
 auto *topIn = m1.mutable_description()->add_input();
 topIn->set_name("input");
 auto *shape = topIn->mutable_type()->mutable_multiarraytype();
 shape->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
 shape->add_shape(1);
-
+
 auto *out = m1.mutable_description()->add_output();
 out->set_name("probs");
 auto *type_out = out->mutable_type()->mutable_multiarraytype();
 type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32);
-
+
 m1.set_isupdatable(true);
 m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13);
-
+
 const auto nn =
m1.mutable_neuralnetwork(); - + Specification::NetworkUpdateParameters *updateParams = nn->mutable_updateparams(); Specification::LossLayer *lossLayer = updateParams->add_losslayers(); lossLayer->set_name("name1"); - + Specification::NeuralNetworkLayer *innerProductLayer = nn->add_layers(); innerProductLayer->add_input("input"); innerProductLayer->add_output("probs"); innerProductLayer->set_name("name1"); innerProductLayer->set_isupdatable(true); - + Specification::InnerProductLayerParams *innerProductParams = innerProductLayer->mutable_innerproduct(); innerProductParams->set_inputchannels(1); innerProductParams->set_outputchannels(1); - + innerProductParams->mutable_weights()->add_floatvalue(1.0); innerProductParams->mutable_weights()->set_isupdatable(true); - + innerProductParams->set_hasbias(true); innerProductParams->mutable_bias()->add_floatvalue(1.0); innerProductParams->mutable_bias()->set_isupdatable(true); Result res = Model::validate(m1); - + ML_ASSERT_BAD(res); return 0; } int testInvalidModelUnsupportedLayersForBP() { - + /* checks if there are layers between updatable-marked layers and loss function that do not support back-propagation input ---> inner_product (U) ----> ABS (not supported for BP) ---> pooling --> output */ - - + + Specification::Model m; auto *topIn = m.mutable_description()->add_input(); topIn->set_name("A"); @@ -309,15 +308,15 @@ int testInvalidModelUnsupportedLayersForBP() { shape->add_shape(1); shape->add_shape(1); shape->add_shape(1); - + auto *out = m.mutable_description()->add_output(); out->set_name("B"); auto *type_out = out->mutable_type()->mutable_multiarraytype(); type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_FLOAT32); - + m.set_isupdatable(true); m.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); - + auto *nn = m.mutable_neuralnetwork(); auto *l1 = nn->add_layers(); l1->set_name("inner_layer"); @@ -332,14 +331,14 @@ int testInvalidModelUnsupportedLayersForBP() { innerProductParams->set_hasbias(true); innerProductParams->mutable_bias()->add_floatvalue(1.0); innerProductParams->mutable_bias()->set_isupdatable(true); - + auto *l2 = nn->add_layers(); l2->set_name("abs_layer"); l2->add_input("ip"); l2->add_output("abs_out"); auto *elem = l2->mutable_unary(); elem->set_type(Specification::UnaryFunctionLayerParams::ABS); - + auto *l3 = nn->add_layers(); l3->set_name("pooling_layer"); l3->add_input("abs_out"); @@ -348,70 +347,70 @@ int testInvalidModelUnsupportedLayersForBP() { params->set_type(::Specification::PoolingLayerParams::AVERAGE); params->set_globalpooling(true); params->mutable_valid(); - + Specification::NetworkUpdateParameters *updateParams = nn->mutable_updateparams(); Specification::LossLayer *lossLayer = updateParams->add_losslayers(); lossLayer->set_name("loss_layer"); - + Specification::CategoricalCrossEntropyLossLayer *ceLossLayer = lossLayer->mutable_categoricalcrossentropylosslayer(); ceLossLayer->set_input("B"); ceLossLayer->set_target("label_target"); - + Result res = Model::validate(m); ML_ASSERT_BAD(res); return 0; } int testInvalidUpdatableLayerMissingBias() { - + /* an inner product layer marked as updatable must have bias parameter */ - + Specification::Model m1; - + auto *topIn = m1.mutable_description()->add_input(); topIn->set_name("input"); auto *shape = topIn->mutable_type()->mutable_multiarraytype(); shape->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32); shape->add_shape(1); - + auto *out = m1.mutable_description()->add_output(); out->set_name("probs"); auto 
*type_out = out->mutable_type()->mutable_multiarraytype(); type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_INT32); - + m1.set_isupdatable(true); m1.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); - + const auto nn = m1.mutable_neuralnetwork(); - + Specification::NeuralNetworkLayer *innerProductLayer = nn->add_layers(); innerProductLayer->set_name("ip"); innerProductLayer->add_input("input"); innerProductLayer->add_output("probs"); innerProductLayer->set_isupdatable(true); - + Specification::InnerProductLayerParams *innerProductParams = innerProductLayer->mutable_innerproduct(); innerProductParams->set_inputchannels(1); innerProductParams->set_outputchannels(1); - + innerProductParams->mutable_weights()->add_floatvalue(1.0); - + innerProductParams->set_hasbias(false); - + Result res = Model::validate(m1); - + ML_ASSERT_BAD(res); return 0; } int testInvalidCategoricalCrossEntropyLossLayerInputs() { - + /* at least one of the inputs of the loss layer must be produced within the inference network, otherwise the model is invalid. */ - + Specification::Model m; auto *topIn = m.mutable_description()->add_input(); topIn->set_name("A"); @@ -421,15 +420,15 @@ int testInvalidCategoricalCrossEntropyLossLayerInputs() { shape->add_shape(1); shape->add_shape(1); shape->add_shape(1); - + auto *out = m.mutable_description()->add_output(); out->set_name("B"); auto *type_out = out->mutable_type()->mutable_multiarraytype(); type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_FLOAT32); - + m.set_isupdatable(true); m.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); - + auto *nn = m.mutable_neuralnetwork(); auto *l1 = nn->add_layers(); l1->set_name("inner_layer"); @@ -444,26 +443,26 @@ int testInvalidCategoricalCrossEntropyLossLayerInputs() { innerProductParams->set_hasbias(true); innerProductParams->mutable_bias()->add_floatvalue(1.0); innerProductParams->mutable_bias()->set_isupdatable(true); - + Specification::NetworkUpdateParameters *updateParams = nn->mutable_updateparams(); Specification::LossLayer *lossLayer = updateParams->add_losslayers(); lossLayer->set_name("cross_entropy_loss_layer"); - + Specification::CategoricalCrossEntropyLossLayer *ceLossLayer = lossLayer->mutable_categoricalcrossentropylosslayer(); ceLossLayer->set_input("C"); ceLossLayer->set_target("label_target"); - + Result res = Model::validate(m); ML_ASSERT_BAD(res); return 0; } int testInvalidMeanSquaredErrorLossLayerInputs() { - + /* at least one of the inputs of the loss layer must be produced within the inference network, otherwise the model is invalid. 
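(Below, the MSE loss layer reads input "C", but the inference network only produces "B", so validation must reject the model.)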
*/ - + Specification::Model m; auto *topIn = m.mutable_description()->add_input(); topIn->set_name("A"); @@ -473,15 +472,15 @@ int testInvalidMeanSquaredErrorLossLayerInputs() { shape->add_shape(1); shape->add_shape(1); shape->add_shape(1); - + auto *out = m.mutable_description()->add_output(); out->set_name("B"); auto *type_out = out->mutable_type()->mutable_multiarraytype(); type_out->set_datatype(Specification::ArrayFeatureType_ArrayDataType_FLOAT32); - + m.set_isupdatable(true); m.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); - + auto *nn = m.mutable_neuralnetwork(); auto *l1 = nn->add_layers(); l1->set_name("inner_layer"); @@ -496,15 +495,15 @@ int testInvalidMeanSquaredErrorLossLayerInputs() { innerProductParams->set_hasbias(true); innerProductParams->mutable_bias()->add_floatvalue(1.0); innerProductParams->mutable_bias()->set_isupdatable(true); - + Specification::NetworkUpdateParameters *updateParams = nn->mutable_updateparams(); Specification::LossLayer *lossLayer = updateParams->add_losslayers(); lossLayer->set_name("mse_loss_layer"); - + Specification::MeanSquaredErrorLossLayer *mseLossLayer = lossLayer->mutable_meansquarederrorlosslayer(); mseLossLayer->set_input("C"); mseLossLayer->set_target("label_target"); - + Result res = Model::validate(m); ML_ASSERT_BAD(res); return 0; @@ -518,21 +517,21 @@ int testInvalidModelInvalidSoftmax() { // |Dense| --> |SM| --> |Dense| --> |SM| --> |CCE| // ----- -- ----- -- --- // updatable updatable - + Specification::Model m; auto *nn = buildBasicUpdatableNeuralNetworkModel(m); - + // add a softmax layer nn = addSoftmaxLayer(m, "softmax", "B", "softmax_out"); - + // add an updatable inner product layer after softmax TensorAttributes inTensorAttr = { "softmax_out", 3 }; TensorAttributes outTensorAttr = { "inner_layer_after_softmax_out", 1 }; nn = addInnerProductLayer(m, true, "inner_layer_after_softmax", &inTensorAttr, &outTensorAttr); - + // add second softmax layer nn = addSoftmaxLayer(m, "softmax_2", "inner_layer_after_softmax_out", "softmax_2_out"); - + // set a CCE loss layer Specification::NetworkUpdateParameters *updateParams = nn->mutable_updateparams(); Specification::LossLayer *lossLayer = updateParams->add_losslayers(); @@ -540,12 +539,12 @@ int testInvalidModelInvalidSoftmax() { Specification::CategoricalCrossEntropyLossLayer *ceLossLayer = lossLayer->mutable_categoricalcrossentropylosslayer(); ceLossLayer->set_input("softmax_2_out"); ceLossLayer->set_target("label_target"); - + // now add an updatable model parameter. addLearningRate(nn, Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(nn, Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(nn, 100, 1, 100, std::set()); - + Result res = Model::validate(m); // "validator error: There is a layer (softmax), which does not support backpropagation, between an updatable marked layer and the loss function." ML_ASSERT_BAD(res); @@ -553,27 +552,27 @@ int testInvalidModelInvalidSoftmax() { } int testValidModelValidMultipleSoftmax_1() { - + // This test creates the following model. This is a valid model. The first softmax does not need to back prop // cause the first dense is NOT updatable. Second softmax is also valid cause it is attached to CCE. 
// ----- -- ----- -- --- // |Dense| -- > |SM| --> |Dense| --> |SM| --> |CCE| // ----- -- ----- -- --- // non-updatable updatable - + Specification::Model m; TensorAttributes inTensorAttr = { "A", 3 }; TensorAttributes outTensorAttr = { "B", 1 }; auto *nn = buildBasicNeuralNetworkModel(m, false, &inTensorAttr, &outTensorAttr); - + // add a softmax layer nn = addSoftmaxLayer(m, "softmax", "B", "softmax_out"); - + // add an updatable inner product layer inTensorAttr = { "softmax_out", 1 }; outTensorAttr = { "inner_layer_after_softmax_out", 1 }; nn = addInnerProductLayer(m, true, "inner_layer_after_softmax", &inTensorAttr, &outTensorAttr); - + // add second softmax layer nn = addSoftmaxLayer(m, "softmax_2", "inner_layer_after_softmax_out", "softmax_2_out"); @@ -584,19 +583,19 @@ int testValidModelValidMultipleSoftmax_1() { Specification::CategoricalCrossEntropyLossLayer *ceLossLayer = lossLayer->mutable_categoricalcrossentropylosslayer(); ceLossLayer->set_input("softmax_2_out"); ceLossLayer->set_target("label_target"); - + // now add an updatable model parameter. addLearningRate(nn, Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(nn, Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(nn, 100, 1, 100, std::set()); - + Result res = Model::validate(m); ML_ASSERT_GOOD(res); return 0; } int testValidModelValidMultipleSoftmax_2() { - + // This test creates the following model. This is a valid model. The first softmax is attached to CCE. // Second softmax is also valid. Although it is not attached to CCE, it is out of backpropagation scope. // ----- -- ----- -- @@ -606,13 +605,13 @@ int testValidModelValidMultipleSoftmax_2() { // | --- // --> |CCE| // --- - + Specification::Model m; auto *nn = buildBasicUpdatableNeuralNetworkModel(m); - + // add a softmax layer nn = addSoftmaxLayer(m, "softmax", "B", "softmax_out"); - + // attach a CCE loss layer to softmax addCategoricalCrossEntropyLoss(m, nn, "cross_entropy_loss_layer", "softmax_out", "label_target"); @@ -620,22 +619,22 @@ int testValidModelValidMultipleSoftmax_2() { TensorAttributes inTensorAttr = { "softmax_out", 1 }; TensorAttributes outTensorAttr = { "inner_layer_after_softmax_out", 1 }; nn = addInnerProductLayer(m, true, "inner_layer_after_softmax", &inTensorAttr, &outTensorAttr); - + // add a second softmax layer nn = addSoftmaxLayer(m, "softmax_2", "inner_layer_after_softmax_out", "softmax_2_out"); - + // now add an updatable model parameter. addLearningRate(nn, Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(nn, Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(nn, 100, 1, 100, std::set()); - + Result res = Model::validate(m); ML_ASSERT_GOOD(res); return 0; } int testValidModelMultipleSoftmaxOutputs() { - + // This test creates the following model. This is a valid case. 
// ----- -- ----- // |Dense| -- > |SM| --> |Dense| @@ -644,18 +643,18 @@ int testValidModelMultipleSoftmaxOutputs() { // | --- // --> |CCE| // --- - + Specification::Model m; auto *nn = buildBasicUpdatableNeuralNetworkModel(m); - + // add a softmax layer nn = addSoftmaxLayer(m, "softmax", "B", "softmax_out"); - + // add a non-updatable inner product layer TensorAttributes inTensorAttr = { "softmax_out", 3 }; TensorAttributes outTensorAttr = { "inner_layer_after_softmax_out", 1 }; nn = addInnerProductLayer(m, false, "inner_layer_after_softmax", &inTensorAttr, &outTensorAttr); - + // attach a CCE to softmax addCategoricalCrossEntropyLoss(m, nn, "cross_entropy_loss_layer", "softmax_out", "label_target"); @@ -664,15 +663,15 @@ int testValidModelMultipleSoftmaxOutputs() { addLearningRate(nn, Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(nn, Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(nn, 100, 1, 100, std::set()); - + Result res = Model::validate(m); ML_ASSERT_GOOD(res); return 0; - + } int testInvalidModelMultipleLoss() { - + // This test creates the following model. This is an invalid case as a model with multiple losses is not supported // ----- -- ----- -- --- // |Dense| -- > |SM| --> |Dense| --> |SM| --> |CCE| @@ -681,13 +680,13 @@ int testInvalidModelMultipleLoss() { // | --- // --> |CCE| // --- - + Specification::Model m; auto *nn = buildBasicUpdatableNeuralNetworkModel(m); - + // add a softmax layer nn = addSoftmaxLayer(m, "softmax", "B", "softmax_out"); - + // add an updatable inner product layer TensorAttributes inTensorAttr = { "softmax_out", 3 }; TensorAttributes outTensorAttr = { "inner_layer_after_softmax_out", 1 }; @@ -705,7 +704,7 @@ int testInvalidModelMultipleLoss() { addLearningRate(nn, Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(nn, Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(nn, 100, 1, 100, std::set()); - + Result res = Model::validate(m); // "validator error: This model has more than one loss layers specified, which is not supported at the moment." ML_ASSERT_BAD(res); @@ -714,7 +713,7 @@ int testInvalidModelMultipleLoss() { void buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(Specification::Model& m); void buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(Specification::Model& m) { - + auto *nn = buildBasicUpdatableNeuralNetworkModel(m); // set a softmax layer @@ -729,29 +728,29 @@ void buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(Specification void buildBasicUpdatableModelWithMeanSquaredError(Specification::Model& m); void buildBasicUpdatableModelWithMeanSquaredError(Specification::Model& m) { - + auto *nn = buildBasicUpdatableNeuralNetworkModel(m); addMeanSquareError(m, nn, "mean_squared_error_loss_layer", "B", "label_target"); } int testMissingUpdatableModelParameters() { - + Specification::Model m; - + // basic neural network model with CCE and softmax without any updatable model parameters. buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); - + // expect validation to fail due to missing updatable model parameters. Result res = Model::validate(m); ML_ASSERT_BAD(res); - + // clear model and redo test with MSE m.Clear(); - + // basic neural network model with MSE without any updatable model parameters. buildBasicUpdatableModelWithMeanSquaredError(m); - + // expect validation to fail due to missing updatable model parameters. 
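// (The required parameters are exercised one at a time in the tests below:
// learningRate, miniBatchSize and epochs for SGD, plus beta1, beta2 and eps for Adam.)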
res = Model::validate(m); ML_ASSERT_BAD(res); @@ -764,20 +763,20 @@ int testUpdatableModelSpecVersion() { checks that an updatable model has correct spec version - MLMODEL_SPECIFICATION_VERSION_IOS13 */ - + Specification::Model m; buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); // now add an updatable model parameter. addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); - + // now set incorrect spec version m.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS12); - + Result res = Model::validate(m); ML_ASSERT_BAD(res); // "Model specification version for an updatable model must be '4' or above." - + // fix spec version m.set_specificationversion(MLMODEL_SPECIFICATION_VERSION_IOS13); res = Model::validate(m); @@ -785,27 +784,110 @@ int testUpdatableModelSpecVersion() { return 0; } +int testInvalidUpdatableModelQuantizedWeights() { + Specification::Model m; + TensorAttributes inTensorAttr = { "A", 3 }; + TensorAttributes outTensorAttr = { "B", 1 }; + auto *nn = buildBasicNeuralNetworkModel(m, true, &inTensorAttr, &outTensorAttr, 1, true, false); + + // set a softmax layer + auto softmaxLayer = nn->add_layers(); + softmaxLayer->set_name("softmax"); + softmaxLayer->add_input("B"); + softmaxLayer->add_output("softmax_out"); + softmaxLayer->mutable_softmax(); + + addCategoricalCrossEntropyLoss(m, nn, "cross_entropy_loss_layer", "softmax_out", "label_target"); + + // now add updatable model parameters. + addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); + addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); + addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); + + // expect validation to fail. + // "validator error: An updatable layer, named 'inner_layer', has quantized weights/bias param. Quantized weights/bias not supported for update." + Result res = Model::validate(m); + ML_ASSERT_BAD(res); + return 0; +} + +int testInvalidUpdatableModelQuantizedBias() { + Specification::Model m; + TensorAttributes inTensorAttr = { "A", 3 }; + TensorAttributes outTensorAttr = { "B", 1 }; + auto *nn = buildBasicNeuralNetworkModel(m, true, &inTensorAttr, &outTensorAttr, 1, false, true); + + // set a softmax layer + auto softmaxLayer = nn->add_layers(); + softmaxLayer->set_name("softmax"); + softmaxLayer->add_input("B"); + softmaxLayer->add_output("softmax_out"); + softmaxLayer->mutable_softmax(); + + // set a CCE loss layer + addCategoricalCrossEntropyLoss(m, nn, "cross_entropy_loss_layer", "softmax_out", "label_target"); + + // now add updatable model parameters. + addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); + addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); + addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); + + // expect validation to fail. + // "validator error: An updatable layer, named 'inner_layer', has quantized weights/bias param. Quantized weights/bias not supported for update." 
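+ // (Here only the bias is quantized; the trailing buildBasicNeuralNetworkModel flags
+ // appear to toggle quantized weights and quantized bias. A single quantized parameter
+ // on an updatable layer is enough to fail validation.)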
+ Result res = Model::validate(m); + ML_ASSERT_BAD(res); + return 0; +} + +int testValidUpdatableModelQuantizedWeightsAndBiasForNonUpdatableLayer() { + Specification::Model m; + TensorAttributes inTensorAttr = { "A", 3 }; + TensorAttributes outTensorAttr = { "B", 1 }; + auto *nn = buildBasicNeuralNetworkModel(m, true, &inTensorAttr, &outTensorAttr, 1, false, false); + + // add a non-updatable inner product with quantized weights after softmax + inTensorAttr = { "B", 1 }; + outTensorAttr = { "non_updatable_fc_output", 1 }; + nn = addInnerProductLayer(m, false, "non_updatable_fc", &inTensorAttr, &outTensorAttr, true, true); + + // add a softmax layer + nn = addSoftmaxLayer(m, "softmax", "non_updatable_fc_output", "softmax_out"); + + // set a CCE loss layer + addCategoricalCrossEntropyLoss(m, nn, "cross_entropy_loss_layer", "softmax_out", "label_target"); + + // now add updatable model parameters. + addLearningRate(nn, Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); + addMiniBatchSize(nn, Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); + addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); + + // expect validation to pass. + Result res = Model::validate(m); + ML_ASSERT_GOOD(res); + return 0; +} + int testMissingMiniBatchSizeParameter() { - + Specification::Model m; - + // basic neural network model with CCE and softmax without any updatable model parameters. buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); - + // expect validation to fail due to missing updatable model parameters. Result res = Model::validate(m); ML_ASSERT_BAD(res); - + // now add an updatable model parameter. addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); - + // expect validation to still fail due to missing mini batch size. res = Model::validate(m); ML_ASSERT_BAD(res); - + addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); - + // expect validation to pass. res = Model::validate(m); ML_ASSERT_GOOD(res); @@ -813,26 +895,26 @@ int testMissingMiniBatchSizeParameter() { } int testMissingLearningRateParameter() { - + Specification::Model m; - + // basic neural network model without any updatable model parameters. buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); - + // expect validation to fail due to missing updatable model parameters. Result res = Model::validate(m); ML_ASSERT_BAD(res); - + // now add an updatable model parameter. addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); - + // expect validation to still fail due to missing optimizer. res = Model::validate(m); ML_ASSERT_BAD(res); - + addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); - + // expect validation to pass. res = Model::validate(m); ML_ASSERT_GOOD(res); @@ -869,28 +951,28 @@ int testMissingBeta1Parameter() { } int testMissingBeta2Parameter() { - + Specification::Model m; - + // basic neural network model without any updatable model parameters. buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); - + // expect validation to fail due to missing updatable model parameters. Result res = Model::validate(m); ML_ASSERT_BAD(res); - + // now add an updatable model parameter. 
addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 10, 5, 100, std::set()); addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); addBeta1(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); addEps(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); - + res = Model::validate(m); ML_ASSERT_BAD(res); - + addBeta2(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); - + // expect validation to pass. res = Model::validate(m); ML_ASSERT_GOOD(res); @@ -898,28 +980,28 @@ int testMissingEpsParameter() { - + Specification::Model m; - + // basic neural network model without any updatable model parameters. buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); - + // expect validation to fail due to missing updatable model parameters. Result res = Model::validate(m); ML_ASSERT_BAD(res); - + // now add an updatable model parameter. addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 10, 5, 100, std::set()); addEpochs(m.mutable_neuralnetwork(), 100, 1, 100, std::set()); addBeta1(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); addBeta2(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); - + res = Model::validate(m); ML_ASSERT_BAD(res); - + addEps(m.mutable_neuralnetwork(), Specification::Optimizer::kAdamOptimizer, 0.7f, 0.0f, 1.0f); - + // expect validation to pass. res = Model::validate(m); ML_ASSERT_GOOD(res); @@ -927,20 +1009,20 @@ int testMissingEpochsParameter() { - + Specification::Model m; - + // basic neural network model without any updatable model parameters. buildBasicUpdatableModelWithCategoricalCrossEntropyAndSoftmax(m); - + // expect validation to fail due to missing updatable model parameters. Result res = Model::validate(m); ML_ASSERT_BAD(res); - + // now add an updatable model parameter. addMiniBatchSize(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 10, 5, 100, std::set()); addLearningRate(m.mutable_neuralnetwork(), Specification::Optimizer::kSgdOptimizer, 0.7f, 0.0f, 1.0f); - + // expect validation to still fail due to missing epochs parameter.
res = Model::validate(m); ML_ASSERT_BAD(res); @@ -965,318 +1047,318 @@ int testExistingShuffleWithMissingSeedParameter() { } int testNonUpdatablePipelineWithNonUpdatableModels() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline = buildEmptyPipelineModelWithStringOutput(spec, false, &tensorAttributesA, "E"); - + auto m1 = pipeline->add_models(); auto m2 = pipeline->add_models(); auto m3 = pipeline->add_models(); auto m4 = pipeline->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, false, &tensorAttributesA, &tensorAttributesB); (void)buildBasicNeuralNetworkModel(*m2, false, &tensorAttributesB, &tensorAttributesC); (void)buildBasicNeuralNetworkModel(*m3, false, &tensorAttributesC, &tensorAttributesD); (void)buildBasicNearestNeighborClassifier(*m4, false, &tensorAttributesD, "E"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_GOOD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + // expect validation to pass! res = Model::validate(spec); ML_ASSERT_GOOD(res); - + return 0; } int testNonUpdatablePipelineWithOneUpdatableModel() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline = buildEmptyPipelineModelWithStringOutput(spec, false, &tensorAttributesA, "E"); - + auto m1 = pipeline->add_models(); auto m2 = pipeline->add_models(); auto m3 = pipeline->add_models(); auto m4 = pipeline->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, false, &tensorAttributesA, &tensorAttributesB); (void)buildBasicNeuralNetworkModel(*m2, false, &tensorAttributesB, &tensorAttributesC); (void)buildBasicNeuralNetworkModel(*m3, false, &tensorAttributesC, &tensorAttributesD); (void)buildBasicNearestNeighborClassifier(*m4, true, &tensorAttributesD, "E"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_GOOD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + // expect validation to fail due to presence of updatable model in non-updatable pipeline.
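// (The enclosing pipeline was created with its updatable flag set to false, so the one
// updatable nearest-neighbor classifier, m4, is enough to invalidate the whole spec.)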
res = Model::validate(spec); ML_ASSERT_BAD(res); - + return 0; } int testNonUpdatablePipelineWithOneUpdatableModelInsidePipelineHierarchy() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline1 = buildEmptyPipelineModelWithStringOutput(spec, false, &tensorAttributesA, "E"); - + auto m1 = pipeline1->add_models(); auto m2 = pipeline1->add_models(); auto m3 = pipeline1->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, false, &tensorAttributesA, &tensorAttributesB); auto pipeline2 = buildEmptyPipelineModel(*m2, false, &tensorAttributesB, &tensorAttributesD); (void)buildBasicNearestNeighborClassifier(*m3, false, &tensorAttributesD, "E"); - + auto m4 = pipeline2->add_models(); auto m5 = pipeline2->add_models(); - + (void)buildBasicNeuralNetworkModel(*m4, false, &tensorAttributesB, &tensorAttributesC); (void)buildBasicNeuralNetworkModel(*m5, true, &tensorAttributesC, &tensorAttributesD); addCategoricalCrossEntropyLossWithSoftmaxAndSGDOptimizer(*m5, "D"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_BAD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + res = Model::validate(*m5); ML_ASSERT_GOOD(res); - + // expect validation to fail due to presence of updatable model in non-updatable pipeline. res = Model::validate(spec); ML_ASSERT_BAD(res); - + return 0; } int testUpdatablePipelineWithNonUpdatableModels() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline = buildEmptyPipelineModelWithStringOutput(spec, true, &tensorAttributesA, "E"); - + auto m1 = pipeline->add_models(); auto m2 = pipeline->add_models(); auto m3 = pipeline->add_models(); auto m4 = pipeline->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, false, &tensorAttributesA, &tensorAttributesB); (void)buildBasicNeuralNetworkModel(*m2, false, &tensorAttributesB, &tensorAttributesC); (void)buildBasicNeuralNetworkModel(*m3, false, &tensorAttributesC, &tensorAttributesD); (void)buildBasicNearestNeighborClassifier(*m4, false, &tensorAttributesD, "E"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_GOOD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + // expect validation to fail due to missing updatable model in pipeline.
res = Model::validate(spec); ML_ASSERT_BAD(res); - + return 0; } int testUpdatablePipelineWithMultipleUpdatableModels() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline = buildEmptyPipelineModelWithStringOutput(spec, true, &tensorAttributesA, "E"); - + auto m1 = pipeline->add_models(); auto m2 = pipeline->add_models(); auto m3 = pipeline->add_models(); auto m4 = pipeline->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, true, &tensorAttributesA, &tensorAttributesB); addCategoricalCrossEntropyLossWithSoftmaxAndSGDOptimizer(*m1, "B"); - + (void)buildBasicNeuralNetworkModel(*m2, false, &tensorAttributesB, &tensorAttributesC); - + (void)buildBasicNeuralNetworkModel(*m3, true, &tensorAttributesC, &tensorAttributesD); addCategoricalCrossEntropyLossWithSoftmaxAndSGDOptimizer(*m3, "D"); - + (void)buildBasicNearestNeighborClassifier(*m4, false, &tensorAttributesD, "E"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_GOOD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + // expect validation to fail due to multiple updatable models in the pipeline. res = Model::validate(spec); ML_ASSERT_BAD(res); - + return 0; } int testUpdatablePipelineWithOneUpdatableModel() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline = buildEmptyPipelineModelWithStringOutput(spec, true, &tensorAttributesA, "E"); - + auto m1 = pipeline->add_models(); auto m2 = pipeline->add_models(); auto m3 = pipeline->add_models(); auto m4 = pipeline->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, false, &tensorAttributesA, &tensorAttributesB); (void)buildBasicNeuralNetworkModel(*m2, false, &tensorAttributesB, &tensorAttributesC); (void)buildBasicNeuralNetworkModel(*m3, false, &tensorAttributesC, &tensorAttributesD); (void)buildBasicNearestNeighborClassifier(*m4, true, &tensorAttributesD, "E"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_GOOD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + // expect validation to pass! 
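// (Unlike the two failing pipeline cases above, exactly one model here, the
// nearest-neighbor classifier m4, is updatable, which is what the validator accepts.)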
res = Model::validate(spec); ML_ASSERT_GOOD(res); - + return 0; } int testUpdatablePipelineWithOneUpdatableModelInsidePipelineHierarchy() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesA = { "A", 3 }; TensorAttributes tensorAttributesB = { "B", 1 }; TensorAttributes tensorAttributesC = { "C", 1 }; TensorAttributes tensorAttributesD = { "D", 3 }; - + auto pipeline1 = buildEmptyPipelineModelWithStringOutput(spec, true, &tensorAttributesA, "E"); - + auto m1 = pipeline1->add_models(); auto m2 = pipeline1->add_models(); auto m3 = pipeline1->add_models(); - + (void)buildBasicNeuralNetworkModel(*m1, false, &tensorAttributesA, &tensorAttributesB); (void)buildBasicNeuralNetworkModel(*m2, false, &tensorAttributesB, &tensorAttributesC); auto pipeline2 = buildEmptyPipelineModelWithStringOutput(*m3, true, &tensorAttributesC, "E"); - + auto m4 = pipeline2->add_models(); auto m5 = pipeline2->add_models(); - + (void)buildBasicNeuralNetworkModel(*m4, false, &tensorAttributesC, &tensorAttributesD); (void)buildBasicNearestNeighborClassifier(*m5, true, &tensorAttributesD, "E"); - + res = Model::validate(*m1); ML_ASSERT_GOOD(res); - + res = Model::validate(*m2); ML_ASSERT_GOOD(res); - + res = Model::validate(*m3); ML_ASSERT_GOOD(res); - + res = Model::validate(*m4); ML_ASSERT_GOOD(res); - + res = Model::validate(*m5); ML_ASSERT_GOOD(res); - + // expect validation to pass! res = Model::validate(spec); ML_ASSERT_GOOD(res); - + return 0; } int testValidUpdatableModelWith1024Layers() { - + Specification::Model spec; Result res; TensorAttributes tensorAttributesIn = { "InTensor", 3 }; TensorAttributes tensorAttributesOut = { "OutTensor", 1 }; - + (void)buildBasicNeuralNetworkModel(spec, true, &tensorAttributesIn, &tensorAttributesOut, 1024); addCategoricalCrossEntropyLossWithSoftmaxAndSGDOptimizer(spec, "OutTensor"); - + res = Model::validate(spec); ML_ASSERT_GOOD(res); - + return 0; } @@ -2392,5 +2474,3 @@ int testValid_Pipeline() { return 0; } - - diff --git a/mlmodel/tests/UtilsTests.cpp b/mlmodel/tests/UtilsTests.cpp index 2ff4834cd..557549b86 100644 --- a/mlmodel/tests/UtilsTests.cpp +++ b/mlmodel/tests/UtilsTests.cpp @@ -80,3 +80,61 @@ int testSpecDowngradePipeline() { return 0; } + +int testWordTaggerTransferLearningSpecIOS14() { + Specification::Model spec; + + //initialization + spec.set_specificationversion(MLMODEL_SPECIFICATION_VERSION); + Specification::ModelDescription* interface = spec.mutable_description(); + Specification::Metadata* metadata = interface->mutable_metadata(); + metadata->set_shortdescription(std::string("This is a Word tagger model")); + + auto *input = interface->add_input(); + input->mutable_type()->mutable_stringtype(); + input->set_name(std::string("text")); + + auto *output1 = interface->add_output(); + output1->mutable_type()->mutable_sequencetype()->mutable_stringtype(); + output1->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_lowerbound(0); + output1->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_upperbound(10000); + output1->set_name(std::string("tags")); + auto *output2 = interface->add_output(); + output2->mutable_type()->mutable_sequencetype()->mutable_int64type(); + output2->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_lowerbound(0); + output2->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_upperbound(10000); + output2->set_name(std::string("locations")); + auto *output3 = interface->add_output(); + 
output3->mutable_type()->mutable_sequencetype()->mutable_int64type(); + output3->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_lowerbound(0); + output3->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_upperbound(10000); + output3->set_name(std::string("lengths")); + auto *output4 = interface->add_output(); + output4->mutable_type()->mutable_sequencetype()->mutable_stringtype(); + output4->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_lowerbound(0); + output4->mutable_type()->mutable_sequencetype()->mutable_sizerange()->set_upperbound(10000); + output4->set_name(std::string("tokens")); + + auto *wordTagger = spec.mutable_wordtagger(); + wordTagger->set_language(std::string("en-US")); + wordTagger->set_tokensoutputfeaturename(std::string("tokens")); + wordTagger->set_tokentagsoutputfeaturename(std::string("tags")); + wordTagger->set_tokenlocationsoutputfeaturename(std::string("locations")); + wordTagger->set_tokenlengthsoutputfeaturename(std::string("lengths")); + auto *tags = wordTagger->mutable_stringtags(); + tags->add_vector("PER"); + tags->add_vector("LOC"); + std::string modelData = "This is a dummy model"; + wordTagger->set_modelparameterdata(modelData); + wordTagger->set_revision(3); //transfer learning using revision 3 + + // Constructing a CoreML::Model should downgrade spec on load + Model model1(spec); + ML_ASSERT_EQ(model1.getProto().specificationversion(), MLMODEL_SPECIFICATION_VERSION_IOS14); + + wordTagger->set_revision(1); + Model model2(spec); + ML_ASSERT_EQ(model2.getProto().specificationversion(), MLMODEL_SPECIFICATION_VERSION_IOS12); + + return 0; +} diff --git a/mlmodel/tests/VisionFeaturePrintValidatorTests.cpp b/mlmodel/tests/VisionFeaturePrintValidatorTests.cpp new file mode 100644 index 000000000..2ee14b8eb --- /dev/null +++ b/mlmodel/tests/VisionFeaturePrintValidatorTests.cpp @@ -0,0 +1,110 @@ +// +// VisionFeaturePrintValidatorTests.cpp +// CoreML_framework +// +// Created by Tao Jia on 3/20/20. +// Copyright © 2019 Apple Inc. All rights reserved.
+// + +#include "MLModelTests.hpp" +#include "../src/Format.hpp" +#include "../src/Model.hpp" +#include "ParameterTests.hpp" + +#include "framework/TestUtils.hpp" + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsign-conversion" + +using namespace CoreML; + +int testVisionFeatureScenePrintBasic() { + + Specification::ImageFeatureType* inputImageFeatureType = new Specification::ImageFeatureType(); + Specification::FeatureType* inputFeatureType = new Specification::FeatureType(); + inputFeatureType->set_allocated_imagetype(inputImageFeatureType); + + Specification::ArrayFeatureType* outputArrayFeatureType = new Specification::ArrayFeatureType(); + Specification::FeatureType* outputFeatureType = new Specification::FeatureType(); + outputFeatureType->set_allocated_multiarraytype(outputArrayFeatureType); + + Specification::ModelDescription* description = new Specification::ModelDescription(); + Specification::FeatureDescription* input = description->add_input(); + Specification::FeatureDescription* output = description->add_output(); + input->set_allocated_type(inputFeatureType); + output->set_allocated_type(outputFeatureType); + + Specification::Model model; + model.set_allocated_description(description); + + Result result = validate(model); + ML_ASSERT_BAD(result); + + auto *preprocessing = model.mutable_visionfeatureprint(); + result = validate(model); + ML_ASSERT_BAD(result); + + preprocessing->mutable_scene(); + result = validate(model); + ML_ASSERT_BAD(result); + + preprocessing->mutable_scene()->set_version(Specification::CoreMLModels::VisionFeaturePrint_Scene_SceneVersion_SCENE_VERSION_1); + result = validate(model); + ML_ASSERT_GOOD(result); + + return 0; +} + +int testVisionFeatureObjectPrintBasic() { + + Specification::ImageFeatureType* inputImageFeatureType = new Specification::ImageFeatureType(); + Specification::FeatureType* inputFeatureType = new Specification::FeatureType(); + inputFeatureType->set_allocated_imagetype(inputImageFeatureType); + + Specification::ArrayFeatureType* output1ArrayFeatureType = new Specification::ArrayFeatureType(); + Specification::FeatureType* output1FeatureType = new Specification::FeatureType(); + output1FeatureType->set_allocated_multiarraytype(output1ArrayFeatureType); + + Specification::ArrayFeatureType* output2ArrayFeatureType = new Specification::ArrayFeatureType(); + Specification::FeatureType* output2FeatureType = new Specification::FeatureType(); + output2FeatureType->set_allocated_multiarraytype(output2ArrayFeatureType); + + Specification::ModelDescription* description = new Specification::ModelDescription(); + Specification::FeatureDescription* input = description->add_input(); + Specification::FeatureDescription* output1 = description->add_output(); + Specification::FeatureDescription* output2 = description->add_output(); + input->set_allocated_type(inputFeatureType); + output1->set_allocated_type(output1FeatureType); + output2->set_allocated_type(output2FeatureType); + + output1->set_name("a"); + output2->set_name("b"); + + Specification::Model model; + model.set_allocated_description(description); + + Result result = validate(model); + ML_ASSERT_BAD(result); + + auto *preprocessing = model.mutable_visionfeatureprint(); + result = validate(model); + ML_ASSERT_BAD(result); + + auto object = preprocessing->mutable_object(); + result = validate(model); + ML_ASSERT_BAD(result); + + object->set_version(Specification::CoreMLModels::VisionFeaturePrint_Object_ObjectVersion_OBJECT_VERSION_1); + result = validate(model); + 
ML_ASSERT_BAD(result); + + object->add_output("a"); + result = validate(model); + ML_ASSERT_BAD(result); + object->add_output("b"); + result = validate(model); + ML_ASSERT_GOOD(result); + + return 0; +} + diff --git a/mlmodel/tests/framework/TestUtils.hpp b/mlmodel/tests/framework/TestUtils.hpp index 35cd2fdec..f0a2afc4a 100644 --- a/mlmodel/tests/framework/TestUtils.hpp +++ b/mlmodel/tests/framework/TestUtils.hpp @@ -4,8 +4,22 @@ return 1; \ } } \ +#define ML_ASSERT_NOT(x) ML_ASSERT(!(x)) + #define ML_ASSERT_GOOD(x) ML_ASSERT((x).good()) #define ML_ASSERT_BAD(x) ML_ASSERT(!((x).good())) +#define ML_ASSERT_BAD_WITH_REASON(x, r) ML_ASSERT_BAD((x)); ML_ASSERT_EQ((r), (x).reason()) +#define ML_ASSERT_BAD_WITH_TYPE(x, t) ML_ASSERT_BAD((x)); ML_ASSERT_EQ((t), (x).type()) #define ML_ASSERT_EQ(x, y) ML_ASSERT((x) == (y)) +#define ML_ASSERT_NE(x, y) ML_ASSERT((x) != (y)) #define ML_ASSERT_LT(x, y) ML_ASSERT((x) < (y)) #define ML_ASSERT_GT(x, y) ML_ASSERT((x) > (y)) +#define ML_ASSERT_NULL(x) ML_ASSERT((x) == nullptr) +#define ML_ASSERT_NOT_NULL(x) ML_ASSERT((x) != nullptr) + +#define ML_ASSERT_THROWS(expr, exType) { \ + bool caughtCorrectException = false; \ + try { expr; } \ + catch (const exType&) { caughtCorrectException = true; } \ + catch (...) { std::clog << __FILE__ << ":" << __LINE__ << ": error: caught unexpected exception type.\n"; return 1;} \ + if (!caughtCorrectException) { std::clog << __FILE__ << ":" << __LINE__ << ": expected exception, but none thrown.\n"; return 1; } } diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..fc13d158b --- /dev/null +++ b/pytest.ini @@ -0,0 +1,21 @@ +[pytest] +log_cli = false +log_cli_level = INFO + +log_file = test.log +log_file_level = DEBUG + +log_format = [%(asctime)s %(levelname)s %(pathname)s:%(lineno)s] %(message)s +log_file_format = [%(asctime)s %(levelname)s %(pathname)s:%(lineno)s] %(message)s +log_date_format = %Y%m%d %H:%M:%S +log_file_date_format = %Y%m%d %H:%M:%S + +markers = + slow: tests that take more than a second or so to run + tf_slow_tests: TensorFlow conversion tests taking more than a minute or so + +filterwarnings = + module + ignore:.*the imp module.*:DeprecationWarning + ignore:.*as a synonym of type is deprecated:FutureWarning + ignore:.*resolve package from __spec__ or __package__:ImportWarning diff --git a/reqs/build.pip b/reqs/build.pip new file mode 100644 index 000000000..bb375b7f8 --- /dev/null +++ b/reqs/build.pip @@ -0,0 +1,6 @@ +numpy==1.14.5; python_version <= '2.7' +numpy; python_version >= '3.0' +protobuf +pytest +six +wheel diff --git a/docs_requirements.pip b/reqs/docs.pip similarity index 92% rename from docs_requirements.pip rename to reqs/docs.pip index c32f7150c..769f9e57b 100644 --- a/docs_requirements.pip +++ b/reqs/docs.pip @@ -16,7 +16,6 @@ six snowballstemmer sphinx-rtd-theme sphinxcontrib-websupport -sphinxtogithub sympy typing urllib3 diff --git a/reqs/test.pip b/reqs/test.pip new file mode 100644 index 000000000..605b6ecb3 --- /dev/null +++ b/reqs/test.pip @@ -0,0 +1,28 @@ +boto3 +configparser +Keras==2.1.6; python_version < "3.8" +Pillow +h5py +future +numpy +libsvm; python_version >= "3.6" +olefile==0.44 +onnx==1.6.0; python_version <= "3.7" +pandas +pytest; python_version < '3.7' +pytest==5.3.4; python_version >= '3.7' +pytest-cov +pytest-sugar +scikit-learn==0.19.2; python_version <= '3.7' +scikit-learn; python_version > '3.7' +scipy +sympy +tensorflow==1.14.0; python_version < '3.8' +torch==1.5.0 +torchvision +xgboost +mock +wrapt +pyyaml +tqdm +pytest-timeout
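The ML_ASSERT_THROWS macro added to TestUtils.hpp above runs an expression and fails the test unless it throws the named exception type. A minimal usage sketch in the style of these tests (the test function and the out-of-range access are hypothetical, not part of this change):

```cpp
#include <iostream>   // std::clog, used by the macro's failure paths
#include <stdexcept>
#include <vector>
#include "framework/TestUtils.hpp"  // defines ML_ASSERT_THROWS (path as in this diff)

int testOutOfRangeAccessThrows() {
    std::vector<int> v;  // deliberately left empty
    // Passes: std::vector::at() throws std::out_of_range for an invalid index.
    ML_ASSERT_THROWS(v.at(3), std::out_of_range);
    // A different exception type, or no exception at all, would log to std::clog and return 1.
    return 0;  // 0 signals success, matching the other test functions
}
```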
diff --git a/reqs/test_tf2.pip b/reqs/test_tf2.pip new file mode 100644 index 000000000..957ad2d78 --- /dev/null +++ b/reqs/test_tf2.pip @@ -0,0 +1,7 @@ +tensorflow==2.1.0; python_version < '3.8' +tensorflow==2.2.0; python_version >= '3.8' + +tensorflow-addons==0.7.1; python_version == '2.7' +tensorflow-addons==0.8.3; python_version > '2.7' and python_version < '3.8' +tensorflow-hub==0.8.0 +transformers==2.10.0; python_version > '3.6' diff --git a/scripts/make_wheel.sh b/scripts/build.sh similarity index 53% rename from scripts/make_wheel.sh rename to scripts/build.sh index d84c3cf0a..2c177cd5b 100755 --- a/scripts/make_wheel.sh +++ b/scripts/build.sh @@ -4,12 +4,16 @@ set -e ##============================================================================= ## Main configuration processing -COREMLTOOLS_HOME=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.. +COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd ) BUILD_DIR="${COREMLTOOLS_HOME}/build" # command flag options +BUILD_MODE="Release" NUM_PROCS=1 -PYTHON=$(which python) +BUILD_PROTO=0 +BUILD_DIST=0 +PYTHON="3.7" +CHECK_ENV=1 unknown_option() { echo "Unknown option $1. Exiting." exit 1 } print_help() { - echo "Builds the release branch and produce a wheel to the targets directory " + echo "Builds coremltools and dependent libraries." echo - echo "Usage: ./make_wheel.sh" + echo "Usage: zsh -i build.sh" echo echo " --num-procs=n (default 1) Specify the number of processes to run in parallel." echo " --python=* Python to use for configuration." + echo " --protobuf Rebuild & overwrite the protocol buffers in MLModel." + echo " --debug Build without optimizations and stripping symbols." + echo " --dist Build the distribution (wheel)." + echo " --no-check-env Don't check the environment to verify it's up to date." echo exit 1 } # end of print help @@ -33,6 +41,10 @@ while [ $# -gt 0 ] do case $1 in --python=*) PYTHON=${1##--python=} ;; --num-procs=*) NUM_PROCS=${1##--num-procs=} ;; + --protobuf) BUILD_PROTO=1 ;; + --debug) BUILD_MODE="Debug" ;; + --dist) BUILD_DIST=1 ;; + --no-check-env) CHECK_ENV=0 ;; --help) print_help ;; *) unknown_option $1 ;; esac @@ -45,10 +57,14 @@ echo "Configuring using python from $PYTHON" echo echo ${COREMLTOOLS_HOME} cd ${COREMLTOOLS_HOME} -bash -e configure --python=$PYTHON --exclude-test-deps +if [[ $CHECK_ENV == 1 ]]; then + zsh -i -e scripts/env_create.sh --python=$PYTHON --exclude-test-deps +fi + +pip uninstall -y coremltools # Setup the right python -source scripts/python_env.sh +source scripts/env_activate.sh --python=$PYTHON echo echo "Using python from $(which python)" echo @@ -67,11 +83,18 @@ else fi # Call CMake -cmake $ADDITIONAL_CMAKE_OPTIONS -DCMAKE_BUILD_TYPE=Release\ -DPYTHON_EXECUTABLE:FILEPATH=${PYTHON_EXECUTABLE}\ -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR}\ -DPYTHON_LIBRARY=${PYTHON_LIBRARY} ..
+cmake $ADDITIONAL_CMAKE_OPTIONS \ + -DCMAKE_BUILD_TYPE=$BUILD_MODE \ + -DPYTHON_EXECUTABLE:FILEPATH=$PYTHON_EXECUTABLE \ + -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \ + -DPYTHON_LIBRARY=$PYTHON_LIBRARY \ + -DOVERWRITE_PB_SOURCE=$BUILD_PROTO \ + ${COREMLTOOLS_HOME} # Make the python wheel make -j${NUM_PROCS} -make dist + +if [ $BUILD_DIST -eq 1 ] +then + make dist +fi diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh new file mode 100755 index 000000000..21a3934d0 --- /dev/null +++ b/scripts/build_docs.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +set -e + +##============================================================================= +## Main configuration processing +COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd ) + +# command flag options +PYTHON="3.7" +SOURCE_VERSION="" +RELEASE=0 +UPLOAD=0 +MAIN_VERSION=0 +AUTH_TOKEN="" +CHECK_ENV=1 +WHEEL_PATH="" + +unknown_option() { + echo "Unknown option $1. Exiting." + exit 1 +} + +print_help() { + echo "Builds the docs associated with the code" + echo + echo "Usage: zsh -i build_docs.sh" + echo + echo " --wheel-path=* Specify which wheel to use to make docs." + echo " --python=* Python to use for configuration." + echo " --upload Upload these docs with the current coremltools version." + echo " --release Release the uploaded docs with the current coremltools version." + echo " --from-source-version=* If a version must be created, use this as the base to copy from.\ Default is the most recent version." + echo " --auth-token=* Auth token for accessing documentation API." + echo " --set-main-version Set the uploaded doc version as the main ('stable release') version." + echo " --no-check-env Don't check the environment to verify it's up to date." + echo + exit 1 +} # end of print help + +# command flag options +# Parse command line configure flags ------------------------------------------ +while [ $# -gt 0 ] + do case $1 in + --python=*) PYTHON=${1##--python=} ;; + --wheel-path=*) WHEEL_PATH=${1##--wheel-path=} ;; + --from-source-version=*) SOURCE_VERSION=${1##--from-source-version=} ;; + --auth-token=*) AUTH_TOKEN=${1##--auth-token=} ;; + --upload) UPLOAD=1 ;; + --release) RELEASE=1 ;; + --no-check-env) CHECK_ENV=0 ;; + --set-main-version) MAIN_VERSION=1 ;; + --help) print_help ;; + *) unknown_option $1 ;; + esac + shift +done + +cd ${COREMLTOOLS_HOME} + +if [[ $PYTHON != "None" ]]; then + # Setup the right python + if [[ $CHECK_ENV == 1 ]]; then + zsh -i scripts/env_create.sh --python=$PYTHON --include-docs-deps + fi + source scripts/env_activate.sh --python=$PYTHON +fi + +echo +echo "Using python from $(which python)" +echo + +if [[ $WHEEL_PATH != "" ]]; then + $PIP_EXECUTABLE install ${WHEEL_PATH} --upgrade +else + cd .. + $PIP_EXECUTABLE install -e coremltools --upgrade + cd ${COREMLTOOLS_HOME} +fi + +cd docs +make html +cd .. + +if [[ $UPLOAD == 1 ]]; then + if [[ $AUTH_TOKEN == "" ]]; then + echo "No auth token provided. Skipping upload." + pip uninstall -y coremltools + exit + fi + + # Set up base API call + DOC_COMMAND=(python docs/upload_docs.py --auth_token $AUTH_TOKEN) + + if [[ $SOURCE_VERSION != "" ]]; then + DOC_COMMAND+=(--from_source_version $SOURCE_VERSION) + fi + if [[ $RELEASE == 1 ]]; then + DOC_COMMAND+=(--release_version) + fi + if [[ $MAIN_VERSION == 1 ]]; then + DOC_COMMAND+=(--set_version_stable) + fi + ${DOC_COMMAND[@]} +else + if [[ $MAIN_VERSION == 1 ]]; then + echo "You must set release-version to use set-main-version."
+ fi +fi + +pip uninstall -y coremltools # We're using the build env for this script, so uninstall the wheel when we're done. diff --git a/scripts/env_activate.sh b/scripts/env_activate.sh new file mode 100755 index 000000000..a0c9f7bad --- /dev/null +++ b/scripts/env_activate.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +#### Usage #### +# source env_activate.sh +COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd ) +COREMLTOOLS_NAME=$(basename $COREMLTOOLS_HOME) +PYTHON=3.7 +ENV_DIR="${COREMLTOOLS_HOME}/envs" +DEV=0 + +function print_help { + echo "Activates the build environment for the specified python version." + echo + echo "Usage: source env_activate " + echo + echo " --dev Init an environment setup for development." + echo " --python=* Python to use for configuration." + echo + echo "Example: source env_activate --python=3.7" + echo + exit 1 +} # end of print help + +function unknown_option { + echo "Unrecognized option: $1" + echo "To get help, run source env_activate --help" + exit 1 +} # end of unknown option + +############################################################################### +# +# Parse command line configure flags ------------------------------------------ +# +while [ $# -gt 0 ] + do case $1 in + --python=*) PYTHON=${1##--python=} ;; + --dev) DEV=1 ;; + --help) print_help ;; + *) unknown_option $1 ;; + esac + shift +done + +if [[ $DEV == 1 ]]; then + PYTHON_ENV="${ENV_DIR}/${COREMLTOOLS_NAME}-dev-py${PYTHON}" +else + PYTHON_ENV="${ENV_DIR}/${COREMLTOOLS_NAME}-py${PYTHON}" +fi + +# python executable +export PYTHON_EXECUTABLE=$PYTHON_ENV/bin/python +export PYTHON_VERSION=$($PYTHON_EXECUTABLE -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))') +export PYTEST_EXECUTABLE=$PYTHON_ENV/bin/pytest +export PIP_EXECUTABLE=$PYTHON_ENV/bin/pip +export PYTHON_LIBRARY=$PYTHON_ENV/lib/libpython${PYTHON}m.dylib +if [[ ${PYTHON_VERSION:0:1} == "3" ]]; +then + if [[ ${PYTHON_VERSION:2:3} -ge 8 ]]; then + export PYTHON_INCLUDE_DIR=$PYTHON_ENV/include/python${PYTHON_VERSION}/ + else + export PYTHON_INCLUDE_DIR=$PYTHON_ENV/include/python${PYTHON_VERSION}m/ + fi +else + export PYTHON_INCLUDE_DIR=$PYTHON_ENV/include/python${PYTHON_VERSION}/ +fi + +# Print it out +echo "Export environment variables" +echo PYTHON_EXECUTABLE=$PYTHON_EXECUTABLE +echo PYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR +echo PYTEST_EXECUTABLE=$PYTEST_EXECUTABLE +echo PIP_EXECUTABLE=$PIP_EXECUTABLE +echo PYTHON_VERSION=$PYTHON_VERSION +echo PYTHON_LIBRARY=$PYTHON_LIBRARY +echo + +echo "Activating conda env in $PYTHON_ENV" +conda activate $PYTHON_ENV +echo diff --git a/scripts/env_create.sh b/scripts/env_create.sh new file mode 100755 index 000000000..cc155fd18 --- /dev/null +++ b/scripts/env_create.sh @@ -0,0 +1,128 @@ +#!/bin/bash + + +##============================================================================= +# Exit immediately on failure of a subcommand +set -e + +## Main configuration processing +COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd ) +COREMLTOOLS_NAME=$(basename $COREMLTOOLS_HOME) +ENV_DIR="${COREMLTOOLS_HOME}/envs" + +# command flag options +cleanup_option=0 +include_build_deps=1 +include_test_deps=1 +include_docs_deps=0 +DEV=0 +PYTHON="3.7" +force=0 + +function print_help { + echo "Configures the build with the specified toolchain. " + echo + echo "If env_create has already been run before, running env_create " + echo "will simply reconfigure the build with no changes. " + echo + echo "Usage: zsh -i env_create " + echo + echo " --dev Setup the environment for development." 
+ echo " --exclude-build-deps Exclude packages needed for building" + echo " --exclude-test-deps Exclude packages needed for testing" + echo " --force Rebuild the environment if it exists already." + echo " --include-docs-deps Include packages needed for making docs" + echo " --python=* Python to use for configuration." + echo + echo "Example: zsh -i env_create --python==3.7" + echo + exit 1 +} # end of print help + +function unknown_option { + echo "Unrecognized option: $1" + echo "To get help, run zsh -i env_create --help" + exit 1 +} # end of unknown option + +############################################################################### +# +# Parse command line configure flags ------------------------------------------ +# +while [ $# -gt 0 ] + do case $1 in + --python=*) PYTHON=${1##--python=} ;; + --dev) DEV=1;; + --exclude-build-deps) include_build_deps=0;; + --exclude-test-deps) include_test_deps=0;; + --include-docs-deps) include_docs_deps=1;; + --force) force=1;; + --help) print_help ;; + + *) unknown_option $1 ;; + esac + shift +done + +if [[ $DEV == 1 ]] then + ENV_DIR="${ENV_DIR}/${COREMLTOOLS_NAME}-dev-py${PYTHON}" +else + ENV_DIR="${ENV_DIR}/${COREMLTOOLS_NAME}-py${PYTHON}" +fi + +echo "Using python version string $PYTHON" + +# Setup a new conda env using the existing python +if conda activate $ENV_DIR && [ ${force} -eq 0 ] +then + echo "Build environment already exists in $ENV_DIR." +else + echo "Creating a new conda environment in $ENV_DIR" + conda create --prefix "$ENV_DIR" python="$PYTHON" + conda activate $ENV_DIR +fi + +# Activate and install packages in the environment +echo "Installing basic build requirements." +if [[ $include_build_deps == 1 ]]; then + python -m pip install -r $COREMLTOOLS_HOME/reqs/build.pip +fi + +# Install test requirements (upgrades packages if required) +if [[ $include_test_deps == 1 ]]; then + echo "Installing additional test requirements." + python -m pip install -r $COREMLTOOLS_HOME/reqs/test.pip --upgrade +fi + +# Install doc requirements (upgrades packages if required) +if [[ $include_docs_deps == 1 ]]; then + echo "Installing additional document requirements." + python -m pip install -r $COREMLTOOLS_HOME/reqs/docs.pip --upgrade +fi + +# Install doc requirements (upgrades packages if required) +if [[ $include_docs_deps == 1 ]]; then + echo "Installing additional document requirements." + python -m pip install -r $COREMLTOOLS_HOME/reqs/docs.pip --upgrade +fi + +if [[ $DEV == 1 ]]; then + echo "Setting up environment for development." + python -m pip install -e "$COREMLTOOLS_HOME/../coremltools" --upgrade +fi + +conda deactivate + +echo +echo +echo +echo "Python env created for coremltools development." +echo +echo "Run the following command to to activate it." +echo +if [[ $DEV == 1 ]]; then + echo " source ./scripts/env_activate.sh --python=${PYTHON} --dev" +else + echo " source ./scripts/env_activate.sh --python=${PYTHON}" +fi +echo diff --git a/scripts/make_docs.sh b/scripts/make_docs.sh deleted file mode 100755 index 06d99c257..000000000 --- a/scripts/make_docs.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -set -e - -##============================================================================= -## Main configuration processing -COREMLTOOLS_HOME=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.. -BUILD_DIR="${COREMLTOOLS_HOME}/build" - -# command flag options -PYTHON=$(which python) - -unknown_option() { - echo "Unknown option $1. Exiting." 
-  exit 1
-}
-
-print_help() {
-  echo "Builds the docs associated with the code"
-  echo
-  echo "Usage: ./make_docs.sh"
-  echo
-  echo "  --wheel-path=*  Specify which wheel to use to make docs."
-  echo "  --python=*      Python to use for configuration."
-  echo
-  exit 1
-} # end of print help
-
-# command flag options
-# Parse command line configure flags ------------------------------------------
-while [ $# -gt 0 ]
-  do case $1 in
-    --python=*)     PYTHON=${1##--python=} ;;
-    --wheel-path=*) WHEEL_PATH=${1##--wheel-path=} ;;
-    --help)         print_help ;;
-    *) unknown_option $1 ;;
-  esac
-  shift
-done
-
-# First configure
-echo ${COREMLTOOLS_HOME}
-pushd ${COREMLTOOLS_HOME}
-bash -e configure --python=$PYTHON --include-docs-deps
-
-# Setup the right python
-source scripts/python_env.sh
-echo
-echo "Using python from $(which python)"
-echo
-
-$PIP_EXECUTABLE install ${WHEEL_PATH}
-pushd ${COREMLTOOLS_HOME}/docs
-make html
-popd
-
-echo "Zipping docs"
-TARGET_DIR=${COREMLTOOLS_HOME}/build/dist
-pushd ${COREMLTOOLS_HOME}/docs/_build/
-zip -r ${TARGET_DIR}/docs.zip html
-popd
-
-popd
diff --git a/scripts/python_env.sh b/scripts/python_env.sh
deleted file mode 100755
index 1ade12002..000000000
--- a/scripts/python_env.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-#### Usage ####
-# source python_env.sh
-
-if [[ -z $BASH_SOURCE ]]; then
-  SCRIPT_DIR=$( cd "$( dirname $0)" && pwd )
-else
-  SCRIPT_DIR=$( cd "$( dirname $BASH_SOURCE )" && pwd )
-fi
-COREMLTOOLS_HOME=$SCRIPT_DIR/..
-BUILD_DIR=${COREMLTOOLS_HOME}/build
-COREMLTOOLS_ENV=coremltools-dev
-PYTHON_ENV=$BUILD_DIR/$COREMLTOOLS_ENV
-
-# python executable
-export PYTHON_EXECUTABLE=$PYTHON_ENV/bin/python
-export PYTHON_VERSION=$($PYTHON_EXECUTABLE -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
-export PYTEST_EXECUTABLE=$PYTHON_ENV/bin/pytest
-export PIP_EXECUTABLE=$PYTHON_ENV/bin/pip
-export PYTHON_LIBRARY=$PYTHON_ENV/lib/python${PYTHON_VERSION}/
-if [[ ${PYTHON_VERSION:0:1} == "3" ]] ;
-then
-  export PYTHON_INCLUDE_DIR=$PYTHON_ENV/include/python${PYTHON_VERSION}m/
-else
-  export PYTHON_INCLUDE_DIR=$PYTHON_ENV/include/python${PYTHON_VERSION}/
-fi
-
-# Print it out
-echo "Export environment variables"
-echo BUILD_DIR=$BUILD_DIR
-echo PYTHON_EXECUTABLE=$PYTHON_EXECUTABLE
-echo PYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR
-echo PYTEST_EXECUTABLE=$PYTEST_EXECUTABLE
-echo PIP_EXECUTABLE=$PIP_EXECUTABLE
-echo PYTHON_VERSION=$PYTHON_VERSION
-echo PYTHON_LIBRARY=$PYTHON_LIBRARY
-echo
-
-echo "Activating virtualenv in $PYTHON_ENV"
-. $PYTHON_ENV/bin/activate
-echo
diff --git a/scripts/release_wheel.sh b/scripts/release_wheel.sh
new file mode 100644
index 000000000..81ef00b6d
--- /dev/null
+++ b/scripts/release_wheel.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+set -e
+
+# Defaults
+COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd )
+COREMLTOOLS_NAME=$(basename $COREMLTOOLS_HOME)
+
+WHEEL_DIR="$COREMLTOOLS_HOME/build/dist"
+PYPI="pypi"
+PYENV_PY_VERSION="3.7"
+CHECK_ENV=1
+
+unknown_option() {
+  echo "Unknown option $1. Exiting."
+  exit 1
+}
+
+print_help() {
+  echo "Release the Python CoreMLTools Wheel."
+  echo
+  echo "Usage: ./release_wheel.sh --wheel-dir=${WHEEL_DIR}"
+  echo
+  echo "  --wheel-dir     (Optional) Directory in which the wheels sit."
+  echo "  --pypi          (Optional) Name of PyPI repository to release to."
+  echo "  --no-check-env  Don't check the environment to verify it's up to date."
+  echo
+  exit 1
+} # end of print help
+
+# command flag options
+# Parse command line configure flags ------------------------------------------
+while [ $# -gt 0 ]
+  do case $1 in
+    --wheel-dir=*)  WHEEL_DIR=${1##--wheel-dir=} ;;
+    --pypi=*)       PYPI=${1##--pypi=} ;;
+    --no-check-env) CHECK_ENV=0 ;;
+    --help)         print_help ;;
+    *) unknown_option $1 ;;
+  esac
+  shift
+done
+
+echo "------------------------------------------------------------------------------"
+echo "Releasing CoreML Tools"
+echo "------------------------------------------------------------------------------"
+
+
+cd $COREMLTOOLS_HOME
+
+# Setup the python env
+if [[ $CHECK_ENV == 1 ]]; then
+  zsh -i scripts/env_create.sh --python=$PYENV_PY_VERSION --exclude-test-deps
+fi
+
+source scripts/env_activate.sh --python=$PYENV_PY_VERSION
+pip install twine
+
+# Setup the wheel
+rm -rf dist
+python setup.py sdist
+cp $WHEEL_DIR/*.whl dist/.
+twine check dist/*
+
+# Disabled. For now, we treat "release" as a collection job.
+# Upload the wheel
+# twine upload --config-file ~/.pypirc --repository $PYPI $1/dist/*
diff --git a/scripts/test.sh b/scripts/test.sh
new file mode 100755
index 000000000..bcbbb1fcf
--- /dev/null
+++ b/scripts/test.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+set -e
+
+##=============================================================================
+## Main configuration processing
+COREMLTOOLS_HOME=$( cd "$( dirname "$0" )/.." && pwd )
+COREMLTOOLS_NAME=$(basename $COREMLTOOLS_HOME)
+BUILD_DIR="${COREMLTOOLS_HOME}/build"
+WHEEL_PATH=""
+FAST=0
+SLOW=0
+COV=""
+CHECK_ENV=1
+TIME_OUT=600
+
+# command flag options
+PYTHON="3.7"
+
+unknown_option() {
+  echo "Unknown option $1. Exiting."
+  exit 1
+}
+
+print_help() {
+  echo "Test the wheel by running all unit tests"
+  echo
+  echo "Usage: zsh -i test.sh"
+  echo
+  echo "  --wheel-path=*   Specify which wheel to test. Otherwise, test the current coremltools dir."
+  echo "  --test-package=* Test package to run."
+  echo "  --python=*       Python to use for configuration."
+  echo "  --requirements=* [Optional] Path to the requirements.txt file."
+  echo "  --cov=*          Generate coverage report for these dirs."
+  echo "  --fast           Run only fast tests."
+  echo "  --slow           Run only slow tests."
+  echo "  --timeout=*      Timeout limit (on each test)."
+  echo "  --no-check-env   Don't check the environment to verify it's up to date."
+  echo
+  exit 1
+} # end of print help
+
+# command flag options
+# Parse command line configure flags ------------------------------------------
+while [ $# -gt 0 ]
+  do case $1 in
+    --requirements=*) REQUIREMENTS=${1##--requirements=} ;;
+    --python=*)       PYTHON=${1##--python=} ;;
+    --test-package=*) TEST_PACKAGE=${1##--test-package=} ;;
+    --wheel-path=*)   WHEEL_PATH=${1##--wheel-path=} ;;
+    --cov=*)          COV=${1##--cov=} ;;
+    --fast)           FAST=1;;
+    --slow)           SLOW=1;;
+    --no-check-env)   CHECK_ENV=0 ;;
+    --timeout=*)      TIME_OUT=${1##--timeout=} ;;
+    --help)           print_help ;;
+    *) unknown_option $1 ;;
+  esac
+  shift
+done
+
+# First configure
+cd ${COREMLTOOLS_HOME}
+if [[ $CHECK_ENV == 1 ]]; then
+  zsh -i -e scripts/env_create.sh --python=$PYTHON
+fi
+
+# Setup the right python
+source scripts/env_activate.sh --python=$PYTHON
+echo
+echo "Using python from $(which python)"
+echo
+
+if [[ $WHEEL_PATH == "" ]]; then
+  cd ..
+  pip install -e ${COREMLTOOLS_NAME} --upgrade
+  cd ${COREMLTOOLS_NAME}
+else
+  # Install the wheel under test
+  $PIP_EXECUTABLE install "$WHEEL_PATH" --upgrade
+fi
+
+# Install dependencies if specified
+if [ ! -z "${REQUIREMENTS}" ]; then
-z "${REQUIREMENTS}" ]; then + $PIP_EXECUTABLE install -r "${REQUIREMENTS}" +fi + +# Now run the tests +echo "Running tests" + +TEST_CMD=($PYTEST_EXECUTABLE -ra -W "ignore::FutureWarning" -W "ignore::DeprecationWarning" --durations=100 --pyargs ${TEST_PACKAGE} --junitxml=${BUILD_DIR}/py-test-report.xml --timeout=${TIME_OUT}) +echo $TEST_CMD + +if [[ $SLOW != 1 || $FAST != 1 ]]; then + if [[ $SLOW == 1 ]]; then + TEST_CMD+=(-m "slow") + elif [[ $FAST == 1 ]]; then + TEST_CMD+=(-m "not slow") + fi +fi + +if [[ $COV != "" ]]; then + TEST_CMD+=(--cov $COV) +fi + +${TEST_CMD[@]} + +pip uninstall -y coremltools diff --git a/scripts/test_wheel.sh b/scripts/test_wheel.sh deleted file mode 100755 index 980a558c2..000000000 --- a/scripts/test_wheel.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -set -e - -##============================================================================= -## Main configuration processing -COREMLTOOLS_HOME=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.. -BUILD_DIR="${COREMLTOOLS_HOME}/build" - -# command flag options -PYTHON=$(which python) - -unknown_option() { - echo "Unknown option $1. Exiting." - exit 1 -} - -print_help() { - echo "Test the wheel by running all unit tests" - echo - echo "Usage: ./test_wheel.sh" - echo - echo " --wheel-path=* Specify which wheel to test." - echo " --python=* Python to use for configuration." - echo - exit 1 -} # end of print help - -# command flag options -# Parse command line configure flags ------------------------------------------ -while [ $# -gt 0 ] - do case $1 in - --python=*) PYTHON=${1##--python=} ;; - --wheel-path=*) WHEEL_PATH=${1##--wheel-path=} ;; - --help) print_help ;; - *) unknown_option $1 ;; - esac - shift -done - -# First configure -echo ${COREMLTOOLS_HOME} -cd ${COREMLTOOLS_HOME} -bash -e configure --python=$PYTHON - -# Setup the right python -source scripts/python_env.sh -echo -echo "Using python from $(which python)" -echo - -$PIP_EXECUTABLE install ${WHEEL_PATH} -$PYTEST_EXECUTABLE -ra -m "not slow" --durations=100 coremltools/test -p no:warnings diff --git a/setup.py b/setup.py index dd7d5349a..5b4134324 100755 --- a/setup.py +++ b/setup.py @@ -5,78 +5,94 @@ # Use of this source code is governed by a BSD-3-clause license that can be # found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause +import imp import os -from setuptools import setup +from setuptools import setup, find_packages -from coremltools import __version__ +# Get the coremltools version string +coremltools_dir = os.path.join(os.path.dirname(__file__), "coremltools") +version_module = imp.load_source( + "coremltools.version", os.path.join(coremltools_dir, "version.py") +) +__version__ = version_module.__version__ -README = os.path.join(os.getcwd(), "README.rst") +README = os.path.join(os.getcwd(), "README.md") -with open(README) as f: - long_description = f.read() -setup(name='coremltools', - version=__version__, - description='Community Tools for Core ML', - long_description=long_description, - author='Apple Inc.', - author_email='coremltools@apple.com', - url='https://github.com/apple/coremltools', - packages=[ - 'coremltools', - 'coremltools._deps', - 'coremltools.converters', - 'coremltools.converters.caffe', - 'coremltools.converters.sklearn', - 'coremltools.converters.xgboost', - 'coremltools.converters.libsvm', - 'coremltools.converters.keras', - 'coremltools.converters.tensorflow', - 'coremltools.converters.nnssa', - 'coremltools.converters.nnssa.commons', - 'coremltools.converters.nnssa.commons.builtins', - 
-          'coremltools.converters.nnssa.coreml',
-          'coremltools.converters.nnssa.coreml.graph_pass',
-          'coremltools.converters.nnssa.frontend',
-          'coremltools.converters.nnssa.frontend.graph_pass',
-          'coremltools.converters.nnssa.frontend.tensorflow',
-          'coremltools.converters.nnssa.frontend.tensorflow.graph_pass',
-          'coremltools.graph_visualization',
-          'coremltools.models',
-          'coremltools.models.neural_network',
-          'coremltools.models.nearest_neighbors',
-          'coremltools.proto',
-          'coremltools._scripts'
-      ],
-      package_data={'': ['LICENSE.txt', 'README.rst', 'libcaffeconverter.so', 'libcoremlpython.so'],
-                    'coremltools': ['graph_visualization/__init__.py',
-                                    'graph_visualization/app.js',
-                                    'graph_visualization/index.html',
-                                    'graph_visualization/style.css',
-                                    'graph_visualization/assets/*',
-                                    'graph_visualization/icons/*']
-                    },
-      install_requires=[
-          'numpy >= 1.14.5',
-          'protobuf >= 3.1.0',
-          'six>=1.10.0'
-      ],
-      entry_points={
-          'console_scripts': ['coremlconverter = coremltools:_main']
-      },
-      classifiers=[
-          'Development Status :: 5 - Production/Stable',
-          'Intended Audience :: End Users/Desktop',
-          'Intended Audience :: Developers',
-          'Operating System :: MacOS :: MacOS X',
-          'Operating System :: POSIX :: Linux',
-          'Programming Language :: Python :: 2.7',
-          'Programming Language :: Python :: 3.5',
-          'Programming Language :: Python :: 3.6',
-          'Programming Language :: Python :: 3.7',
-          'Topic :: Scientific/Engineering',
-          'Topic :: Software Development'
-      ],
-      license='BSD'
-      )
+long_description = """coremltools
+===========
+
+`Core ML <https://developer.apple.com/documentation/coreml>`_
+is an Apple framework that allows developers to easily integrate
+machine learning (ML) models into apps. Core ML is available on iOS, iPadOS,
+watchOS, macOS, and tvOS. Core ML introduces a public file format (.mlmodel)
+for a broad set of ML methods including deep neural networks (convolutional
+and recurrent), tree ensembles (boosted trees, random forest, decision trees),
+and generalized linear models. Core ML models can be directly integrated into
+apps within Xcode.
+
+:code:`coremltools` is a python package for creating, examining, and testing models in
+the .mlmodel format. In particular, it can be used to:
+
+- Convert trained models from popular machine learning tools into Core ML format
+  (.mlmodel).
+- Write models to Core ML format with a simple API.
+- Make predictions using the Core ML framework (on select platforms) to
+  verify conversion.
+
+More Information
+----------------
+
+- `coremltools user guide and examples <https://coremltools.readme.io/>`_
+- `Core ML framework documentation <https://developer.apple.com/documentation/coreml>`_
+- `Machine learning at Apple <https://machinelearning.apple.com>`_
+
+License
+-------
+Copyright (c) 2020, Apple Inc. All rights reserved.
+
+Use of this source code is governed by the
+`3-Clause BSD License <https://opensource.org/licenses/BSD-3-Clause>`_
+that can be found in the LICENSE.txt file.
+""" + +setup( + name="coremltools", + version=__version__, + description="Community Tools for Core ML", + long_description=long_description, + author="Apple Inc.", + author_email="coremltools@apple.com", + url="https://github.com/apple/coremltools", + packages=find_packages(), + package_data={ + "": ["LICENSE.txt", "README.md", "libcaffeconverter.so", "libcoremlpython.so"] + }, + install_requires=[ + "numpy >= 1.14.5", + "protobuf >= 3.1.0", + "six>=1.10.0", + "attr", + "attrs", + "sympy", + "scipy", + 'enum34;python_version < "3.4"', + "tqdm", + ], + entry_points={"console_scripts": ["coremlconverter = coremltools:_main"]}, + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: End Users/Desktop", + "Intended Audience :: Developers", + "Operating System :: MacOS :: MacOS X", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Scientific/Engineering", + "Topic :: Software Development", + ], + license="BSD", +) diff --git a/test_requirements.pip b/test_requirements.pip deleted file mode 100644 index e39ada7e9..000000000 --- a/test_requirements.pip +++ /dev/null @@ -1,11 +0,0 @@ -Keras==2.1.6 -Pillow -h5py -ipython -numpy -olefile==0.44 -pandas -scikit-learn==0.19.2 -sympy -tensorflow==1.14.0 -xgboost
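
---

Taken together, `env_create.sh`, `env_activate.sh`, and `test.sh` replace the old `configure`/`python_env.sh` flow. The sketch below shows the resulting end-to-end developer loop; it uses only flags defined in the scripts above, and the `3.7` version choice is illustrative (any version accepted by `--python=*` works). Note that `test.sh` will itself create the env unless `--no-check-env` is passed.

```shell
# Create (or reuse) the managed conda env for Python 3.7, set up for development.
zsh -i scripts/env_create.sh --python=3.7 --dev

# Activate it in the current shell (exports PYTHON_EXECUTABLE, PIP_EXECUTABLE, etc.).
source scripts/env_activate.sh --python=3.7 --dev

# Run only the fast tests against the in-tree package.
zsh -i scripts/test.sh --python=3.7 --fast
```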